{"question": "How many traffic accidents were reported in the state of California in the last 6 months?", "schema": "CREATE TABLE traffic_accidents (id INT, date DATE, state VARCHAR(255)); INSERT INTO traffic_accidents (id, date, state) VALUES (1, '2022-01-01', 'California'), (2, '2022-01-15', 'California'), (3, '2022-02-01', 'California');", "sql": "SELECT COUNT(*) FROM traffic_accidents WHERE state = 'California' AND date > DATE_SUB(CURRENT_DATE, INTERVAL 6 MONTH);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 118, "num_statements": 1} {"question": "Which destination in Mexico had the highest increase in visitors from 2019 to 2022?", "schema": "CREATE TABLE mexico_tourism (destination VARCHAR(50), year INT, visitors INT); INSERT INTO mexico_tourism (destination, year, visitors) VALUES ('Cancun', 2019, 2000000), ('Cancun', 2022, 2500000), ('Puerto Vallarta', 2019, 1500000), ('Puerto Vallarta', 2022, 2000000);", "sql": "SELECT destination, MAX(visitors) - MIN(visitors) AS increase FROM mexico_tourism WHERE year IN (2019, 2022) GROUP BY destination ORDER BY increase DESC LIMIT 1;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 161, "num_statements": 1} {"question": "What is the maximum salary for male employees hired before June 2021?", "schema": "CREATE TABLE Employees (EmployeeID INT, Department VARCHAR(50), Gender VARCHAR(10), Salary FLOAT, HireDate DATE); INSERT INTO Employees (EmployeeID, Department, Gender, Salary, HireDate) VALUES (1, 'IT', 'Male', 85000, '2021-04-20'), (2, 'HR', 'Female', 75000, '2019-12-15'), (3, 'IT', 'Female', 80000, '2020-01-08'), (4, 'IT', 'Male', 90000, '2021-04-01'), (5, 'Finance', 'Male', 75000, '2019-12-28'), (6, 'IT', 'Male', 88000, '2021-05-12'), (7, 'Marketing', 'Female', 78000, '2021-07-01'), (8, 'HR', 'Male', 80000, '2021-02-15');", "sql": "SELECT MAX(Salary) FROM Employees WHERE Gender = 'Male' AND HireDate < '2021-06-01';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 84, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: WHich Name origin has a Longitude of 332.5e?", "schema": "CREATE TABLE table_name_94 (name VARCHAR, longitude VARCHAR)", "sql": "SELECT name AS origin FROM table_name_94 WHERE longitude = '332.5e';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 68, "num_statements": 1} {"question": "What is the maximum selling price of natural makeup products in France?", "schema": "CREATE TABLE MakeupProducts (product_id INT, product_name VARCHAR(255), price DECIMAL(5,2), is_natural BOOLEAN, country VARCHAR(50));", "sql": "SELECT MAX(price) FROM MakeupProducts WHERE is_natural = TRUE AND country = 'France';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 85, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which organizations were founded in 1972, but became WOSM members until 1977?", "schema": "CREATE TABLE table_104858_1 (name_of_member_organization VARCHAR, year_member_organization_was_founded VARCHAR, year_current_scouting_organization_joined_wosm VARCHAR)", "sql": "SELECT name_of_member_organization FROM table_104858_1 WHERE year_member_organization_was_founded = '1972' AND year_current_scouting_organization_joined_wosm = '1977';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 167, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Away team Swindon Town had a Tie no listing of Replay with what as an Attendance?", "schema": "CREATE TABLE table_name_74 (attendance VARCHAR, tie_no VARCHAR, away_team VARCHAR)", "sql": "SELECT attendance FROM table_name_74 WHERE tie_no = 'replay' AND away_team = 'swindon town';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 92, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'publication' (example 112).", "schema": null, "sql": "CREATE SCHEMA testpub_rf_schema1;", "explanation": "DDL from PostgreSQL core regression test for Publication.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 33, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'rules' (example 541).", "schema": null, "sql": "INSERT INTO hats VALUES ('h7', 'blue') RETURNING *;", "explanation": "DML from PostgreSQL core regression test for Rules.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": true, "sql_length": 51, "num_statements": 1} {"question": "Find the average mental health score of students in the 'Spring' semester", "schema": "CREATE TABLE student_mental_health (student_id INT, semester VARCHAR(10), mental_health_score INT); INSERT INTO student_mental_health (student_id, semester, mental_health_score) VALUES (1, 'Spring', 75), (2, 'Spring', 80), (3, 'Fall', 70);", "sql": "SELECT AVG(mental_health_score) FROM student_mental_health WHERE semester = 'Spring';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 85, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'isn' (example 18).", "schema": null, "sql": "SELECT '1234567890120'::EAN13;", "explanation": "Example query from the 'isn' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 30, "num_statements": 1} {"question": "PostgreSQL regression test 'tstypes': Write the SELECT query (example 171).", "schema": null, "sql": "SELECT ts_delete('base:7 hidden:6 rebel:1 spaceship:2,33A,34B,35C,36D strike:3'::tsvector, ARRAY['spaceshi','rebel']);", "explanation": "Regression test for Tstypes in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT ts_delete('base:7 hidden:6 rebel:1 spaceship:2,33A,34B,35C,36D strike:3'::tsvector, ARRAY['spaceshi','rebel'])) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": true, "sql_length": 118, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many years was the pageant miss globe international and delegate was karen loren medrano agustin?", "schema": "CREATE TABLE table_1825751_14 (year VARCHAR, pageant VARCHAR, delegate VARCHAR)", "sql": "SELECT COUNT(year) FROM table_1825751_14 WHERE pageant = 'Miss Globe International' AND delegate = 'Karen Loren Medrano Agustin';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 129, "num_statements": 1} {"question": "What is the average water consumption per household in the city of Seattle?", "schema": "CREATE TABLE Household (ID INT, City VARCHAR(20), Consumption FLOAT); INSERT INTO Household (ID, City, Consumption) VALUES (1, 'Seattle', 12.3), (2, 'NYC', 10.5), (3, 'Seattle', 13.8);", "sql": "SELECT AVG(Consumption) FROM Household WHERE City = 'Seattle';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 62, "num_statements": 1} {"question": "How many individuals have been served by access to justice initiatives in Europe since 2017?", "schema": "CREATE TABLE initiatives (initiative_id INT, year INT, individuals_served INT); INSERT INTO initiatives (initiative_id, year, individuals_served) VALUES (1, 2017, 2000), (2, 2018, 3000); CREATE TABLE locations (initiative_id INT, region VARCHAR(20)); INSERT INTO locations (initiative_id, region) VALUES (1, 'Europe'), (2, 'North America');", "sql": "SELECT SUM(initiatives.individuals_served) FROM initiatives INNER JOIN locations ON initiatives.initiative_id = locations.initiative_id WHERE locations.region = 'Europe' AND initiatives.year >= 2017;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 199, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many draws have rumba/tango dance styles?", "schema": "CREATE TABLE table_name_79 (draw VARCHAR, dance_styles VARCHAR)", "sql": "SELECT COUNT(draw) FROM table_name_79 WHERE dance_styles = 'rumba/tango';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 73, "num_statements": 1} {"question": "What is the percentage of female faculty members in the university?", "schema": "CREATE TABLE university_faculty (id INT, gender VARCHAR(10)); INSERT INTO university_faculty (id, gender) VALUES (1, 'Female'), (2, 'Male'), (3, 'Male'), (4, 'Female'), (5, 'Female');", "sql": "SELECT ROUND(100.0 * SUM(CASE WHEN gender = 'Female' THEN 1 ELSE 0 END) / COUNT(*), 1) as pct_female_faculty;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 109, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is team 1 if Renova is team 2?", "schema": "CREATE TABLE table_name_99 (team_1 VARCHAR, team_2 VARCHAR)", "sql": "SELECT team_1 FROM table_name_99 WHERE team_2 = 'renova';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 57, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: how many matches did the player that played 23 matches win", "schema": "CREATE TABLE table_29302711_12 (matches_won INTEGER, matches_played VARCHAR)", "sql": "SELECT MIN(matches_won) FROM table_29302711_12 WHERE matches_played = 23;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 73, "num_statements": 1} {"question": "What is the total revenue generated by each product type, sorted by the total revenue in descending order?", "schema": "CREATE TABLE RevenueByProduct (product VARCHAR(255), revenue DECIMAL(10,2)); INSERT INTO RevenueByProduct (product, revenue) VALUES ('Flower', 50000), ('Concentrates', 35000), ('Edibles', 40000), ('Topicals', 25000);", "sql": "SELECT product, SUM(revenue) as total_revenue FROM RevenueByProduct GROUP BY product ORDER BY total_revenue DESC;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 113, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the number of clubs before 2003 with a 4th place winner of Shenzhen Jianlibao?", "schema": "CREATE TABLE table_name_66 (number_of_clubs INTEGER, fourth_placed VARCHAR, season VARCHAR)", "sql": "SELECT AVG(number_of_clubs) FROM table_name_66 WHERE fourth_placed = 'shenzhen jianlibao' AND season < 2003;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 108, "num_statements": 1} {"question": "How many incidents were recorded for Vessel3 between January 1, 2021 and June 30, 2021?", "schema": "CREATE TABLE VesselIncidents(IncidentID INT, VesselID INT, IncidentType TEXT, IncidentDate DATETIME); INSERT INTO VesselIncidents(IncidentID, VesselID, IncidentType, IncidentDate) VALUES (1, 3, 'Collision', '2021-03-15 14:30:00'), (2, 3, 'Mechanical Failure', '2021-05-02 08:00:00');", "sql": "SELECT COUNT(*) FROM VesselIncidents WHERE VesselID = 3 AND IncidentDate BETWEEN '2021-01-01' AND '2021-06-30';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 111, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the average number played of the team with 1 drawn and 24 against?", "schema": "CREATE TABLE table_name_79 (played INTEGER, drawn VARCHAR, against VARCHAR)", "sql": "SELECT AVG(played) FROM table_name_79 WHERE drawn = 1 AND against = 24;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 71, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the product, chromosome and porphyria related to the enzymes which take effect at the location 'Cytosol'?", "schema": "CREATE TABLE enzyme (product VARCHAR, chromosome VARCHAR, porphyria VARCHAR, LOCATION VARCHAR)", "sql": "SELECT product, chromosome, porphyria FROM enzyme WHERE LOCATION = 'Cytosol';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 77, "num_statements": 1} {"question": "What is the total number of players who play games on each platform and in each city?", "schema": "CREATE TABLE Players (PlayerID INT, City VARCHAR(20), Platform VARCHAR(10)); INSERT INTO Players (PlayerID, City, Platform) VALUES (1, 'Tokyo', 'PC'), (2, 'Los Angeles', 'Console'), (3, 'New York', 'PC'), (4, 'Paris', 'VR'), (5, 'Tokyo', 'Console'), (6, 'Los Angeles', 'PC');", "sql": "SELECT City, Platform, COUNT(*) AS Count FROM Players GROUP BY City, Platform ORDER BY Count DESC;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 98, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'create_misc' (example 56).", "schema": null, "sql": "INSERT INTO f_star (class) VALUES ('f');", "explanation": "DML from PostgreSQL core regression test for Create Misc.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 40, "num_statements": 1} {"question": "Write the SQL definition from PostgreSQL contrib extension 'contrib' (item 68).", "schema": null, "sql": "CREATE FUNCTION gbt_int4_distance(internal,int4,int2,oid,internal)\nRETURNS float8\nAS 'MODULE_PATHNAME'\nLANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;", "explanation": "SQL definition from the 'contrib' PostgreSQL contrib extension.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 145, "num_statements": 1} {"question": "What is the total donation amount for each preservation project in each location?", "schema": "CREATE TABLE PreservationProjects (id INT, name VARCHAR(255), location VARCHAR(255), start_date DATE, end_date DATE, donation_amount FLOAT);", "sql": "SELECT p.location, p.name, SUM(p.donation_amount) FROM PreservationProjects p GROUP BY p.location, p.name;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 106, "num_statements": 1} {"question": "How many 'high' severity security incidents were recorded in the last month for the 'finance' department?", "schema": "CREATE TABLE incidents (id INT, department VARCHAR(255), severity VARCHAR(255), incident_date DATE); INSERT INTO incidents (id, department, severity, incident_date) VALUES (1, 'finance', 'high', '2022-04-15'), (2, 'IT', 'medium', '2022-02-20'), (3, 'finance', 'medium', '2022-03-05'); SELECT CURDATE(), DATE_SUB(CURDATE(), INTERVAL 1 MONTH) INTO @current_date, @start_date; SELECT COUNT(*) FROM incidents WHERE department = 'finance' AND severity = 'high' AND incident_date BETWEEN @start_date AND @current_date;", "sql": "SELECT COUNT(*) FROM incidents WHERE department = 'finance' AND severity = 'high' AND incident_date BETWEEN DATE_SUB(CURDATE(), INTERVAL 1 MONTH) AND CURDATE();", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 160, "num_statements": 1} {"question": "What is the total transaction value for each month of the year 2022?", "schema": "CREATE TABLE transactions (transaction_id INT, transaction_date DATE, transaction_category VARCHAR(255), transaction_value DECIMAL(10,2)); INSERT INTO transactions (transaction_id, transaction_date, transaction_category, transaction_value) VALUES (1, '2022-01-02', 'Food', 75.00), (2, '2022-02-05', 'Electronics', 350.00), (3, '2022-03-10', 'Clothing', 200.00);", "sql": "SELECT YEAR(transaction_date) as year, MONTH(transaction_date) as month, SUM(transaction_value) as total_value FROM transactions WHERE transaction_date BETWEEN '2022-01-01' AND '2022-12-31' GROUP BY year, month;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 211, "num_statements": 1} {"question": "What is the total amount of funding received by 'community_development' table where the 'community_name' is 'peace_village'?", "schema": "CREATE TABLE community_development (id INT, community_name TEXT, community_size INT, region TEXT, funding FLOAT);", "sql": "SELECT SUM(funding) FROM community_development WHERE community_name = 'peace_village';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 86, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the score of the game on November 9 when Atlanta was the visiting team?", "schema": "CREATE TABLE table_name_73 (score VARCHAR, visitor VARCHAR, date VARCHAR)", "sql": "SELECT score FROM table_name_73 WHERE visitor = 'atlanta' AND date = 'november 9';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 82, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the attendance of the bowl game in Gainesville, Fl?", "schema": "CREATE TABLE table_15190346_2 (attendance VARCHAR, location VARCHAR)", "sql": "SELECT attendance FROM table_15190346_2 WHERE location = 'Gainesville, FL';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 75, "num_statements": 1} {"question": "List the names and types of all military technologies that were developed in the last 5 years.", "schema": "CREATE TABLE military_technology (id INT, name VARCHAR(255), technology_type VARCHAR(255), development_date DATE);INSERT INTO military_technology (id, name, technology_type, development_date) VALUES (1, 'F-35', 'Fighter Jet', '2010-01-01'), (2, 'M1 Abrams', 'Tank', '2005-01-01'), (3, 'Patriot', 'Missile System', '2015-01-01');", "sql": "SELECT name, technology_type FROM military_technology WHERE YEAR(development_date) >= 2015;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 91, "num_statements": 1} {"question": "How many packages were shipped from Colombia to Brazil in Q1 2022?", "schema": "CREATE TABLE packages (package_id INT, origin_country VARCHAR(255), destination_country VARCHAR(255), shipped_qty INT, shipped_quarter INT, shipped_year INT); INSERT INTO packages (package_id, origin_country, destination_country, shipped_qty, shipped_quarter, shipped_year) VALUES (1, 'Colombia', 'Brazil', 250, 1, 2022), (2, 'Brazil', 'Argentina', 300, 1, 2022), (3, 'Chile', 'Brazil', 350, 1, 2022);", "sql": "SELECT SUM(shipped_qty) as total_shipped FROM packages WHERE origin_country = 'Colombia' AND destination_country = 'Brazil' AND shipped_quarter = 1 AND shipped_year = 2022;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 172, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the ga date of the t5120 model?", "schema": "CREATE TABLE table_10818465_1 (ga_date VARCHAR, model VARCHAR)", "sql": "SELECT ga_date FROM table_10818465_1 WHERE model = 'T5120';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 59, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many were written by Peter Winther?", "schema": "CREATE TABLE table_20704243_3 (written_by VARCHAR, directed_by VARCHAR)", "sql": "SELECT COUNT(written_by) FROM table_20704243_3 WHERE directed_by = 'Peter Winther';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 83, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the name of the race on 9 July?", "schema": "CREATE TABLE table_name_72 (name VARCHAR, date VARCHAR)", "sql": "SELECT name FROM table_name_72 WHERE date = '9 july';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 53, "num_statements": 1} {"question": "PostgreSQL regression test 'jsonb': Write the SELECT query (example 77).", "schema": null, "sql": "SELECT '[1,2]'::jsonb @> '[1,2,2]'::jsonb;", "explanation": "Regression test for Jsonb in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT '[1,2]'::jsonb @> '[1,2,2]'::jsonb) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": true, "sql_length": 42, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Find the country of the airlines whose name starts with 'Orbit'.", "schema": "CREATE TABLE airlines (country VARCHAR, name VARCHAR)", "sql": "SELECT country FROM airlines WHERE name LIKE 'Orbit%';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 54, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which Bronze has a Rank of 4, and a Gold smaller than 0?", "schema": "CREATE TABLE table_name_7 (bronze INTEGER, rank VARCHAR, gold VARCHAR)", "sql": "SELECT MAX(bronze) FROM table_name_7 WHERE rank = 4 AND gold < 0;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 65, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who did wilson reis fight against that lasted less than 3 rounds with a time of 1:02?", "schema": "CREATE TABLE table_name_25 (opponent VARCHAR, round VARCHAR, time VARCHAR)", "sql": "SELECT opponent FROM table_name_25 WHERE round < 3 AND time = '1:02';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 69, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the opponent at the game that had a loss of Travers (0–2)?", "schema": "CREATE TABLE table_name_9 (opponent VARCHAR, loss VARCHAR)", "sql": "SELECT opponent FROM table_name_9 WHERE loss = 'travers (0–2)';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 63, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which Time/Retired has less than 77 laps, and a Grid of 16?", "schema": "CREATE TABLE table_name_68 (time_retired VARCHAR, laps VARCHAR, grid VARCHAR)", "sql": "SELECT time_retired FROM table_name_68 WHERE laps < 77 AND grid = 16;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 69, "num_statements": 1} {"question": "What is the total amount of climate finance invested in renewable energy in Africa?", "schema": "CREATE TABLE ClimateFinance (Country TEXT, Sector TEXT, Investment_Amount NUMERIC); INSERT INTO ClimateFinance (Country, Sector, Investment_Amount) VALUES ('South Africa', 'Renewable Energy', 5000000), ('Kenya', 'Renewable Energy', 2000000), ('Nigeria', 'Renewable Energy', 7000000);", "sql": "SELECT SUM(Investment_Amount) FROM ClimateFinance WHERE Country IN ('South Africa', 'Kenya', 'Nigeria') AND Sector = 'Renewable Energy';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 136, "num_statements": 1} {"question": "What is the average number of players per multiplayer game in Europe?", "schema": "CREATE TABLE Games (GameID INT, GameType VARCHAR(255), Multiplayer INT); INSERT INTO Games (GameID, GameType, Multiplayer) VALUES (1, 'Racing', 0); INSERT INTO Games (GameID, GameType, Multiplayer) VALUES (2, 'Shooter', 1); CREATE TABLE Players (PlayerID INT, GameID INT); INSERT INTO Players (PlayerID, GameID) VALUES (1, 1); INSERT INTO Players (PlayerID, GameID) VALUES (1, 2); INSERT INTO Players (PlayerID, GameID) VALUES (2, 2); INSERT INTO Players (PlayerID, GameID) VALUES (3, 2); INSERT INTO Players (PlayerID, GameID) VALUES (4, 2); INSERT INTO Players (PlayerID, GameID) VALUES (5, 2); INSERT INTO Players (PlayerID, GameID) VALUES (6, 2); INSERT INTO Players (PlayerID, GameID) VALUES (7, 2); INSERT INTO Players (PlayerID, GameID) VALUES (8, 2); INSERT INTO Players (PlayerID, GameID) VALUES (9, 2); INSERT INTO Players (PlayerID, GameID) VALUES (10, 2);", "sql": "SELECT AVG(CountPlayers) FROM (SELECT GameID, COUNT(PlayerID) AS CountPlayers FROM Players INNER JOIN Games ON Players.GameID = Games.GameID WHERE Games.Multiplayer = 1 GROUP BY GameID) AS Subquery WHERE EXISTS (SELECT GameID FROM Games WHERE ReleaseCountry LIKE '%Europe%' AND Games.GameID = Subquery.GameID);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 310, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the score when the team is milwaukee?", "schema": "CREATE TABLE table_27734769_8 (score VARCHAR, team VARCHAR)", "sql": "SELECT score FROM table_27734769_8 WHERE team = 'Milwaukee';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 60, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the result of the game played on April 16 with Philadelphia as home team?", "schema": "CREATE TABLE table_name_64 (result VARCHAR, home_team VARCHAR, date VARCHAR)", "sql": "SELECT result FROM table_name_64 WHERE home_team = 'philadelphia' AND date = 'april 16';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 88, "num_statements": 1} {"question": "Calculate the percentage of unresolved vulnerabilities by region", "schema": "CREATE TABLE vulnerabilities (id INT, region VARCHAR(10), status VARCHAR(10)); INSERT INTO vulnerabilities (id, region, status) VALUES (1, 'EMEA', 'Resolved'), (2, 'APAC', 'Unresolved'), (3, 'AMER', 'Resolved'), (4, 'EMEA', 'Resolved'), (5, 'APAC', 'Resolved'), (6, 'AMER', 'Unresolved');", "sql": "SELECT region, 100.0 * SUM(CASE WHEN status = 'Unresolved' THEN 1 ELSE 0 END) / COUNT(*) as unresolved_percentage FROM vulnerabilities GROUP BY region;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 151, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What are the points for ktm-vmc equipment?", "schema": "CREATE TABLE table_16729457_18 (points VARCHAR, equipment VARCHAR)", "sql": "SELECT points FROM table_16729457_18 WHERE equipment = 'KTM-VMC';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 65, "num_statements": 1} {"question": "Identify the menu items that were 86'ed (ran out) before 5pm on a specific day for the 'Gourmet Greens' restaurant.", "schema": "CREATE TABLE menu_items (id INT, restaurant_id INT, name VARCHAR(50), available INT, available_time TIME); INSERT INTO menu_items (id, restaurant_id, name, available, available_time) VALUES (1, 3, 'Kale Salad', 10, '12:00:00'), (2, 3, 'Beet Burger', 15, '12:00:00'), (3, 3, 'Carrot Fries', 20, '12:00:00'), (4, 3, 'Quinoa Bowl', 30, '12:00:00');", "sql": "SELECT name FROM menu_items WHERE restaurant_id = 3 AND available_time < '17:00:00' GROUP BY name HAVING SUM(available) = 0;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 124, "num_statements": 1} {"question": "Show a SQL definition from the timescaledb project (create_table_with, item 19).", "schema": null, "sql": "CREATE TABLE t2(time timestamptz, device text, value float) WITH (timescaledb.nonexistent_param = false);", "explanation": "SQL definition from the open-source timescaledb PostgreSQL project.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_table", "is_postgresql_specific": false, "sql_length": 105, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: For which Game 4 did Michael O'Connor play wing position?", "schema": "CREATE TABLE table_name_8 (game_4 VARCHAR, position VARCHAR, game_1 VARCHAR)", "sql": "SELECT game_4 FROM table_name_8 WHERE position = 'wing' AND game_1 = 'michael o'connor';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 88, "num_statements": 1} {"question": "What is the number of unique users who have streamed music from each genre?", "schema": "CREATE TABLE user_genre_streams (stream_id int, user_id int, timestamp datetime, genre varchar(255)); INSERT INTO user_genre_streams (stream_id, user_id, timestamp, genre) VALUES (1, 123, '2022-01-01 10:00:00', 'Rock');", "sql": "SELECT genre, COUNT(DISTINCT user_id) as unique_users FROM user_genre_streams WHERE timestamp BETWEEN '2022-01-01' AND '2022-12-31' GROUP BY genre;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 147, "num_statements": 1} {"question": "Insert data into the 'auto_show' table", "schema": "CREATE TABLE auto_show (id INT PRIMARY KEY, show_name VARCHAR(255), location VARCHAR(255), start_date DATE, end_date DATE);", "sql": "INSERT INTO auto_show (id, show_name, location, start_date, end_date) VALUES (1, 'Paris Motor Show', 'Paris, France', '2023-10-01', '2023-10-15');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 146, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What are all the name(namesakes) for the entire chart?", "schema": "CREATE TABLE table_29860752_11 (name__namesake_ VARCHAR)", "sql": "SELECT name__namesake_ FROM table_29860752_11;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 46, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many entries are shown for date of successors formal installation where successor is john w. walker (dr)?", "schema": "CREATE TABLE table_225099_3 (date_of_successors_formal_installation VARCHAR, successor VARCHAR)", "sql": "SELECT COUNT(date_of_successors_formal_installation) FROM table_225099_3 WHERE successor = 'John W. Walker (DR)';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 113, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Tell me the highest total when the horse is spender s", "schema": "CREATE TABLE table_name_99 (total INTEGER, horse VARCHAR)", "sql": "SELECT MAX(total) FROM table_name_99 WHERE horse = 'spender s';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 63, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which nation has a Bronze and Silver smaller than 1 and a Gold larger than 1?", "schema": "CREATE TABLE table_name_87 (nation VARCHAR, gold VARCHAR, bronze VARCHAR, silver VARCHAR)", "sql": "SELECT nation FROM table_name_87 WHERE bronze < 1 AND silver < 1 AND gold > 1;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 78, "num_statements": 1} {"question": "Find healthcare providers with low cultural competency scores serving Latinx in FL.", "schema": "CREATE TABLE healthcare_providers (provider_id INT, name TEXT, state TEXT); INSERT INTO healthcare_providers (provider_id, name, state) VALUES (1, 'Dr. Ana Perez', 'FL'); CREATE TABLE cultural_competency (provider_id INT, score INT, community TEXT);", "sql": "SELECT h.name, c.score FROM healthcare_providers h INNER JOIN cultural_competency c ON h.provider_id = c.provider_id WHERE h.state = 'FL' AND c.community = 'Latinx' AND c.score < 50;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 182, "num_statements": 1} {"question": "What is the average order size for each salesperson?", "schema": "CREATE TABLE salesperson (id INT, name VARCHAR(50), region VARCHAR(50)); INSERT INTO salesperson (id, name, region) VALUES (1, 'John Doe', 'North'), (2, 'Jane Smith', 'South'); CREATE TABLE orders (id INT, salesperson_id INT, size INT); INSERT INTO orders (id, salesperson_id, size) VALUES (1, 1, 10), (2, 1, 15), (3, 2, 20), (4, 2, 25);", "sql": "SELECT salesperson_id, AVG(size) as avg_order_size FROM orders GROUP BY salesperson_id;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 87, "num_statements": 1} {"question": "pgTAP test for Valueset (assertion 230).", "schema": null, "sql": "/****************************************************************************/\n-- Now test set_eq().\n\nSELECT * FROM check_test(\n set_ne( 'anames', 'expect' ),\n false,\n 'set_ne(prepared, prepared) fail',\n '',\n ''\n);", "explanation": "SQL assertion from pgTAP test suite for Valueset.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 229, "num_statements": 1} {"question": "What is the number of circular economy initiatives for each country in 2020?", "schema": "CREATE TABLE circular_economy (country VARCHAR(255), year INT, initiatives INT); INSERT INTO circular_economy (country, year, initiatives) VALUES ('USA', 2020, 15), ('Canada', 2020, 10), ('Mexico', 2020, 12);", "sql": "SELECT c.country, COUNT(c.initiatives) as num_initiatives FROM circular_economy c WHERE c.year = 2020 GROUP BY c.country;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 121, "num_statements": 1} {"question": "What is the number of food safety violations per category?", "schema": "CREATE TABLE Menu_Categories (Category_ID INT, Category_Name TEXT); INSERT INTO Menu_Categories (Category_ID, Category_Name) VALUES (1, 'Main Dishes'), (2, 'Sides'); CREATE TABLE Menu_Items (Item_ID INT, Item_Name TEXT, Category_ID INT); INSERT INTO Menu_Items (Item_ID, Item_Name, Category_ID) VALUES (1, 'Burger', 1), (2, 'Pizza', 1), (3, 'Fries', 2), (4, 'Salad', 2); CREATE TABLE Inspections (Inspection_ID INT, Item_ID INT, Violation_Count INT); INSERT INTO Inspections (Inspection_ID, Item_ID, Violation_Count) VALUES (1, 1, 3), (2, 1, 2), (3, 2, 1), (4, 2, 0), (5, 3, 1), (6, 4, 0);", "sql": "SELECT MC.Category_Name, SUM(I.Violation_Count) as Total_Violations FROM Inspections I JOIN Menu_Items MI ON I.Item_ID = MI.Item_ID JOIN Menu_Categories MC ON MI.Category_ID = MC.Category_ID GROUP BY MC.Category_Name;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 217, "num_statements": 1} {"question": "Find the total revenue of organic haircare products in the Asian market for the current year.", "schema": "CREATE TABLE sales(product_id INT, sale_date DATE, revenue DECIMAL(10,2), country VARCHAR(50)); INSERT INTO sales VALUES (11, '2021-06-15', 50.00, 'CN'); INSERT INTO sales VALUES (12, '2021-07-16', 60.00, 'JP'); CREATE TABLE products(product_id INT, product_name VARCHAR(50), is_organic BOOLEAN, product_category VARCHAR(50)); INSERT INTO products VALUES (11, 'Rosemary Shampoo', TRUE, 'Haircare'); INSERT INTO products VALUES (12, 'Lavender Conditioner', TRUE, 'Haircare');", "sql": "SELECT SUM(sales.revenue) as total_revenue FROM sales JOIN products ON sales.product_id = products.product_id WHERE products.is_organic = TRUE AND sales.country = 'Asia' AND YEAR(sales.sale_date) = YEAR(CURDATE());", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 214, "num_statements": 1} {"question": "Which aircraft has the most accidents in a specific region?", "schema": "CREATE TABLE aircrafts (aircraft_id INT, model VARCHAR(50), region VARCHAR(50)); INSERT INTO aircrafts (aircraft_id, model, region) VALUES (1, 'Boeing 747', 'North America'), (2, 'Airbus A320', 'Europe'), (3, 'Boeing 737', 'Asia'); CREATE TABLE accidents (accident_id INT, aircraft_id INT, date DATE); INSERT INTO accidents (accident_id, aircraft_id) VALUES (1, 1), (2, 1), (3, 3), (4, 2), (5, 2);", "sql": "SELECT a.model, COUNT(*) as num_accidents FROM aircrafts a JOIN accidents b ON a.aircraft_id = b.aircraft_id WHERE a.region = 'North America' GROUP BY a.model ORDER BY num_accidents DESC LIMIT 1;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 195, "num_statements": 1} {"question": "Write the PL/pgSQL object from PostgreSQL regression test 'encoding' (example 1).", "schema": null, "sql": "/* skip test if not UTF8 server encoding */\nSELECT getdatabaseencoding() <> 'UTF8' AS skip_test \\gset\n\\if :skip_test\n\\quit\n\\endif\n\n\\getenv libdir PG_LIBDIR\n\\getenv dlsuffix PG_DLSUFFIX\n\n\\set regresslib :libdir '/regress' :dlsuffix\n\nCREATE FUNCTION test_bytea_to_text(bytea) RETURNS text\n AS :'regresslib' LANGUAGE C STRICT;", "explanation": "PL/pgSQL object from PostgreSQL core test for Encoding.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 326, "num_statements": 1} {"question": "pgTAP test for Resultset (assertion 443).", "schema": null, "sql": "DECLARE cwant CURSOR FOR SELECT id, name FROM names WHERE name like 'An%' ORDER BY id;", "explanation": "SQL assertion from pgTAP test suite for Resultset.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "plpgsql", "is_postgresql_specific": false, "sql_length": 86, "num_statements": 1} {"question": "What is the total weight loss in pounds for members who have lost weight since they joined?", "schema": "CREATE TABLE health_metrics (member_id INT, weight_loss_pounds FLOAT, last_checked DATE); INSERT INTO health_metrics (member_id, weight_loss_pounds, last_checked) VALUES (1, 3, '2021-01-15'), (2, 7, '2022-03-28');", "sql": "SELECT SUM(weight_loss_pounds) FROM health_metrics JOIN members ON health_metrics.member_id = members.member_id WHERE health_metrics.weight_loss_pounds > 0;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 156, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the average overall pick number for the USF player who was picked after round 5?", "schema": "CREATE TABLE table_name_23 (overall INTEGER, college VARCHAR, round VARCHAR)", "sql": "SELECT AVG(overall) FROM table_name_23 WHERE college = 'usf' AND round > 5;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 75, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who directed the episode written cyrus nowrasteh?", "schema": "CREATE TABLE table_name_85 (directed_by VARCHAR, written_by VARCHAR)", "sql": "SELECT directed_by FROM table_name_85 WHERE written_by = 'cyrus nowrasteh';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 75, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'citext' (example 354).", "schema": null, "sql": "SELECT regexp_matches('foobarbequebaz'::citext, '(bar)(beque)') = ARRAY[ 'bar', 'beque' ] AS t;", "explanation": "Example query from the 'citext' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 95, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: who is the opponent where tv is abc and game site is tampa stadium", "schema": "CREATE TABLE table_11406866_2 (opponent VARCHAR, tv VARCHAR, game_site VARCHAR)", "sql": "SELECT opponent FROM table_11406866_2 WHERE tv = 'ABC' AND game_site = 'Tampa Stadium';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 87, "num_statements": 1} {"question": "How many local businesses have partnered with hotels in Japan and South Korea?", "schema": "CREATE TABLE BusinessPartnerships(id INT, business_id INT, hotel_id INT); INSERT INTO BusinessPartnerships(id, business_id, hotel_id) VALUES (1, 1, 1), (2, 2, 2), (3, 3, null), (6, 4, 3), (7, 5, 4); CREATE TABLE LocalBusinesses(id INT, name TEXT, country TEXT); INSERT INTO LocalBusinesses(id, name, country) VALUES (1, 'Surf Shop', 'Japan'), (2, 'Coffee House', 'Japan'), (3, 'Art Gallery', 'Japan'), (4, 'Ramen Shop', 'South Korea'), (5, 'Tea House', 'South Korea'); CREATE TABLE Hotels(id INT, name TEXT, country TEXT); INSERT INTO Hotels(id, name, country) VALUES (1, 'Ocean View', 'Japan'), (2, 'Harbor Inn', 'Japan'), (3, 'Seoul Palace', 'South Korea'), (4, 'Kyoto Garden', 'South Korea');", "sql": "SELECT COUNT(*) FROM BusinessPartnerships JOIN LocalBusinesses ON BusinessPartnerships.business_id = LocalBusinesses.id JOIN Hotels ON BusinessPartnerships.hotel_id = Hotels.id WHERE LocalBusinesses.country IN ('Japan', 'South Korea') AND Hotels.country IN ('Japan', 'South Korea');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 282, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: For a team 2 of Al-Faisaly, what was the 2nd leg?", "schema": "CREATE TABLE table_name_96 (team_2 VARCHAR)", "sql": "SELECT 2 AS nd_leg FROM table_name_96 WHERE team_2 = 'al-faisaly';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "What is the average carbon sequestration value in the 'central_region'?", "schema": "CREATE TABLE carbon_sequestration (id INT, region VARCHAR(50), value FLOAT); INSERT INTO carbon_sequestration (id, region, value) VALUES (1, 'Central Region', 56.78); INSERT INTO carbon_sequestration (id, region, value) VALUES (2, 'Northern Region', 34.56);", "sql": "SELECT AVG(value) FROM carbon_sequestration WHERE region = 'Central Region';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 76, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What's the lane with a time of 1:00.66?", "schema": "CREATE TABLE table_name_93 (lane INTEGER, time VARCHAR)", "sql": "SELECT AVG(lane) FROM table_name_93 WHERE time = '1:00.66';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 59, "num_statements": 1} {"question": "Write the PL/pgSQL object from PostgreSQL regression test 'plpgsql' (example 539).", "schema": null, "sql": "$$ language plpgsql;\n\nselect stacked_diagnostics_test();\n\ndrop function stacked_diagnostics_test();\n\n-- test variadic functions\n\ncreate or replace function vari(variadic int[])\nreturns void as $$\nbegin\n for i in array_lower($1,1)..array_upper($1,1) loop\n raise notice '%', $1[i];", "explanation": "PL/pgSQL object from PostgreSQL core test for Plpgsql.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "plpgsql_function", "is_postgresql_specific": true, "sql_length": 283, "num_statements": 4} {"question": "Generate PostgreSQL SQL for: Which To par has a Place in t9 and a Country of Spain?", "schema": "CREATE TABLE table_name_55 (to_par VARCHAR, place VARCHAR, country VARCHAR)", "sql": "SELECT to_par FROM table_name_55 WHERE place = 't9' AND country = 'spain';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 74, "num_statements": 1} {"question": "What is the number of countries in the Latin America and Caribbean region that have reduced their carbon emissions in the last 5 years?", "schema": "CREATE TABLE country_emissions (name VARCHAR(50), region VARCHAR(50), year INT, carbon_emissions INT); INSERT INTO country_emissions (name, region, year, carbon_emissions) VALUES ('Country 1', 'Latin America and Caribbean', 2017, 10000); INSERT INTO country_emissions (name, region, year, carbon_emissions) VALUES ('Country 1', 'Latin America and Caribbean', 2018, 9000); INSERT INTO country_emissions (name, region, year, carbon_emissions) VALUES ('Country 1', 'Latin America and Caribbean', 2019, 8000); INSERT INTO country_emissions (name, region, year, carbon_emissions) VALUES ('Country 2', 'Latin America and Caribbean', 2017, 15000); INSERT INTO country_emissions (name, region, year, carbon_emissions) VALUES ('Country 2', 'Latin America and Caribbean', 2018, 14000); INSERT INTO country_emissions (name, region, year, carbon_emissions) VALUES ('Country 2', 'Latin America and Caribbean', 2019, 13000);", "sql": "SELECT region, COUNT(*) FROM country_emissions WHERE region = 'Latin America and Caribbean' AND carbon_emissions < (SELECT carbon_emissions FROM country_emissions WHERE name = 'Country 1' AND year = 2017 AND region = 'Latin America and Caribbean' ORDER BY year DESC LIMIT 1) GROUP BY region HAVING COUNT(*) > 1;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 311, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is McCains percent when Obamas is 39.13%", "schema": "CREATE TABLE table_20688030_1 (mccain_number VARCHAR, obama_percentage VARCHAR)", "sql": "SELECT COUNT(mccain_number) FROM table_20688030_1 WHERE obama_percentage = '39.13%';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 84, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the lowest size of the school with titans as the mascot?", "schema": "CREATE TABLE table_name_93 (size INTEGER, mascot VARCHAR)", "sql": "SELECT MIN(size) FROM table_name_93 WHERE mascot = 'titans';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 60, "num_statements": 1} {"question": "Delete any soil moisture readings that are older than 30 days.", "schema": "CREATE TABLE Soil_Moisture (ID INT, Moisture FLOAT, Timestamp DATETIME); INSERT INTO Soil_Moisture (ID, Moisture, Timestamp) VALUES (1, 45, '2022-01-01 10:00:00'), (2, 52, '2022-01-15 12:00:00');", "sql": "DELETE FROM Soil_Moisture WHERE Timestamp < NOW() - INTERVAL '30 days';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_delete", "is_postgresql_specific": false, "sql_length": 71, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'citext' (example 330).", "schema": null, "sql": "SELECT btrim('xyxtrimyyx'::citext, 'xy'::text ) = 'trim' AS t;", "explanation": "Example query from the 'citext' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What Winning driver has a Winning constructor of talbot?", "schema": "CREATE TABLE table_name_59 (winning_driver VARCHAR, winning_constructor VARCHAR)", "sql": "SELECT winning_driver FROM table_name_59 WHERE winning_constructor = 'talbot';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 78, "num_statements": 1} {"question": "What is the total quantity of recycled materials used by each brand?", "schema": "CREATE TABLE Brands (BrandID INT, BrandName VARCHAR(50)); INSERT INTO Brands (BrandID, BrandName) VALUES (1, 'BrandX'), (2, 'BrandY'), (3, 'BrandZ'); CREATE TABLE Products (ProductID INT, ProductName VARCHAR(50), BrandID INT, RecycledMaterials INT); INSERT INTO Products (ProductID, ProductName, BrandID, RecycledMaterials) VALUES (1, 'ProductA', 1, 25), (2, 'ProductB', 1, 30), (3, 'ProductC', 2, 15), (4, 'ProductD', 2, 0), (5, 'ProductE', 3, 35), (6, 'ProductF', 3, 40);", "sql": "SELECT BrandName, SUM(RecycledMaterials) as TotalRecycledMaterials FROM Brands b JOIN Products p ON b.BrandID = p.BrandID GROUP BY BrandName;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 141, "num_statements": 1} {"question": "What is the average age of patients who identified as LGBTQ+ and have been diagnosed with anxiety?", "schema": "CREATE TABLE patients (id INT PRIMARY KEY, name VARCHAR(255), age INT, gender VARCHAR(50), ethnicity VARCHAR(255), condition VARCHAR(255)); INSERT INTO patients (id, name, age, gender, ethnicity, condition) VALUES (1, 'Alex Johnson', 30, 'Male', 'Latinx', 'Anxiety');", "sql": "SELECT AVG(age) as average_age FROM patients WHERE ethnicity LIKE '%LGBTQ+%' AND condition = 'Anxiety';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 103, "num_statements": 1} {"question": "How many cases were handled by each attorney in the last year?", "schema": "CREATE TABLE cases (case_id INT, attorney_name VARCHAR(255), case_date DATE); INSERT INTO cases (case_id, attorney_name, case_date) VALUES (1, 'Smith', '2020-01-01'), (2, 'Jones', '2020-05-15'), (3, 'Jones', '2021-07-20'), (4, 'Smith', '2020-12-31'), (5, 'Brown', '2020-06-20');", "sql": "SELECT attorney_name, COUNT(*) FROM cases WHERE case_date >= DATE_SUB(CURRENT_DATE, INTERVAL 1 YEAR) GROUP BY attorney_name;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 124, "num_statements": 1} {"question": "Insert a new product with id 8 from 'Italy'", "schema": "CREATE TABLE products (id INT PRIMARY KEY, name VARCHAR(100), category VARCHAR(50), price DECIMAL(5,2));", "sql": "INSERT INTO products (id, name, category, price) VALUES (8, 'Parmigiano Reggiano', 'Cheese', 29.99, 'Italy');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 109, "num_statements": 1} {"question": "PostgreSQL regression test 'inet': Write the SELECT query (example 97).", "schema": null, "sql": "SELECT '127.0.0.2'::inet - ('127.0.0.2'::inet + 500);", "explanation": "Regression test for Inet in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT '127.0.0.2'::inet - ('127.0.0.2'::inet + 500)) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 54, "num_statements": 1} {"question": "PostgreSQL regression test 'horology': Write the SELECT query (example 252).", "schema": null, "sql": "SELECT to_timestamp('05121445482000', 'MMDDHH24MISSYYYY');", "explanation": "Regression test for Horology in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT to_timestamp('05121445482000', 'MMDDHH24MISSYYYY')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 58, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How old was the person born 24 September 1851 at the time of disaster?", "schema": "CREATE TABLE table_name_81 (age_at_time_of_disaster VARCHAR, date_of_birth VARCHAR)", "sql": "SELECT age_at_time_of_disaster FROM table_name_81 WHERE date_of_birth = '24 september 1851';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 92, "num_statements": 1} {"question": "Which menu items have a high calorie count and are also bestsellers?", "schema": "CREATE TABLE MenuItems (id INT, item VARCHAR(30), calories INT, popularity INT); INSERT INTO MenuItems (id, item, calories, popularity) VALUES (1, 'Cheese Burger', 600, 100), (2, 'Garden Salad', 200, 50);", "sql": "SELECT item, calories FROM MenuItems WHERE popularity > 70 ORDER BY calories DESC;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 82, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which party belongs to district 41, and is delegated by Jill P. Carter?", "schema": "CREATE TABLE table_name_61 (party VARCHAR, district VARCHAR, delegate VARCHAR)", "sql": "SELECT party FROM table_name_61 WHERE district = 41 AND delegate = 'jill p. carter';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 84, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How much money does the player with a score of 76-70-65-68=279 have?", "schema": "CREATE TABLE table_name_24 (money___$__ VARCHAR, score VARCHAR)", "sql": "SELECT money___$__ FROM table_name_24 WHERE score = 76 - 70 - 65 - 68 = 279;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 76, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the TO par for the player who scored 70-68-74-70=282?", "schema": "CREATE TABLE table_name_26 (to_par VARCHAR, score VARCHAR)", "sql": "SELECT to_par FROM table_name_26 WHERE score = 70 - 68 - 74 - 70 = 282;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 71, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the constructor for the VII Race of Champions?", "schema": "CREATE TABLE table_name_34 (constructor VARCHAR, race_name VARCHAR)", "sql": "SELECT constructor FROM table_name_34 WHERE race_name = 'vii race of champions';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 80, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who is the minister with a term end on 10 March 1974?", "schema": "CREATE TABLE table_name_71 (minister VARCHAR, term_end VARCHAR)", "sql": "SELECT minister FROM table_name_71 WHERE term_end = '10 march 1974';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 68, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'btree_gin' (example 9).", "schema": null, "sql": "SELECT * FROM test_float8 WHERE i>1::float8 ORDER BY i;", "explanation": "Example query from the 'btree_gin' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 55, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Where was there a result of 4-5 and a score of 1-1?", "schema": "CREATE TABLE table_name_98 (venue VARCHAR, result VARCHAR, score VARCHAR)", "sql": "SELECT venue FROM table_name_98 WHERE result = '4-5' AND score = '1-1';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 71, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was zach johnson's score to par?", "schema": "CREATE TABLE table_275162_1 (to_par VARCHAR, player VARCHAR)", "sql": "SELECT to_par FROM table_275162_1 WHERE player = 'Zach Johnson';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 64, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the Date, when the Sport is luge - men's doubles, and when the Record is, \"start\"?", "schema": "CREATE TABLE table_name_90 (date VARCHAR, sport VARCHAR, record VARCHAR)", "sql": "SELECT date FROM table_name_90 WHERE sport = 'luge - men's doubles' AND record = 'start';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 89, "num_statements": 1} {"question": "Find the number of Mars rovers with a mass greater than 500 kg", "schema": "CREATE TABLE rovers (id INT, name VARCHAR(50), mass INT, manufacturer VARCHAR(50));", "sql": "SELECT COUNT(*) FROM rovers WHERE mass > 500 AND planet = 'Mars';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 65, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the total top-25 of the U.S. Open, which has less than 7 cuts?", "schema": "CREATE TABLE table_name_84 (top_25 INTEGER, tournament VARCHAR, cuts_made VARCHAR)", "sql": "SELECT SUM(top_25) FROM table_name_84 WHERE tournament = 'u.s. open' AND cuts_made < 7;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 87, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many different titles does the representative whose mission was terminated on August 5, 1984 have?", "schema": "CREATE TABLE table_20065425_1 (title VARCHAR, termination_of_mission VARCHAR)", "sql": "SELECT COUNT(title) FROM table_20065425_1 WHERE termination_of_mission = 'August 5, 1984';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 90, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What are the distinct positions of the players from a country whose capital is Dublin?", "schema": "CREATE TABLE country (Country_id VARCHAR, Capital VARCHAR); CREATE TABLE match_season (Position VARCHAR, Country VARCHAR)", "sql": "SELECT DISTINCT T2.Position FROM country AS T1 JOIN match_season AS T2 ON T1.Country_id = T2.Country WHERE T1.Capital = 'Dublin';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 129, "num_statements": 1} {"question": "List all rural infrastructure projects in India and their respective start dates.", "schema": "CREATE TABLE rural_infrastructure_projects (id INT, project_name VARCHAR(50), country VARCHAR(50), start_date DATE); INSERT INTO rural_infrastructure_projects (id, project_name, country, start_date) VALUES (1, 'Rajiv Gandhi Rural Electrification Program', 'India', '2010-04-01'), (2, 'BharatNet Rural Broadband Initiative', 'India', '2015-07-26');", "sql": "SELECT project_name, start_date FROM rural_infrastructure_projects WHERE country = 'India';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 91, "num_statements": 1} {"question": "List broadband subscribers who joined after mobile subscribers with the same region_id.", "schema": "CREATE TABLE mobile_subscribers (subscriber_id INT, region_id INT, join_date DATE); INSERT INTO mobile_subscribers (subscriber_id, region_id, join_date) VALUES (1, 1, '2021-01-01'), (2, 2, '2021-03-01'), (3, 3, '2021-02-01'), (4, 4, '2021-04-01'), (5, 1, '2021-01-15'), (6, 2, '2021-03-15'), (7, 3, '2021-02-15'), (8, 4, '2021-04-15'); CREATE TABLE broadband_subscribers (subscriber_id INT, region_id INT, join_date DATE); INSERT INTO broadband_subscribers (subscriber_id, region_id, join_date) VALUES (9, 1, '2021-01-20'), (10, 2, '2021-03-20'), (11, 3, '2021-02-20'), (12, 4, '2021-04-20'), (13, 1, '2021-01-25'), (14, 2, '2021-03-25'), (15, 3, '2021-02-25'), (16, 4, '2021-04-25');", "sql": "SELECT b.* FROM broadband_subscribers b INNER JOIN mobile_subscribers m ON b.region_id = m.region_id WHERE b.join_date > m.join_date;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 133, "num_statements": 1} {"question": "What is the count of unique accommodations for 'MobilityAssistanceDog' in the 'ServiceAnimalAccommodations' table?", "schema": "CREATE TABLE ServiceAnimalAccommodations (service_animal_id INT, accommodation_type VARCHAR(255)); INSERT INTO ServiceAnimalAccommodations (service_animal_id, accommodation_type) VALUES (1001, 'VisualAssistanceDog'), (1002, 'HearingDog'), (1003, 'MobilityAssistanceDog'), (1004, 'MobilityAssistanceDog'), (1005, 'VisualAssistanceDog');", "sql": "SELECT COUNT(DISTINCT accommodation_type) FROM ServiceAnimalAccommodations WHERE accommodation_type = 'MobilityAssistanceDog';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 126, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the Location for the jiu-jitsu vs martial arts?", "schema": "CREATE TABLE table_name_36 (location VARCHAR, event VARCHAR)", "sql": "SELECT location FROM table_name_36 WHERE event = 'jiu-jitsu vs martial arts';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 77, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What's the Proto-Germanic when the German is /t/?", "schema": "CREATE TABLE table_name_48 (proto_germanic VARCHAR, german VARCHAR)", "sql": "SELECT proto_germanic FROM table_name_48 WHERE german = '/t/';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 62, "num_statements": 1} {"question": "How many circular economy initiatives were launched in Osaka in 2020?", "schema": "CREATE TABLE circular_economy_initiatives(location VARCHAR(20), launch_date DATE); INSERT INTO circular_economy_initiatives VALUES('Osaka', '2020-01-01'), ('Osaka', '2020-03-15'), ('Tokyo', '2019-12-31');", "sql": "SELECT COUNT(*) as initiatives FROM circular_economy_initiatives WHERE location = 'Osaka' AND YEAR(launch_date) = 2020;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 119, "num_statements": 1} {"question": "What is the total number of fish in each farm in the aquaculture facility?", "schema": "CREATE TABLE salmon_farms (id INT, name TEXT, location TEXT, size INT);CREATE TABLE fish_population (id INT, farm_id INT, species TEXT, population INT, biomass FLOAT, date DATE);", "sql": "SELECT farm_id, SUM(population) FROM fish_population GROUP BY farm_id;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 70, "num_statements": 1} {"question": "Show a SQL definition from the timescaledb project (plain, item 12).", "schema": null, "sql": "CREATE TABLE z (b TEXT, PRIMARY KEY(aa, b)) inherits (a);", "explanation": "SQL definition from the open-source timescaledb PostgreSQL project.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_table", "is_postgresql_specific": true, "sql_length": 57, "num_statements": 1} {"question": "What is the total number of workers from historically underrepresented communities in the Operations department?", "schema": "CREATE TABLE departments (id INT, name VARCHAR(255), diversity_stats VARCHAR(255)); INSERT INTO departments (id, name, diversity_stats) VALUES (1, 'HR', '{\"total_employees\":50, \"underrepresented\":20}'), (2, 'Operations', '{\"total_employees\":75, \"underrepresented\":15}'), (3, 'Finance', '{\"total_employees\":60, \"underrepresented\":10}');", "sql": "SELECT d.name AS department, JSON_EXTRACT(d.diversity_stats, '$.underrepresented') AS underrepresented_count FROM departments d WHERE d.name = 'Operations';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 156, "num_statements": 1} {"question": "What was the total donation amount for the year 2020 in the 'Great Lakes' region?", "schema": "CREATE TABLE Donations (donation_id INT, region VARCHAR(20), amount DECIMAL(10,2), donation_year INT); INSERT INTO Donations (donation_id, region, amount, donation_year) VALUES (1, 'Great Lakes', 5000.00, 2020), (2, 'Southeast', 3000.00, 2020);", "sql": "SELECT SUM(amount) FROM Donations WHERE region = 'Great Lakes' AND donation_year = 2020;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 88, "num_statements": 1} {"question": "What was the total budget for criminal justice systems before 2018?", "schema": "CREATE TABLE public.criminal_justice (id serial PRIMARY KEY, name text, type text, budget integer, year integer); INSERT INTO public.criminal_justice (name, type, budget, year) VALUES ('Prison System', 'Corrections', 85000000, 2020), ('Police Department', 'Law Enforcement', 150000000, 2018);", "sql": "SELECT name, type, budget, year, (SELECT SUM(budget) FROM public.criminal_justice cj2 WHERE cj2.year < cj.year AND cj2.id <> cj.id) as total_budget_before FROM public.criminal_justice cj WHERE year < 2018;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": true, "sql_length": 205, "num_statements": 1} {"question": "Which urban farms in Oakland, CA have the highest yield per acre?", "schema": "CREATE TABLE urban_farms (name TEXT, city TEXT, state TEXT, acres NUMERIC, yield NUMERIC); INSERT INTO urban_farms (name, city, state, acres, yield) VALUES ('Groundwork', 'Oakland', 'CA', 2.5, 15000), ('City Slicker Farms', 'Oakland', 'CA', 3.2, 12000), ('Kinderfarms', 'Oakland', 'CA', 1.9, 8000);", "sql": "SELECT name, acres, yield, ROW_NUMBER() OVER (ORDER BY yield/acres DESC) as rank FROM urban_farms WHERE city = 'Oakland' AND state = 'CA';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_window_function", "is_postgresql_specific": true, "sql_length": 138, "num_statements": 1} {"question": "How many articles were published per day in January 2020?", "schema": "CREATE TABLE articles (id INT, title VARCHAR(100), content TEXT, publish_date DATE); INSERT INTO articles (id, title, content, publish_date) VALUES (1, 'Article 1', 'Content 1', '2020-01-01'), (2, 'Article 2', 'Content 2', '2020-01-02'), (3, 'Article 3', 'Content 3', '2020-01-03');", "sql": "SELECT DATE_FORMAT(publish_date, '%Y-%m-%d') AS day, COUNT(*) as articles_per_day FROM articles WHERE YEAR(publish_date) = 2020 AND MONTH(publish_date) = 1 GROUP BY day;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 169, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the number of season number for jeff truman", "schema": "CREATE TABLE table_15838081_3 (season__number VARCHAR, writer_s_ VARCHAR)", "sql": "SELECT COUNT(season__number) FROM table_15838081_3 WHERE writer_s_ = 'Jeff Truman';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 83, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which highschool has senior (rs) for the year?", "schema": "CREATE TABLE table_22496344_1 (high_school VARCHAR, year VARCHAR)", "sql": "SELECT high_school FROM table_22496344_1 WHERE year = 'Senior (RS)';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 68, "num_statements": 1} {"question": "pgTAP test for Functap (assertion 184).", "schema": null, "sql": "SELECT * FROM check_test(\n isnt_normal_function( 'tap_accum', ARRAY[etype()], 'whatever' ),\n true,\n 'isnt_normal_function(func, agg, desc)',\n 'whatever',\n ''\n);", "explanation": "SQL assertion from pgTAP test suite for Functap.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": true, "sql_length": 175, "num_statements": 1} {"question": "PostgreSQL Rules: show example 31.", "schema": null, "sql": "INSERT INTO shoelace_log VALUES ( new.sl_name, new.sl_avail, current_user, current_timestamp ) FROM shoelace_data new, shoelace_data old, shoelace_data shoelace_data WHERE new.sl_avail <> old.sl_avail ;", "explanation": "Example from PostgreSQL documentation on Rules.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": true, "sql_length": 202, "num_statements": 1} {"question": "What is the minimum mental health score of students who identify as Indigenous, grouped by their ethnicity?", "schema": "CREATE TABLE students (student_id INT, ethnicity VARCHAR(255), mental_health_score INT); INSERT INTO students (student_id, ethnicity, mental_health_score) VALUES (1, 'Native American', 80), (2, 'Latino', 70), (3, 'Indigenous Australian', 90);", "sql": "SELECT ethnicity, MIN(mental_health_score) as min_score FROM students WHERE ethnicity LIKE '%Indigenous%' GROUP BY ethnicity;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 125, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the attendance for july 18?", "schema": "CREATE TABLE table_name_84 (attendance VARCHAR, date VARCHAR)", "sql": "SELECT COUNT(attendance) FROM table_name_84 WHERE date = 'july 18';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 67, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Where has a Rules of thai boxing, and a Round of n/a, and an Opponent of everton crawford?", "schema": "CREATE TABLE table_name_99 (location VARCHAR, opponent VARCHAR, rules VARCHAR, round VARCHAR)", "sql": "SELECT location FROM table_name_99 WHERE rules = 'thai boxing' AND round = 'n/a' AND opponent = 'everton crawford';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 115, "num_statements": 1} {"question": "What is the average fare for bus routes serving low-income neighborhoods?", "schema": "CREATE TABLE bus_routes (route_id INT, neighborhood VARCHAR(255), fare DECIMAL(5,2)); INSERT INTO bus_routes (route_id, neighborhood, fare) VALUES (1, 'Westwood', 1.50), (2, 'Downtown', 2.50), (3, 'Eastside', 0.75);", "sql": "SELECT AVG(fare) FROM bus_routes WHERE neighborhood IN (SELECT neighborhood FROM low_income_neighborhoods);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 107, "num_statements": 1} {"question": "What is the average duration of peacekeeping operations for each country?", "schema": "CREATE TABLE Peacekeeping_Operations (Operation_ID INT, Country_Name VARCHAR(50), Start_Date DATE, End_Date DATE); INSERT INTO Peacekeeping_Operations (Operation_ID, Country_Name, Start_Date, End_Date) VALUES (1, 'Bangladesh', '2005-01-01', '2007-12-31');", "sql": "SELECT Country_Name, AVG(DATEDIFF(End_Date, Start_Date)) as Average_Duration FROM Peacekeeping_Operations GROUP BY Country_Name;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 128, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who is the player when the pick is 22 and the year [A] is after 1979?", "schema": "CREATE TABLE table_name_5 (player_name VARCHAR, pick VARCHAR, year_ VARCHAR, a_ VARCHAR)", "sql": "SELECT player_name FROM table_name_5 WHERE pick = '22' AND year_[a_] > 1979;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 76, "num_statements": 1} {"question": "PostgreSQL regression test 'arrays': Write the SELECT query (example 284).", "schema": null, "sql": "select '[2]={1,7}'::int[];", "explanation": "Regression test for Arrays in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select '[2]={1,7}'::int[]) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 26, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many draft copies does the document with id 2 have?", "schema": "CREATE TABLE Draft_Copies (document_id VARCHAR)", "sql": "SELECT COUNT(*) FROM Draft_Copies WHERE document_id = 2;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 56, "num_statements": 1} {"question": "Insert records into the 'user_demographics' table", "schema": "CREATE TABLE user_demographics (user_id INT, age INT, gender VARCHAR(10), occupation VARCHAR(255));", "sql": "INSERT INTO user_demographics (user_id, age, gender, occupation) VALUES (1, 35, 'male', 'software engineer');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 109, "num_statements": 1} {"question": "What is the maximum number of sessions attended by a patient in Spain for any therapy?", "schema": "CREATE TABLE therapy_attendance (id INT, patient_id INT, session_name TEXT, num_sessions INT, country TEXT);", "sql": "SELECT MAX(num_sessions) FROM therapy_attendance WHERE country = 'Spain';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 73, "num_statements": 1} {"question": "What is the total revenue of sales in the last quarter for ethical clothing produced in factories with fair labor practices?", "schema": "CREATE TABLE Factories (factory_id INT, name VARCHAR(100), location VARCHAR(100), num_workers INT, has_fair_labor BOOLEAN); CREATE TABLE Products (product_id INT, name VARCHAR(100), factory_id INT, price DECIMAL(5,2)); CREATE TABLE Sales (sale_id INT, product_id INT, quantity INT, sale_date DATE); INSERT INTO Factories VALUES (1,'Factory A','New York',200,TRUE),(2,'Factory B','Mumbai',350,FALSE),(3,'Factory C','Dhaka',500,TRUE),(4,'Factory D','São Paulo',400,FALSE); INSERT INTO Products VALUES (1,'Eco T-Shirt',1,20.00),(2,'Fair Trade Blouse',2,30.00),(3,'Sustainable Skirt',3,40.00),(4,'Organic Cotton Jeans',3,50.00); INSERT INTO Sales VALUES (1,1,10,DATE '2022-01-01'),(2,2,15,DATE '2022-02-15'),(3,3,20,DATE '2022-03-30'),(4,4,25,DATE '2022-04-15');", "sql": "SELECT SUM(Products.price * Sales.quantity) FROM Factories JOIN Products ON Factories.factory_id = Products.factory_id JOIN Sales ON Products.product_id = Sales.product_id WHERE Factories.has_fair_labor = TRUE AND Sales.sale_date >= DATE '2022-01-01' AND Sales.sale_date < DATE '2022-04-01';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 291, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is Player, when Pick is less than 161, and when College is \"Auburn University\"?", "schema": "CREATE TABLE table_name_58 (player VARCHAR, pick VARCHAR, college VARCHAR)", "sql": "SELECT player FROM table_name_58 WHERE pick < 161 AND college = 'auburn university';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 84, "num_statements": 1} {"question": "What is the average age of aircraft manufactured by Boeing?", "schema": "CREATE TABLE Manufacturers (Id INT, Name VARCHAR(50)); INSERT INTO Manufacturers (Id, Name) VALUES (1, 'Boeing'), (2, 'Airbus'); CREATE TABLE Aircraft (Id INT, Name VARCHAR(50), Age INT, ManufacturerId INT); INSERT INTO Aircraft (Id, Name, Age, ManufacturerId) VALUES (1, '747', 50, 1), (2, '777', 25, 1), (3, 'A320', 10, 2), (4, 'A350', 5, 2);", "sql": "SELECT AVG(Aircraft.Age) FROM Aircraft JOIN Manufacturers ON Aircraft.ManufacturerId = Manufacturers.Id WHERE Manufacturers.Name = 'Boeing';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 140, "num_statements": 1} {"question": "What is the maximum safety rating in the 'workplace_safety' table for workplaces with a union membership size greater than 100 in the 'labor_rights' table?", "schema": "CREATE TABLE workplace_safety (safety_rating INT, workplace_id INT); CREATE TABLE labor_rights (workplace_id INT, union_membership_size INT);", "sql": "SELECT MAX(workplace_safety.safety_rating) FROM workplace_safety INNER JOIN labor_rights ON workplace_safety.workplace_id = labor_rights.workplace_id WHERE labor_rights.union_membership_size > 100;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 197, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'publication' (example 108).", "schema": null, "sql": "CREATE TABLE testpub_rf_tbl2 (c text, d integer);", "explanation": "DDL from PostgreSQL core regression test for Publication.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_table", "is_postgresql_specific": false, "sql_length": 49, "num_statements": 1} {"question": "Find the number of genetic research participants by project.", "schema": "CREATE TABLE research_participants (id INT, project_id INT, participant_id INT, date DATE);", "sql": "SELECT project_id, COUNT(DISTINCT participant_id) FROM research_participants GROUP BY project_id;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 97, "num_statements": 1} {"question": "What is the maximum depth of the ocean?", "schema": "CREATE TABLE ocean_depth (location TEXT, depth INTEGER);", "sql": "SELECT MAX(depth) FROM ocean_depth;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 35, "num_statements": 1} {"question": "Find the number of visitors who attended exhibitions in Tokyo or New York.", "schema": "CREATE TABLE Visitors (id INT, city VARCHAR(20)); INSERT INTO Visitors (id, city) VALUES (1, 'Tokyo'), (2, 'Paris'), (3, 'New York'), (4, 'Berlin');", "sql": "SELECT COUNT(*) FROM Visitors WHERE city IN ('Tokyo', 'New York');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "How many water sources were offline for more than a month in the 'WaterSources' table?", "schema": "CREATE TABLE WaterSources (ID INT, SourceID INT, Status VARCHAR(10), LastOnline DATE); INSERT INTO WaterSources (ID, SourceID, Status, LastOnline) VALUES (1, 1, 'Online', '2022-01-01'); INSERT INTO WaterSources (ID, SourceID, Status, LastOnline) VALUES (2, 2, 'Offline', '2022-06-15');", "sql": "SELECT COUNT(*) FROM WaterSources WHERE Status = 'Offline' AND DATEDIFF(day, LastOnline, GETDATE()) > 30;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 105, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which Attendance has a Result of l 26–16?", "schema": "CREATE TABLE table_name_47 (attendance VARCHAR, result VARCHAR)", "sql": "SELECT attendance FROM table_name_47 WHERE result = 'l 26–16';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 62, "num_statements": 1} {"question": "What were the total sales of each drug in the Southeast region in Q2 2021?", "schema": "CREATE TABLE drugs (drug_id INT, drug_name VARCHAR(255)); INSERT INTO drugs (drug_id, drug_name) VALUES (1, 'DrugA'), (2, 'DrugB'); CREATE TABLE sales (sale_id INT, drug_id INT, region VARCHAR(255), sales_amount DECIMAL(10, 2), quarter INT, year INT); INSERT INTO sales (sale_id, drug_id, region, sales_amount, quarter, year) VALUES (1, 1, 'Southeast', 15000, 2, 2021), (2, 2, 'Southeast', 20000, 2, 2021);", "sql": "SELECT d.drug_name, SUM(s.sales_amount) as total_sales FROM drugs d JOIN sales s ON d.drug_id = s.drug_id WHERE s.region = 'Southeast' AND s.quarter = 2 AND s.year = 2021 GROUP BY d.drug_name;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 192, "num_statements": 1} {"question": "Which community has the highest obesity rate in the US?", "schema": "CREATE TABLE Community (Name TEXT, State TEXT, ObesityRate FLOAT); INSERT INTO Community (Name, State, ObesityRate) VALUES ('Community A', 'California', 20.0); INSERT INTO Community (Name, State, ObesityRate) VALUES ('Community B', 'Texas', 25.0);", "sql": "SELECT Name, ObesityRate FROM Community WHERE State = 'US' ORDER BY ObesityRate DESC LIMIT 1;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 93, "num_statements": 1} {"question": "What was the total number of volunteers registered in 'New York' and 'Florida'?", "schema": "CREATE TABLE Volunteers (volunteer_id INT, registration_date DATE, state VARCHAR(20)); INSERT INTO Volunteers (volunteer_id, registration_date, state) VALUES (1, '2022-01-01', 'New York'), (2, '2022-01-02', 'Florida');", "sql": "SELECT SUM(state = 'New York') + SUM(state = 'Florida') FROM Volunteers;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 72, "num_statements": 1} {"question": "What is the total investment in companies with a high ESG rating (greater than 80)?", "schema": "CREATE TABLE investments (investment_id INT, company_id INT, investment_amount FLOAT); INSERT INTO investments (investment_id, company_id, investment_amount) VALUES (1, 1, 150000), (2, 2, 200000), (3, 3, 120000), (4, 5, 250000); CREATE TABLE companies (company_id INT, ESG_rating FLOAT); INSERT INTO companies (company_id, ESG_rating) VALUES (1, 85.2), (2, 78.1), (3, 88.5), (4, 65.3), (5, 82.7);", "sql": "SELECT SUM(investment_amount) FROM investments JOIN companies ON investments.company_id = companies.company_id WHERE companies.ESG_rating > 80;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 143, "num_statements": 1} {"question": "What is the total number of customers who have made at least one transaction in the last week?", "schema": "CREATE TABLE customers (customer_id INT, name VARCHAR(50), last_transaction_date DATE); INSERT INTO customers (customer_id, name, last_transaction_date) VALUES (1, 'John Doe', '2022-02-05'), (2, 'Jane Smith', NULL), (3, 'Bob Johnson', '2022-02-02');", "sql": "SELECT COUNT(DISTINCT customer_id) FROM customers WHERE last_transaction_date >= DATE_SUB(CURDATE(), INTERVAL 1 WEEK);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 118, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How long did it take to ride when the laps were 23 and the grid of 13?", "schema": "CREATE TABLE table_name_65 (time VARCHAR, laps VARCHAR, grid VARCHAR)", "sql": "SELECT time FROM table_name_65 WHERE laps = 23 AND grid = 13;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 61, "num_statements": 1} {"question": "PostgreSQL regression test 'subselect': Write the SELECT query (example 292).", "schema": null, "sql": "select t1.ten, sum(x) from\n tenk1 t1 left join lateral (\n select t1.ten + t2.ten as x, t2.fivethous from tenk1 t2\n ) ss on t1.unique1 = ss.fivethous\ngroup by t1.ten\norder by t1.ten;", "explanation": "Regression test for Subselect in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select t1.ten, sum(x) from\n tenk1 t1 left join lateral (\n select t1.ten + t2.ten as x, t2.fivethous from tenk1 t2\n ) ss on t1.unique1 = ss.fivethous\ngroup by t1.ten\norder by t1.ten) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": true, "sql_length": 186, "num_statements": 1} {"question": "How many mental health parity consultations were conducted by community health workers in California?", "schema": "CREATE TABLE community_health_workers (id INT, name TEXT, zip TEXT, consultations INT); INSERT INTO community_health_workers (id, name, zip, consultations) VALUES (1, 'John Doe', '90001', 30), (2, 'Jane Smith', '94117', 45);", "sql": "SELECT SUM(consultations) FROM community_health_workers WHERE zip BETWEEN '90001' AND '96162';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 94, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the landesliga mitte sv türk gücü münchen", "schema": "CREATE TABLE table_20181270_3 (landesliga_mitte VARCHAR, bayernliga VARCHAR)", "sql": "SELECT landesliga_mitte FROM table_20181270_3 WHERE bayernliga = 'SV Türk Gücü München';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 88, "num_statements": 1} {"question": "What is the maximum financial capability score for individuals in India, grouped by gender?", "schema": "CREATE TABLE individuals (id INT, country VARCHAR(255), gender VARCHAR(255), financial_capability_score INT);", "sql": "SELECT gender, MAX(financial_capability_score) FROM individuals WHERE country = 'India' GROUP BY gender;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 104, "num_statements": 1} {"question": "What is the count of students who received accommodations for 'Extended Testing Time' in the 'StudentAccommodations' table?", "schema": "CREATE TABLE StudentAccommodations (student_id INT, accommodation_type VARCHAR(255)); INSERT INTO StudentAccommodations (student_id, accommodation_type) VALUES (1, 'Sign Language Interpreter'), (2, 'Assistive Technology'), (3, 'Extended Testing Time'), (4, 'Extended Testing Time');", "sql": "SELECT COUNT(*) FROM StudentAccommodations WHERE accommodation_type = 'Extended Testing Time';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 94, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'pgcrypto' (example 11).", "schema": null, "sql": "select encrypt('foo', '0123456789012345678901', 'aes');", "explanation": "Example query from the 'pgcrypto' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 55, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Where was the game held that was played on 2002-03-07?", "schema": "CREATE TABLE table_name_36 (venue VARCHAR, date VARCHAR)", "sql": "SELECT venue FROM table_name_36 WHERE date = '2002-03-07';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 58, "num_statements": 1} {"question": "Identify unique authors who have written for 'The Hindu' and 'NDTV' in May 2021.", "schema": "CREATE TABLE hindu (author_id INT, author_name VARCHAR(50), article_date DATE); INSERT INTO hindu (author_id, author_name, article_date) VALUES (1, 'Rajesh Patel', '2021-05-01'), (2, 'Priya Gupta', '2021-05-02'); CREATE TABLE ndtv (author_id INT, author_name VARCHAR(50), article_date DATE); INSERT INTO ndtv (author_id, author_name, article_date) VALUES (3, 'Meera Kapoor', '2021-05-01'), (4, 'Rajesh Patel', '2021-05-03');", "sql": "SELECT author_name FROM hindu WHERE article_date BETWEEN '2021-05-01' AND '2021-05-31' INTERSECT SELECT author_name FROM ndtv WHERE article_date BETWEEN '2021-05-01' AND '2021-05-31';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 183, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: what is the react when the country is sweden and the lane is higher than 6?", "schema": "CREATE TABLE table_name_65 (react INTEGER, country VARCHAR, lane VARCHAR)", "sql": "SELECT SUM(react) FROM table_name_65 WHERE country = 'sweden' AND lane > 6;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 75, "num_statements": 1} {"question": "Which vessels have been inspected in the last month?", "schema": "CREATE TABLE Vessel (vessel_id INT, name VARCHAR(255), type VARCHAR(255), max_speed DECIMAL(5,2)); CREATE TABLE Inspection (inspection_id INT, vessel_id INT, inspection_time TIMESTAMP); INSERT INTO Vessel (vessel_id, name, type, max_speed) VALUES (1, 'Test Vessel 1', 'Cargo', 20.5), (2, 'Test Vessel 2', 'Tanker', 15.2); INSERT INTO Inspection (inspection_id, vessel_id, inspection_time) VALUES (1, 1, '2022-01-01 12:00:00'), (2, 2, '2022-01-15 10:00:00'), (3, 1, '2022-02-01 09:00:00');", "sql": "SELECT v.vessel_id, v.name FROM Vessel v INNER JOIN Inspection i ON v.vessel_id = i.vessel_id WHERE i.inspection_time >= NOW() - INTERVAL '1 month';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 148, "num_statements": 1} {"question": "What is the minimum cargo weight handled by port 'Busan' and 'Incheon'?", "schema": "CREATE TABLE ports (port_id INT, port_name VARCHAR(255)); INSERT INTO ports (port_id, port_name) VALUES (1, 'Busan'), (2, 'Incheon'), (3, 'Daegu'); CREATE TABLE cargo (cargo_id INT, port_id INT, weight FLOAT); INSERT INTO cargo (cargo_id, port_id, weight) VALUES (1, 1, 1000), (2, 1, 1500), (3, 2, 800), (4, 3, 1200);", "sql": "SELECT MIN(weight) FROM cargo WHERE port_name IN ('Busan', 'Incheon');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 70, "num_statements": 1} {"question": "List all unique medical conditions of astronauts from Brazil.", "schema": "CREATE TABLE AstronautMedical (id INT, astronaut_id INT, nationality VARCHAR(50), medical_condition VARCHAR(50)); INSERT INTO AstronautMedical (id, astronaut_id, nationality, medical_condition) VALUES (1, 121, 'Brazil', 'Hypercalcemia'); INSERT INTO AstronautMedical (id, astronaut_id, nationality, medical_condition) VALUES (2, 122, 'Brazil', 'Urinary Tract Infection'); INSERT INTO AstronautMedical (id, astronaut_id, nationality, medical_condition) VALUES (3, 123, 'Brazil', 'Nausea');", "sql": "SELECT DISTINCT medical_condition FROM AstronautMedical WHERE nationality = 'Brazil';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 85, "num_statements": 1} {"question": "What is the total investment in economic diversification projects in Indonesia, Malaysia, and the Philippines, including only manufacturing and tourism projects?", "schema": "CREATE TABLE eco_diversification (id INT, name TEXT, location TEXT, investment FLOAT, project_type TEXT); INSERT INTO eco_diversification (id, name, location, investment, project_type) VALUES (1, 'Renewable Energy', 'Indonesia', 700000.00, 'Energy'), (2, 'Tourism Infrastructure', 'Malaysia', 600000.00, 'Tourism'), (3, 'Manufacturing Zone', 'Philippines', 800000.00, 'Manufacturing'), (4, 'Mining Concession', 'Indonesia', 900000.00, 'Mining');", "sql": "SELECT SUM(investment) FROM eco_diversification WHERE location IN ('Indonesia', 'Malaysia', 'Philippines') AND project_type IN ('Manufacturing', 'Tourism');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 156, "num_statements": 1} {"question": "Show an example of PostgreSQL SELECT (example 10).", "schema": null, "sql": "SELECT * FROM distributors ORDER BY name; SELECT * FROM distributors ORDER BY 2; did | name -----+------------------ 109 | 20th Century Fox 110 | Bavaria Atelier 101 | British Lion 107 | Columbia 102 | Jean Luc Godard 113 | Luso films 104 | Mosfilm 103 | Paramount 106 | Toho 105 | United Artists 111 | Walt Disney 112 | Warner Bros. 108 | Westward;", "explanation": "PostgreSQL SELECT command.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 349, "num_statements": 3} {"question": "Generate PostgreSQL SQL for: In which colleges is the NFL Team New York Giants and with the position defensive back?", "schema": "CREATE TABLE table_2508633_11 (college VARCHAR, nfl_team VARCHAR, position VARCHAR)", "sql": "SELECT college FROM table_2508633_11 WHERE nfl_team = 'New York Giants' AND position = 'Defensive back';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 104, "num_statements": 1} {"question": "PostgreSQL regression test 'jsonb_jsonpath': Write the SELECT query (example 700).", "schema": null, "sql": "select jsonb_path_query_tz(\n\t'[\"2017-03-10\", \"2017-03-11\", \"2017-03-09\", \"2017-03-10 00:00:00\", \"2017-03-10 12:34:56\", \"2017-03-10 01:02:03+04\", \"2017-03-10 03:00:00+03\"]',\n\t'$[*].date() ? (@ == \"2017-03-10\".date())');", "explanation": "Regression test for Jsonb Jsonpath in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select jsonb_path_query_tz(\n\t'[\"2017-03-10\", \"2017-03-11\", \"2017-03-09\", \"2017-03-10 00:00:00\", \"2017-03-10 12:34:56\", \"2017-03-10 01:02:03+04\", \"2017-03-10 03:00:00+03\"]',\n\t'$[*].date() ? (@ == \"2017-03-10\".date())')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "advanced", "category": "query_select", "is_postgresql_specific": false, "sql_length": 218, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the production number directed by Robert McKimson in series mm titled People Are Bunny?", "schema": "CREATE TABLE table_name_18 (production_number INTEGER, title VARCHAR, director VARCHAR, series VARCHAR)", "sql": "SELECT SUM(production_number) FROM table_name_18 WHERE director = 'robert mckimson' AND series = 'mm' AND title = 'people are bunny';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 133, "num_statements": 1} {"question": "What is the total number of streams by country?", "schema": "CREATE TABLE Countries (CountryID int, CountryName varchar(255)); INSERT INTO Countries (CountryID, CountryName) VALUES (1, 'USA'), (2, 'UK'), (3, 'France'); CREATE TABLE StreamsByCountry (StreamID int, CountryID int, StreamCount int); INSERT INTO StreamsByCountry (StreamID, CountryID, StreamCount) VALUES (1, 1, 1000), (2, 2, 2000), (3, 1, 1500);", "sql": "SELECT Countries.CountryName, SUM(StreamsByCountry.StreamCount) as TotalStreams FROM Countries INNER JOIN StreamsByCountry ON Countries.CountryID = StreamsByCountry.CountryID GROUP BY Countries.CountryName;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 206, "num_statements": 1} {"question": "Update the ocean_name for species_id 1 to 'Indian Ocean'.", "schema": "CREATE TABLE marine_species (species_id INT, species_name VARCHAR(50), ocean_name VARCHAR(50));", "sql": "UPDATE marine_species SET ocean_name = 'Indian Ocean' WHERE species_id = 1;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 75, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the average Position with less than 57 against and the team is Juventus?", "schema": "CREATE TABLE table_name_19 (position INTEGER, against VARCHAR, team VARCHAR)", "sql": "SELECT AVG(position) FROM table_name_19 WHERE against < 57 AND team = 'juventus';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 81, "num_statements": 1} {"question": "What is the total number of citizen feedback records for public services in the city of Los Angeles?", "schema": "CREATE TABLE citizen_feedback (city VARCHAR(20), service VARCHAR(20)); INSERT INTO citizen_feedback (city, service) VALUES ('Los Angeles', 'public transportation'); INSERT INTO citizen_feedback (city, service) VALUES ('Los Angeles', 'public transportation'); INSERT INTO citizen_feedback (city, service) VALUES ('Los Angeles', 'public parks'); INSERT INTO citizen_feedback (city, service) VALUES ('San Diego', 'public libraries');", "sql": "SELECT COUNT(*) FROM citizen_feedback WHERE city = 'Los Angeles';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 65, "num_statements": 1} {"question": "What is the total number of crimes committed in each city in the last 3 months?", "schema": "CREATE TABLE cities (id INT, name TEXT);CREATE TABLE crimes (id INT, city_id INT, date DATE);", "sql": "SELECT c.name, COUNT(cr.id) FROM cities c JOIN crimes cr ON c.id = cr.city_id WHERE cr.date >= DATEADD(month, -3, GETDATE()) GROUP BY c.id;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 139, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: If the national trophy/rookie is Simone Iaquinta, what is the season total number?", "schema": "CREATE TABLE table_25563779_4 (season VARCHAR, national_trophy_rookie VARCHAR)", "sql": "SELECT COUNT(season) FROM table_25563779_4 WHERE national_trophy_rookie = 'Simone Iaquinta';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 92, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who was the candidate when the result - % was 0.4%?", "schema": "CREATE TABLE table_25818630_2 (candidate VARCHAR, result____percentage VARCHAR)", "sql": "SELECT candidate FROM table_25818630_2 WHERE result____percentage = '0.4%';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 75, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which name has a Kanji of 朧?", "schema": "CREATE TABLE table_name_77 (name VARCHAR, kanji VARCHAR)", "sql": "SELECT name FROM table_name_77 WHERE kanji = '朧';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 49, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What's the latest episode in a season where the U.S. viewers totaled 14.37 million?", "schema": "CREATE TABLE table_10842344_1 (no_in_season INTEGER, us_viewers__millions_ VARCHAR)", "sql": "SELECT MAX(no_in_season) FROM table_10842344_1 WHERE us_viewers__millions_ = '14.37';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 85, "num_statements": 1} {"question": "What is the total number of professional development courses completed by teachers in each school, grouped by course type?", "schema": "CREATE TABLE teacher_pd (teacher_id INT, school_id INT, course_id INT, course_type VARCHAR(255)); CREATE TABLE courses (course_id INT, course_name VARCHAR(255), course_type VARCHAR(255)); CREATE TABLE schools (school_id INT, school_name VARCHAR(255));", "sql": "SELECT s.school_name, c.course_type, COUNT(DISTINCT t.teacher_id, t.course_id) as num_courses FROM teacher_pd t INNER JOIN schools s ON t.school_id = s.school_id INNER JOIN courses c ON t.course_id = c.course_id GROUP BY s.school_name, c.course_type;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 250, "num_statements": 1} {"question": "Calculate the average fuel consumption per hour for the vessel 'Blue Whale' in the 'Tankers' fleet in the past week.", "schema": "CREATE TABLE Vessels (id INT, name VARCHAR(255)); INSERT INTO Vessels (id, name) VALUES (1, 'Blue Whale'); CREATE TABLE FuelConsumption (vessel_id INT, fuel_consumption INT, timestamp TIMESTAMP); INSERT INTO FuelConsumption (vessel_id, fuel_consumption, timestamp) VALUES (1, 500, '2022-07-01 10:00:00'), (1, 800, '2022-07-01 22:00:00');", "sql": "SELECT AVG(fuel_consumption / DATEDIFF(HOUR, LAG(timestamp) OVER (PARTITION BY vessel_id ORDER BY timestamp), timestamp)) as avg_fuel_consumption_per_hour FROM FuelConsumption WHERE vessel_id = 1 AND timestamp >= DATEADD(week, -1, GETDATE());", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "query_window_function", "is_postgresql_specific": true, "sql_length": 242, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which Date has a Catalog of 887 195-2, and a Format of cd maxi?", "schema": "CREATE TABLE table_name_63 (date VARCHAR, catalog VARCHAR, format VARCHAR)", "sql": "SELECT date FROM table_name_63 WHERE catalog = '887 195-2' AND format = 'cd maxi';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 82, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: When 55 is the tries for what is the lost?", "schema": "CREATE TABLE table_17941032_1 (lost VARCHAR, tries_for VARCHAR)", "sql": "SELECT lost FROM table_17941032_1 WHERE tries_for = '55';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 57, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What does American have if Australia has əʉ?", "schema": "CREATE TABLE table_name_65 (american VARCHAR, australian VARCHAR)", "sql": "SELECT american FROM table_name_65 WHERE australian = 'əʉ';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 59, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'copy' (example 43).", "schema": null, "sql": "create table parted_copytest_a1 (c text, b int, a int);", "explanation": "DDL from PostgreSQL core regression test for Copy.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_table", "is_postgresql_specific": false, "sql_length": 55, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the finish for England?", "schema": "CREATE TABLE table_name_76 (finish VARCHAR, country VARCHAR)", "sql": "SELECT finish FROM table_name_76 WHERE country = 'england';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 59, "num_statements": 1} {"question": "List all public transportation routes with more than 50% hybrid bus usage in London.", "schema": "CREATE TABLE public_transportation (route_id INT, route_name TEXT, vehicle_type TEXT, is_hybrid BOOLEAN, passengers INT);", "sql": "SELECT route_name FROM public_transportation WHERE vehicle_type = 'Bus' AND is_hybrid = TRUE GROUP BY route_name HAVING COUNT(*) FILTER (WHERE is_hybrid = TRUE) / COUNT(*) > 0.5 AND route_name LIKE 'London%';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": true, "sql_length": 208, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Where is Adam Gilchrist from?", "schema": "CREATE TABLE table_11303072_9 (nationality VARCHAR, player VARCHAR)", "sql": "SELECT nationality FROM table_11303072_9 WHERE player = 'Adam Gilchrist';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 73, "num_statements": 1} {"question": "What are the total earnings of each eSports team in the last year?", "schema": "CREATE TABLE eSportsTeams (TeamID INT, TeamName VARCHAR(100), Earnings FLOAT, LastUpdated DATE); INSERT INTO eSportsTeams (TeamID, TeamName, Earnings, LastUpdated) VALUES (1, 'TeamA', 1000000, '2020-01-01'), (2, 'TeamB', 1200000, '2020-02-01'), (3, 'TeamC', 1100000, '2020-03-01');", "sql": "SELECT TeamName, SUM(Earnings) as TotalEarnings FROM eSportsTeams WHERE LastUpdated >= DATEADD(year, -1, GETDATE()) GROUP BY TeamName;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 134, "num_statements": 1} {"question": "PL/pgSQL test: Plpython Spi (example 3).", "schema": null, "sql": "CREATE FUNCTION nested_call_three(a text) RETURNS text\n\tAS\n'return a'\n\tLANGUAGE plpython3u ;", "explanation": "PL/pgSQL example from PostgreSQL source test for Plpython Spi.", "validation_query": null, "source": "plpgsql_source", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 92, "num_statements": 1} {"question": "What is the total number of defense diplomacy events held in 'canada' between 2019 and 2021?", "schema": "CREATE TABLE defense_diplomacy (country VARCHAR(50), year INT, events INT); INSERT INTO defense_diplomacy (country, year, events) VALUES ('Canada', 2019, 15), ('Canada', 2020, 12), ('Canada', 2021, 18);", "sql": "SELECT country, SUM(events) as total_events FROM defense_diplomacy WHERE country = 'Canada' AND year BETWEEN 2019 AND 2021 GROUP BY country;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 140, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the Akira when Prince Devitt is Devitt (7:20)?", "schema": "CREATE TABLE table_name_99 (akira VARCHAR, prince_devitt VARCHAR)", "sql": "SELECT akira FROM table_name_99 WHERE prince_devitt = 'devitt (7:20)';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 70, "num_statements": 1} {"question": "List the faculty members who have not published any papers, in alphabetical order.", "schema": "CREATE TABLE faculties (faculty_id INT, name VARCHAR(255), dept_id INT, num_publications INT);", "sql": "SELECT name FROM faculties WHERE num_publications = 0 ORDER BY name ASC;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 72, "num_statements": 1} {"question": "What is the average revenue per concert by country?", "schema": "CREATE TABLE concerts (id INT, artist_id INT, city VARCHAR(50), country VARCHAR(50), revenue FLOAT); INSERT INTO concerts (id, artist_id, city, country, revenue) VALUES (1, 1, 'Los Angeles', 'USA', 500000), (2, 1, 'New York', 'USA', 700000), (3, 2, 'Seoul', 'South Korea', 800000), (4, 2, 'Tokyo', 'Japan', 900000), (5, 3, 'Paris', 'France', 1000000), (6, 4, 'Osaka', 'Japan', 850000), (7, 1, 'London', 'UK', 600000);", "sql": "SELECT country, AVG(revenue) as avg_revenue FROM concerts GROUP BY country;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 75, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: When the Vancouver Canucks were visiting, what was the record when the score was 4-2?", "schema": "CREATE TABLE table_name_6 (record VARCHAR, score VARCHAR, visitor VARCHAR)", "sql": "SELECT record FROM table_name_6 WHERE score = '4-2' AND visitor = 'vancouver canucks';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 86, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which English has Dutch of tong?", "schema": "CREATE TABLE table_name_26 (english VARCHAR, dutch VARCHAR)", "sql": "SELECT english FROM table_name_26 WHERE dutch = 'tong';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 55, "num_statements": 1} {"question": "Add a new record to the historical_sites table", "schema": "CREATE TABLE historical_sites (id INT PRIMARY KEY, name VARCHAR(255), country VARCHAR(255), type VARCHAR(255));", "sql": "INSERT INTO historical_sites (id, name, country, type) VALUES (1, 'Machu Picchu', 'Peru', 'cultural_heritage');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 111, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the result when team 1 is ICL Pakistan?", "schema": "CREATE TABLE table_17103566_1 (result VARCHAR, team_1 VARCHAR)", "sql": "SELECT result FROM table_17103566_1 WHERE team_1 = 'ICL Pakistan';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who won the mens singles in 2009?", "schema": "CREATE TABLE table_12204717_1 (mens_singles VARCHAR, year VARCHAR)", "sql": "SELECT mens_singles FROM table_12204717_1 WHERE year = 2009;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 60, "num_statements": 1} {"question": "Insert a new record into the \"resources\" table for a new gold mine in \"Peru\" with ID 901 and reserves of 5000 tons", "schema": "CREATE TABLE resources (id INT, mine_type VARCHAR(50), country VARCHAR(50), reserve_tons INT);", "sql": "INSERT INTO resources (id, mine_type, country, reserve_tons) VALUES (901, 'gold', 'Peru', 5000);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 96, "num_statements": 1} {"question": "What's the total transaction volume for digital assets in the last month?", "schema": "CREATE TABLE digital_assets (id INT, name VARCHAR(255), transaction_volume DECIMAL(10, 2)); INSERT INTO digital_assets (id, name, transaction_volume) VALUES (1, 'Asset 1', 1000.50), (2, 'Asset 2', 1500.25), (3, 'Asset 3', 2000.00); CREATE TABLE transactions (id INT, digital_asset_id INT, transaction_date DATE); INSERT INTO transactions (id, digital_asset_id, transaction_date) VALUES (1, 1, '2022-01-01'), (2, 2, '2022-01-05'), (3, 3, '2022-01-10');", "sql": "SELECT SUM(transaction_volume) FROM digital_assets JOIN transactions ON digital_assets.id = transactions.digital_asset_id WHERE transaction_date >= DATE_SUB(CURRENT_DATE, INTERVAL 1 MONTH);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 189, "num_statements": 1} {"question": "PostgreSQL regression test 'regproc': Write the SELECT query (example 13).", "schema": null, "sql": "SELECT to_regclass('pg_class');", "explanation": "Regression test for Regproc in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT to_regclass('pg_class')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": true, "sql_length": 31, "num_statements": 1} {"question": "What is the average age of patients who received therapy from counselors named \"James\" or \"Michelle\"?", "schema": "CREATE TABLE counselors (counselor_id INT, name TEXT, age INT); INSERT INTO counselors (counselor_id, name, age) VALUES (1, 'James', 35), (2, 'Michelle', 40); CREATE TABLE patients (patient_id INT, counselor_id INT, age INT); INSERT INTO patients (patient_id, counselor_id, age) VALUES (1, 1, 25), (2, 1, 30), (3, 2, 45), (4, 2, 50);", "sql": "SELECT AVG(patients.age) FROM patients JOIN counselors ON patients.counselor_id = counselors.counselor_id WHERE counselors.name IN ('James', 'Michelle');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 153, "num_statements": 1} {"question": "What is the average distance of space debris generated by NASA from the Earth's center?", "schema": "CREATE TABLE space_debris (id INT, name VARCHAR(50), type VARCHAR(50), source VARCHAR(50), location POINT);", "sql": "SELECT AVG(DISTANCE(location, POINT(0, 0))) as average_distance FROM space_debris WHERE source = 'NASA';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 104, "num_statements": 1} {"question": "PostgreSQL regression test 'jsonpath': Write the SELECT query (example 136).", "schema": null, "sql": "select '$ ? (@.a < +.1e-1)'::jsonpath;", "explanation": "Regression test for Jsonpath in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select '$ ? (@.a < +.1e-1)'::jsonpath) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 38, "num_statements": 1} {"question": "List the virtual tours that are available in Spanish and have a cultural heritage focus, along with their average user rating.", "schema": "CREATE TABLE virtual_tour (id INT PRIMARY KEY, name VARCHAR(255), language VARCHAR(255), focus VARCHAR(255), user_rating DECIMAL(3,2)); INSERT INTO virtual_tour (id, name, language, focus, user_rating) VALUES (1, 'Tour of the Alhambra', 'Spanish', 'Cultural Heritage', 4.5);", "sql": "SELECT vt.name, vt.language, vt.focus, AVG(vt.user_rating) as avg_rating FROM virtual_tour vt WHERE vt.language = 'Spanish' AND vt.focus = 'Cultural Heritage' GROUP BY vt.name, vt.language, vt.focus;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 199, "num_statements": 1} {"question": "Who are the volunteers that signed up in January and June of 2022?", "schema": "CREATE TABLE volunteers (volunteer_id INT, signup_date DATE); INSERT INTO volunteers (volunteer_id, signup_date) VALUES (1, '2022-01-05'), (2, '2022-03-30'), (3, '2022-04-15'), (4, '2022-06-10');", "sql": "SELECT volunteer_id, signup_date FROM volunteers WHERE (MONTH(signup_date) = 1 AND YEAR(signup_date) = 2022) OR (MONTH(signup_date) = 6 AND YEAR(signup_date) = 2022);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 166, "num_statements": 1} {"question": "PostgreSQL regression test 'horology': Write the SELECT query (example 351).", "schema": null, "sql": "SELECT to_timestamp('2016-02-29 15:50:55', 'YYYY-MM-DD HH24:MI:SS'); -- ok\nSELECT to_timestamp('2015-02-29 15:50:55', 'YYYY-MM-DD HH24:MI:SS');", "explanation": "Regression test for Horology in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT to_timestamp('2016-02-29 15:50:55', 'YYYY-MM-DD HH24:MI:SS'); -- ok\nSELECT to_timestamp('2015-02-29 15:50:55', 'YYYY-MM-DD HH24:MI:SS')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 144, "num_statements": 2} {"question": "Generate PostgreSQL SQL for: What is the Date of Week 2?", "schema": "CREATE TABLE table_name_47 (date VARCHAR, week VARCHAR)", "sql": "SELECT date FROM table_name_47 WHERE week = 2;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 46, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: who is the constructor when the tyre is d, the engine is talbot 23cv 4.5 l6, the chassis is talbot-lago t26c and the entrant is ecurie belge?", "schema": "CREATE TABLE table_name_75 (constructor VARCHAR, entrant VARCHAR, chassis VARCHAR, tyre VARCHAR, engine VARCHAR)", "sql": "SELECT constructor FROM table_name_75 WHERE tyre = 'd' AND engine = 'talbot 23cv 4.5 l6' AND chassis = 'talbot-lago t26c' AND entrant = 'ecurie belge';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 151, "num_statements": 1} {"question": "How many 'Eco-Friendly' garments were sold in 'Australia' retail stores in Q1 of 2022?", "schema": "CREATE TABLE SalesStore (id INT PRIMARY KEY, store_name VARCHAR(50), location VARCHAR(50), garment_type VARCHAR(50), is_eco_friendly BOOLEAN, quantity INT, sale_date DATE); INSERT INTO SalesStore (id, store_name, location, garment_type, is_eco_friendly, quantity, sale_date) VALUES (1, 'Store D', 'Australia', 'Eco-Friendly T-Shirt', true, 30, '2022-01-15');", "sql": "SELECT SUM(quantity) as total_quantity FROM SalesStore WHERE location = 'Australia' AND is_eco_friendly = true AND sale_date BETWEEN '2022-01-01' AND '2022-03-31';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 163, "num_statements": 1} {"question": "What is the number of cases with a female judge and male defendant?", "schema": "CREATE TABLE cases (id INT, judge_gender VARCHAR(6), defendant_gender VARCHAR(6)); INSERT INTO cases (id, judge_gender, defendant_gender) VALUES (1, 'Female', 'Male'), (2, 'Male', 'Female'), (3, 'Female', 'Male');", "sql": "SELECT COUNT(*) FROM cases WHERE judge_gender = 'Female' AND defendant_gender = 'Male';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 87, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the highest swimsuit a contestant from Kansas with an average larger than 8.48, an interview higher than 8.58, and an evening gown higher than 8.82 has?", "schema": "CREATE TABLE table_name_69 (swimsuit INTEGER, state VARCHAR, evening_gown VARCHAR, average VARCHAR, interview VARCHAR)", "sql": "SELECT MAX(swimsuit) FROM table_name_69 WHERE average > 8.48 AND interview > 8.58 AND evening_gown > 8.82 AND state = 'kansas';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 127, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Find the name of all the cities and states.", "schema": "CREATE TABLE addresses (town_city VARCHAR, state_province_county VARCHAR)", "sql": "SELECT town_city FROM addresses UNION SELECT state_province_county FROM addresses;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 82, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the enrollment at delaware valley college?", "schema": "CREATE TABLE table_261906_2 (joined_mac INTEGER, institution VARCHAR)", "sql": "SELECT MAX(joined_mac) FROM table_261906_2 WHERE institution = 'Delaware Valley College';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 89, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the area of Tuscany?", "schema": "CREATE TABLE table_14532_1 (area__km²_ INTEGER, region VARCHAR)", "sql": "SELECT MAX(area__km²_) FROM table_14532_1 WHERE region = 'Tuscany';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 67, "num_statements": 1} {"question": "What was the waste generation in Bangkok in 2019?", "schema": "CREATE TABLE waste_generation_bangkok (year INT, total_waste INT); INSERT INTO waste_generation_bangkok (year, total_waste) VALUES (2018, 150000), (2019, 170000), (2020, 185000);", "sql": "SELECT total_waste FROM waste_generation_bangkok WHERE year = 2019;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 67, "num_statements": 1} {"question": "Which destinations in Canada have the highest increase in visitors from 2019 to 2022?", "schema": "CREATE TABLE canada_tourism (destination VARCHAR(50), year INT, visitors INT); INSERT INTO canada_tourism (destination, year, visitors) VALUES ('Banff', 2019, 500000), ('Banff', 2022, 700000), ('Whistler', 2019, 300000), ('Whistler', 2022, 500000);", "sql": "SELECT destination, MAX(visitors) - MIN(visitors) AS increase FROM canada_tourism WHERE year IN (2019, 2022) GROUP BY destination ORDER BY increase DESC;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 153, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which position from the Cherkassy Monkeys' club was born after 1978?", "schema": "CREATE TABLE table_name_10 (position VARCHAR, year_born VARCHAR, current_club VARCHAR)", "sql": "SELECT position FROM table_name_10 WHERE year_born > 1978 AND current_club = 'cherkassy monkeys';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 97, "num_statements": 1} {"question": "Delete all records from the 'readers' table where 'country' is 'United States'", "schema": "CREATE TABLE readers (id INT, name VARCHAR(50), country VARCHAR(50)); INSERT INTO readers (id, name, country) VALUES (1, 'Alice Johnson', 'United States'); INSERT INTO readers (id, name, country) VALUES (2, 'Bob Williams', 'Canada');", "sql": "DELETE FROM readers WHERE country = 'United States';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_delete", "is_postgresql_specific": false, "sql_length": 52, "num_statements": 1} {"question": "What is the average number of fouls committed by players from Argentina in the 'World Cup'?", "schema": "CREATE TABLE players (player_id INT, name TEXT, country TEXT); INSERT INTO players (player_id, name, country) VALUES (1, 'Messi', 'Argentina'), (2, 'Di Maria', 'Argentina'), (3, 'Kane', 'England'); CREATE TABLE fouls (foul_id INT, player_id INT, fouls INT); INSERT INTO fouls (foul_id, player_id, fouls) VALUES (1, 1, 2), (2, 1, 3), (3, 2, 1), (4, 3, 5); CREATE TABLE games (game_id INT, player_id INT, tournament TEXT); INSERT INTO games (game_id, player_id, tournament) VALUES (1, 1, 'World Cup'), (2, 1, 'World Cup'), (3, 2, 'World Cup'), (4, 3, 'World Cup');", "sql": "SELECT AVG(fouls) FROM fouls JOIN games ON fouls.player_id = games.player_id JOIN players ON fouls.player_id = players.player_id WHERE players.country = 'Argentina' AND games.tournament = 'World Cup';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 200, "num_statements": 1} {"question": "pgTAP test for Index (assertion 75).", "schema": null, "sql": "SELECT * FROM check_test(\n index_is_primary( 'idx_baz' ),\n false,\n 'index_is_primary() fail index only',\n 'Index idx_baz should be on a primary key',\n ''\n);", "explanation": "SQL assertion from pgTAP test suite for Index.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 171, "num_statements": 1} {"question": "How many local businesses in Spain have benefited from sustainable tourism initiatives?", "schema": "CREATE TABLE local_businesses (business_id INT, business_name TEXT, country TEXT, sustainable_initiative BOOLEAN); INSERT INTO local_businesses (business_id, business_name, country, sustainable_initiative) VALUES (1, 'La Ribera Market', 'Spain', TRUE); INSERT INTO local_businesses (business_id, business_name, country, sustainable_initiative) VALUES (2, 'Gothic Quarter Shops', 'Spain', TRUE); INSERT INTO local_businesses (business_id, business_name, country, sustainable_initiative) VALUES (3, 'Barceloneta Fish Market', 'Spain', FALSE);", "sql": "SELECT COUNT(*) FROM local_businesses WHERE country = 'Spain' AND sustainable_initiative = TRUE;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 96, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What are the goals for 8/4/04?", "schema": "CREATE TABLE table_name_70 (goals VARCHAR, date VARCHAR)", "sql": "SELECT goals FROM table_name_70 WHERE date = '8/4/04';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 54, "num_statements": 1} {"question": "What is the total amount of donations made by donors from Palestine in the year 2019?", "schema": "CREATE TABLE donations (id INT, donor_id INT, donor_country TEXT, donation_date DATE, donation_amount DECIMAL); INSERT INTO donations (id, donor_id, donor_country, donation_date, donation_amount) VALUES (1, 1, 'Palestine', '2019-01-01', 50.00), (2, 2, 'Palestine', '2019-06-01', 100.00), (3, 3, 'Palestine', '2019-12-31', 25.00);", "sql": "SELECT SUM(donation_amount) FROM donations WHERE donor_country = 'Palestine' AND YEAR(donation_date) = 2019;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 108, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'rowsecurity' (example 296).", "schema": null, "sql": "CREATE TABLE s1 (a int, b text);", "explanation": "DDL from PostgreSQL core regression test for Rowsecurity.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_table", "is_postgresql_specific": false, "sql_length": 32, "num_statements": 1} {"question": "What is the average product price for each category in the 'sustainable_products' table?", "schema": "CREATE TABLE sustainable_products (product_id INT, category VARCHAR(255), price DECIMAL(10,2));", "sql": "SELECT category, AVG(price) FROM sustainable_products GROUP BY category;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 72, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is every Gregorian month when the season in Tamil is இளவேனில்?", "schema": "CREATE TABLE table_1740431_3 (gregorian_months VARCHAR, season_in_tamil VARCHAR)", "sql": "SELECT gregorian_months FROM table_1740431_3 WHERE season_in_tamil = 'இளவேனில்';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 80, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What's the population with a code more than 90902 and an area less than 1,335.47?", "schema": "CREATE TABLE table_name_19 (population INTEGER, code VARCHAR, area__km_2__ VARCHAR)", "sql": "SELECT MIN(population) FROM table_name_19 WHERE code > 90902 AND area__km_2__ < 1 OFFSET 335.47;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 96, "num_statements": 1} {"question": "Display the total quantity of sustainable materials used by ethical fashion brands in the last quarter.", "schema": "CREATE TABLE Dates (date_id INT, date DATE); INSERT INTO Dates (date_id, date) VALUES (1, '2022-01-01'), (2, '2022-04-01'), (3, '2022-07-01'), (4, '2022-10-01'); CREATE TABLE SustainableBrands (brand_id INT, material_id INT, quantity INT, date_id INT); INSERT INTO SustainableBrands (brand_id, material_id, quantity, date_id) VALUES (1, 1, 500, 4), (1, 2, 300, 4), (2, 3, 700, 4), (3, 1, 400, 4);", "sql": "SELECT SUM(quantity) FROM SustainableBrands INNER JOIN Dates ON SustainableBrands.date_id = Dates.date_id WHERE date >= DATE_SUB(CURRENT_DATE, INTERVAL 3 MONTH) AND EXISTS (SELECT * FROM Brands WHERE Brands.brand_id = SustainableBrands.brand_id AND location = 'Europe');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 270, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who is the champion of the 1994 season?", "schema": "CREATE TABLE table_2527617_1 (champion VARCHAR, season VARCHAR)", "sql": "SELECT champion FROM table_2527617_1 WHERE season = 1994;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 57, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which Country has a Score of 70-68-70-68=276?", "schema": "CREATE TABLE table_name_43 (country VARCHAR, score VARCHAR)", "sql": "SELECT country FROM table_name_43 WHERE score = 70 - 68 - 70 - 68 = 276;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 72, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the last 10 meetings that have 11/26/1988 as the lasr meeting?", "schema": "CREATE TABLE table_name_32 (last_10_meetings VARCHAR, last_meeting VARCHAR)", "sql": "SELECT last_10_meetings FROM table_name_32 WHERE last_meeting = '11/26/1988';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 77, "num_statements": 1} {"question": "pgTAP test for Pgtap--Unpackaged--0.91.0 (assertion 252).", "schema": null, "sql": "ALTER EXTENSION pgtap ADD FUNCTION _ikeys( NAME, NAME, NAME);", "explanation": "SQL assertion from pgTAP test suite for Pgtap--Unpackaged--0.91.0.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 61, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: List ids for all student who are on scholarship.", "schema": "CREATE TABLE Sportsinfo (StuID VARCHAR, onscholarship VARCHAR)", "sql": "SELECT StuID FROM Sportsinfo WHERE onscholarship = 'Y';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 55, "num_statements": 1} {"question": "List the total number of pollution control initiatives implemented in the Atlantic and Arctic regions.", "schema": "CREATE TABLE PollutionControl (id INT, initiative VARCHAR(50), region VARCHAR(20)); INSERT INTO PollutionControl (id, initiative, region) VALUES (1, 'Ocean Cleanup', 'Arctic'), (2, 'Plastic Reduction', 'Atlantic'), (3, 'Carbon Capture', 'Global');", "sql": "SELECT region, COUNT(*) as total_initiatives FROM PollutionControl WHERE region IN ('Atlantic', 'Arctic') GROUP BY region;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 122, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the score of the 3-0 Win result?", "schema": "CREATE TABLE table_name_91 (score VARCHAR, result VARCHAR)", "sql": "SELECT score FROM table_name_91 WHERE result = '3-0 win';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 57, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: The station located in Albuquerque has been owned since what year?", "schema": "CREATE TABLE table_11147852_1 (owned_since VARCHAR, city_of_license_market VARCHAR)", "sql": "SELECT owned_since FROM table_11147852_1 WHERE city_of_license_market = 'Albuquerque';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 86, "num_statements": 1} {"question": "What is the average salary by department for employees who have been trained?", "schema": "CREATE TABLE Employees (EmployeeID INT, Department VARCHAR(20), Salary FLOAT, Trained BOOLEAN); INSERT INTO Employees (EmployeeID, Department, Salary, Trained) VALUES (1, 'IT', 75000.0, 1), (2, 'HR', 65000.0, 0), (3, 'IT', 80000.0, 1);", "sql": "SELECT Department, AVG(Salary) FROM Employees WHERE Trained = 1 GROUP BY Department;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 84, "num_statements": 1} {"question": "List all tables and views in the 'telecom' schema", "schema": "CREATE SCHEMA telecom; CREATE TABLE mobile_subscribers (id INT, name TEXT, data_plan TEXT); CREATE VIEW broadband_subscribers AS SELECT * FROM subscribers WHERE type = 'broadband'; CREATE TABLE network_investments (year INT, amount FLOAT); CREATE TABLE compliance_reports (quarter INT, filed BOOLEAN);", "sql": "SELECT * FROM information_schema.tables WHERE table_schema = 'telecom';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": true, "sql_length": 71, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the score when they played the Boston Patriots?", "schema": "CREATE TABLE table_name_91 (result VARCHAR, opponent VARCHAR)", "sql": "SELECT result FROM table_name_91 WHERE opponent = 'boston patriots';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 68, "num_statements": 1} {"question": "List the rural infrastructure projects and their budgets for 'rural_area_1' from the 'rural_infrastructure' and 'community_development' tables", "schema": "CREATE TABLE rural_infrastructure (project_id INT, project_type VARCHAR(50), budget INT, area_id INT); CREATE TABLE community_development (area_id INT, area_name VARCHAR(50));", "sql": "SELECT r.project_type, r.budget FROM rural_infrastructure r INNER JOIN community_development c ON r.area_id = c.area_id WHERE c.area_name = 'rural_area_1';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 155, "num_statements": 1} {"question": "Show the total installed capacity of renewable energy projects for each location", "schema": "CREATE TABLE renewable_energy ( id INT PRIMARY KEY, location VARCHAR(255), project_name VARCHAR(255), installed_capacity INT ); INSERT INTO renewable_energy (id, location, project_name, installed_capacity) VALUES (1, 'Germany', 'Solarpark Finow Tower', 45000); INSERT INTO renewable_energy (id, location, project_name, installed_capacity) VALUES (2, 'France', 'La Plaine Wind Farm', 60000);", "sql": "SELECT location, SUM(installed_capacity) FROM renewable_energy GROUP BY location;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 81, "num_statements": 1} {"question": "List all countries with their respective number of podcasts and the total duration of those podcasts.", "schema": "CREATE TABLE podcasts (id INT, name VARCHAR(255), country VARCHAR(255), duration INT); INSERT INTO podcasts (id, name, country, duration) VALUES (1, 'Podcast1', 'USA', 100), (2, 'Podcast2', 'UK', 200);", "sql": "SELECT country, COUNT(*) as num_podcasts, SUM(duration) as total_duration FROM podcasts GROUP BY country;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 105, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: When was the WC 1974 Qualifying game?", "schema": "CREATE TABLE table_name_94 (date VARCHAR, type_of_game VARCHAR)", "sql": "SELECT date FROM table_name_94 WHERE type_of_game = 'wc 1974 qualifying';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 73, "num_statements": 1} {"question": "PL/pgSQL test: Plpython Subtransaction (example 31).", "schema": null, "sql": "SELECT subtransaction_exit_without_enter();", "explanation": "PL/pgSQL example from PostgreSQL source test for Plpython Subtransaction.", "validation_query": null, "source": "plpgsql_source", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 43, "num_statements": 1} {"question": "List all volunteers who worked on 'Food Security' program in 'Asia'", "schema": "CREATE TABLE Volunteers (VolunteerID INT, VolunteerName TEXT, Country TEXT); INSERT INTO Volunteers (VolunteerID, VolunteerName, Country) VALUES (1, 'Anna Lee', 'Korea'); INSERT INTO Volunteers (VolunteerID, VolunteerName, Country) VALUES (2, 'Ravi Patel', 'India'); CREATE TABLE VolunteerHours (VolunteerID INT, Program TEXT, Hours DECIMAL); INSERT INTO VolunteerHours (VolunteerID, Program, Hours) VALUES (1, 'Food Security', 20); INSERT INTO VolunteerHours (VolunteerID, Program, Hours) VALUES (2, 'Food Security', 30);", "sql": "SELECT Volunteers.VolunteerName FROM Volunteers INNER JOIN VolunteerHours ON Volunteers.VolunteerID = VolunteerHours.VolunteerID WHERE VolunteerHours.Program = 'Food Security' AND Volunteers.Country = 'Asia';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 208, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: what are all the positions of players who's hometown is concord, california", "schema": "CREATE TABLE table_11677691_12 (position VARCHAR, hometown VARCHAR)", "sql": "SELECT position FROM table_11677691_12 WHERE hometown = 'Concord, California';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 78, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'cube' (example 160).", "schema": null, "sql": "SELECT cube_ur_coord('(1,2),(1,2)'::cube, 2);", "explanation": "Example query from the 'cube' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "query_select", "is_postgresql_specific": true, "sql_length": 45, "num_statements": 1} {"question": "Identify users who have posted more than 50 posts in the 'social_media' table.", "schema": "CREATE TABLE social_media (user_id INT, posts_count INT);", "sql": "SELECT user_id FROM social_media WHERE posts_count > 50;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 56, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many points did the Falcons score when the record was 4-4?", "schema": "CREATE TABLE table_16710971_2 (falcons_points VARCHAR, record VARCHAR)", "sql": "SELECT falcons_points FROM table_16710971_2 WHERE record = '4-4';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 65, "num_statements": 1} {"question": "How many countries in Europe have been promoting sustainable tourism since 2019?", "schema": "CREATE TABLE Sustainable_Practices ( id INT PRIMARY KEY, country_id INT, certification_date DATE, FOREIGN KEY (country_id) REFERENCES Countries(id) ); INSERT INTO Sustainable_Practices (id, country_id, certification_date) VALUES (1, 7, '2019-07-01'); INSERT INTO Sustainable_Practices (id, country_id, certification_date) VALUES (2, 8, '2020-03-01');", "sql": "SELECT COUNT(DISTINCT c.id) as country_count FROM Countries c INNER JOIN Sustainable_Practices sp ON c.id = sp.country_id WHERE c.continent = 'Europe' AND sp.certification_date >= '2019-01-01';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 193, "num_statements": 1} {"question": "What is the maximum duration of a space mission to Mars in days?", "schema": "CREATE TABLE SpaceMissions (MissionID INT, Name VARCHAR(50), LaunchDate DATE, Duration INT); INSERT INTO SpaceMissions VALUES (1, 'Curiosity', '2012-11-26', 669), (2, 'Perseverance', '2020-07-30', 342), (3, 'InSight', '2018-05-05', 913);", "sql": "SELECT MAX(Duration) FROM SpaceMissions WHERE Destination = 'Mars';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 67, "num_statements": 1} {"question": "Count the number of esports events in 2022", "schema": "CREATE TABLE Esports_Events (id INT, name VARCHAR(50), event_date DATE); INSERT INTO Esports_Events (id, name, event_date) VALUES (1, 'Dreamhack', '2022-01-01'), (2, 'ESL One', '2021-01-01'), (3, 'IEM', '2022-03-01');", "sql": "SELECT COUNT(*) FROM Esports_Events WHERE event_date BETWEEN '2022-01-01' AND '2022-12-31';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 91, "num_statements": 1} {"question": "Find the number of founders who are women", "schema": "CREATE TABLE startup (id INT, name TEXT, founding_year INT, founder_gender TEXT);", "sql": "SELECT COUNT(*) FROM startup WHERE founder_gender = 'woman';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 60, "num_statements": 1} {"question": "PL/pgSQL test: Plpgsql Transaction (example 23).", "schema": null, "sql": "$$;\n\nCALL transaction_test5();\n\n\n-- SECURITY DEFINER currently disallow transaction statements\nCREATE PROCEDURE transaction_test5b()\nLANGUAGE plpgsql\nSECURITY DEFINER\nAS $$\nBEGIN\n COMMIT;", "explanation": "PL/pgSQL example from PostgreSQL source test for Plpgsql Transaction.", "validation_query": null, "source": "plpgsql_source", "difficulty": "basic", "category": "plpgsql_function", "is_postgresql_specific": true, "sql_length": 190, "num_statements": 3} {"question": "How many astronauts are there in total from India and Japan?", "schema": "CREATE TABLE Astronauts (astronaut_id INT, name VARCHAR(255), country VARCHAR(255)); INSERT INTO Astronauts (astronaut_id, name, country) VALUES (1001, 'Rakesh Sharma', 'India'), (1002, 'Kalpana Chawla', 'US'), (1003, 'Sunita Williams', 'US'), (2001, 'Takao Doi', 'Japan'), (2002, 'Naoko Yamazaki', 'Japan');", "sql": "SELECT COUNT(*) FROM Astronauts WHERE country = 'India' OR country = 'Japan';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 77, "num_statements": 1} {"question": "Which vessels have a compliance score below 70 and have traveled to the Arctic Ocean?", "schema": "CREATE TABLE vessels (id INT, name TEXT, type TEXT, compliance_score INT);CREATE TABLE routes (id INT, vessel_id INT, destination TEXT, date DATE); INSERT INTO vessels (id, name, type, compliance_score) VALUES (1, 'VesselF', 'Cargo', 65); INSERT INTO routes (id, vessel_id, destination, date) VALUES (1, 1, 'Arctic', '2022-02-15');", "sql": "SELECT v.name FROM vessels v JOIN routes r ON v.id = r.vessel_id WHERE v.compliance_score < 70 AND r.destination = 'Arctic';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 124, "num_statements": 1} {"question": "What is the maximum billing amount for each attorney in the 'billing' table, grouped by their specialty?", "schema": "CREATE TABLE attorney (attorney_id INT, specialty VARCHAR(255)); INSERT INTO attorney (attorney_id, specialty) VALUES (1, 'Criminal Law'), (2, 'Family Law'), (3, 'Personal Injury'); CREATE TABLE billing (bill_id INT, attorney_id INT, amount DECIMAL(10,2)); INSERT INTO billing (bill_id, attorney_id, amount) VALUES (1, 1, 500.00), (2, 1, 750.00), (3, 2, 600.00), (4, 3, 800.00), (5, 3, 900.00);", "sql": "SELECT specialty, MAX(amount) FROM billing JOIN attorney ON billing.attorney_id = attorney.attorney_id GROUP BY specialty;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 122, "num_statements": 1} {"question": "pgTAP test for Hastap (assertion 103).", "schema": null, "sql": "SELECT * FROM check_test(\n hasnt_type( 'public'::name, '__foobarbaz__'::name ),\n true,\n 'hasnt_type(scheam, type)',\n 'Type public.__foobarbaz__ should not exist',\n ''\n);", "explanation": "SQL assertion from pgTAP test suite for Hastap.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 184, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What duration is listed for Christian de la Fuente?", "schema": "CREATE TABLE table_11210576_3 (duration VARCHAR, actor VARCHAR)", "sql": "SELECT duration FROM table_11210576_3 WHERE actor = 'Christian de la Fuente';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 77, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the location code with the most documents?", "schema": "CREATE TABLE Document_locations (location_code VARCHAR)", "sql": "SELECT location_code FROM Document_locations GROUP BY location_code ORDER BY COUNT(*) DESC LIMIT 1;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 99, "num_statements": 1} {"question": "How many marine species are endangered in the Indian Ocean?", "schema": "CREATE TABLE marine_species (name TEXT, region TEXT, endangered BOOLEAN); INSERT INTO marine_species (name, region, endangered) VALUES ('Whale Shark', 'Indian Ocean', TRUE), ('Dugong', 'Indian Ocean', TRUE);", "sql": "SELECT COUNT(*) FROM marine_species WHERE region = 'Indian Ocean' AND endangered = TRUE;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 88, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is Fiji's lowest total?", "schema": "CREATE TABLE table_name_63 (total INTEGER, country VARCHAR)", "sql": "SELECT MIN(total) FROM table_name_63 WHERE country = 'fiji';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 60, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'cube' (example 177).", "schema": null, "sql": "SELECT cube_enlarge('(0)'::cube, 1, 2);", "explanation": "Example query from the 'cube' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "query_select", "is_postgresql_specific": true, "sql_length": 39, "num_statements": 1} {"question": "Get the total area of the ocean floor mapped by region", "schema": "CREATE TABLE ocean_floor_mapping (mapping_id INT, region VARCHAR(255), area INT);", "sql": "SELECT region, SUM(area) FROM ocean_floor_mapping GROUP BY region;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "What is the total number of employees working in the 'manufacturing' department, excluding any employees who also appear in the 'training' table?", "schema": "CREATE TABLE companies (company_id INT, department VARCHAR(20)); INSERT INTO companies (company_id, department) VALUES (1, 'manufacturing'), (2, 'HR'), (3, 'manufacturing'); CREATE TABLE employees (employee_id INT, company_id INT); CREATE TABLE training (employee_id INT, training VARCHAR(20)); INSERT INTO employees (employee_id, company_id) VALUES (1, 1), (2, 1), (3, 2); INSERT INTO training (employee_id, training) VALUES (1, 'welding'), (2, 'safety'), (3, 'safety');", "sql": "SELECT COUNT(*) FROM companies INNER JOIN employees ON companies.company_id = employees.company_id WHERE companies.department = 'manufacturing' AND employees.employee_id NOT IN (SELECT employee_id FROM training);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 212, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the score of the game played in the 2012 Africa Cup of Nations?", "schema": "CREATE TABLE table_name_90 (score VARCHAR, competition VARCHAR)", "sql": "SELECT score FROM table_name_90 WHERE competition = '2012 africa cup of nations';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 81, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What date was the score 3–3, and away team was Barnet?", "schema": "CREATE TABLE table_name_96 (date VARCHAR, score VARCHAR, away_team VARCHAR)", "sql": "SELECT date FROM table_name_96 WHERE score = '3–3' AND away_team = 'barnet';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 76, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'stats_ext' (example 759).", "schema": null, "sql": "CREATE STATISTICS s_expr ON mod(id, 2), lower(col) FROM stats_ext_tbl;", "explanation": "DDL from PostgreSQL core regression test for Stats Ext.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 70, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What year was the startup for the Project Named of taq taq ph 2?", "schema": "CREATE TABLE table_name_51 (year_startup VARCHAR, project_name VARCHAR)", "sql": "SELECT year_startup FROM table_name_51 WHERE project_name = 'taq taq ph 2';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 75, "num_statements": 1} {"question": "What is the maximum number of cases handled in a year by any mediation center?", "schema": "CREATE TABLE AnnualCasesHandled (ID INT, MediationCenter VARCHAR(50), Year INT, Cases INT); INSERT INTO AnnualCasesHandled (ID, MediationCenter, Year, Cases) VALUES (1, 'PeaceBuilders', 2005, 120), (2, 'CommunityHealers', 2010, 150), (3, 'HarmonyKeepers', 2008, 210), (4, 'UnityCreators', 2015, 200);", "sql": "SELECT MAX(Cases) FROM AnnualCasesHandled;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 42, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What date has a margin of victory of 1 stroke over Greg Kraft?", "schema": "CREATE TABLE table_name_21 (date VARCHAR, margin_of_victory VARCHAR, runner_s__up VARCHAR)", "sql": "SELECT date FROM table_name_21 WHERE margin_of_victory = '1 stroke' AND runner_s__up = 'greg kraft';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 100, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'btree_gin' (example 6).", "schema": null, "sql": "SELECT * FROM test_bpchar WHERE i<='abc' ORDER BY i;", "explanation": "Example query from the 'btree_gin' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 52, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'returning' (example 47).", "schema": null, "sql": "DELETE FROM foo WHERE f2 = 'zit' RETURNING *;", "explanation": "DML from PostgreSQL core regression test for Returning.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_delete", "is_postgresql_specific": true, "sql_length": 45, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'hash_index' (example 67).", "schema": null, "sql": "INSERT INTO hash_split_heap SELECT a/2 FROM generate_series(1, 25000) a;", "explanation": "DML from PostgreSQL core regression test for Hash Index.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 72, "num_statements": 1} {"question": "What is the total value of all transactions on the Polygon network, grouped by month?", "schema": "CREATE TABLE polygon_transactions (transaction_id INT, tx_time TIMESTAMP, value DECIMAL(10, 2)); INSERT INTO polygon_transactions (transaction_id, tx_time, value) VALUES (1, '2022-01-01 10:00:00', 100), (2, '2022-01-02 11:00:00', 200), (3, '2022-01-03 12:00:00', 300), (4, '2022-01-04 13:00:00', 400), (5, '2022-01-05 14:00:00', 500);", "sql": "SELECT DATE_FORMAT(tx_time, '%Y-%m') AS month, SUM(value) AS total_value FROM polygon_transactions GROUP BY month;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 114, "num_statements": 1} {"question": "What is the maximum construction cost of public works projects in the 'Africa' continent?", "schema": "CREATE TABLE Projects (id INT, name TEXT, country TEXT, cost FLOAT); INSERT INTO Projects (id, name, country, cost) VALUES (1, 'ProjectA', 'CountryX', 2000000.00), (2, 'ProjectB', 'CountryY', 2500500.75), (3, 'ProjectC', 'CountryZ', 1800000.50), (4, 'ProjectD', 'CountryA', 3000000.00); CREATE TABLE Countries (id INT, name TEXT, continent TEXT); INSERT INTO Countries (id, name, continent) VALUES (1, 'CountryX', 'Africa'), (2, 'CountryY', 'Africa'), (3, 'CountryZ', 'Europe'), (4, 'CountryA', 'Africa');", "sql": "SELECT MAX(cost) FROM Projects INNER JOIN Countries ON Projects.country = Countries.name WHERE Countries.continent = 'Africa';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 126, "num_statements": 1} {"question": "PostgreSQL regression test 'numeric': Write the SELECT query (example 713).", "schema": null, "sql": "SELECT to_char('100'::numeric, 'f\"\\\\ool\"999');", "explanation": "Regression test for Numeric in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT to_char('100'::numeric, 'f\"\\\\ool\"999')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 46, "num_statements": 1} {"question": "What is the percentage of menu items that are vegetarian for each cuisine type?", "schema": "CREATE TABLE restaurants (id INT, name VARCHAR(50), cuisine VARCHAR(50), menu_item_id INT); CREATE TABLE menu_items (id INT, name VARCHAR(50), vegetarian BOOLEAN); INSERT INTO restaurants (id, name, cuisine, menu_item_id) VALUES (1, 'Restaurant A', 'Italian', 1), (2, 'Restaurant B', 'Mexican', 2), (3, 'Restaurant C', 'Chinese', 3); INSERT INTO menu_items (id, name, vegetarian) VALUES (1, 'Pizza Margherita', FALSE), (2, 'Tacos Al Pastor', FALSE), (3, 'Kung Pao Chicken', FALSE), (4, 'Vegetable Stir Fry', TRUE);", "sql": "SELECT r.cuisine, 100.0 * COUNT(CASE WHEN m.vegetarian THEN 1 END) / COUNT(*) as vegetarian_percentage FROM restaurants r JOIN menu_items m ON r.menu_item_id = m.id GROUP BY r.cuisine;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 184, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: For which tournament was the margin of victory 7 strokes?", "schema": "CREATE TABLE table_name_50 (tournament VARCHAR, margin_of_victory VARCHAR)", "sql": "SELECT tournament FROM table_name_50 WHERE margin_of_victory = '7 strokes';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 75, "num_statements": 1} {"question": "What is the total number of basketball games played in the 2022 season?", "schema": "CREATE TABLE games (season TEXT, sport TEXT, total_games INT); INSERT INTO games (season, sport, total_games) VALUES ('2022', 'Basketball', 1230); INSERT INTO games (season, sport, total_games) VALUES ('2022', 'Football', 272);", "sql": "SELECT SUM(total_games) FROM games WHERE season = '2022' AND sport = 'Basketball';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 82, "num_statements": 1} {"question": "Find the average renewable energy production of each city in France.", "schema": "CREATE TABLE renewable_energy_france (id INT, city VARCHAR(255), production FLOAT); INSERT INTO renewable_energy_france (id, city, production) VALUES (1, 'Paris', 2000), (2, 'Lyon', 2500), (3, 'Marseille', 1500);", "sql": "SELECT city, AVG(production) FROM renewable_energy_france GROUP BY city;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 72, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: When was the earliest founded university?", "schema": "CREATE TABLE table_261941_1 (founded INTEGER)", "sql": "SELECT MIN(founded) FROM table_261941_1;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 40, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the B.P. of club Halifax?", "schema": "CREATE TABLE table_19179465_1 (bp VARCHAR, club VARCHAR)", "sql": "SELECT bp FROM table_19179465_1 WHERE club = 'Halifax';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 55, "num_statements": 1} {"question": "PostgreSQL regression test 'jsonb': Write the SELECT query (example 197).", "schema": null, "sql": "select jsonb_object('{a,b,c,\"d e f\"}','{1,2,3,\"a b c\",g}');", "explanation": "Regression test for Jsonb in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select jsonb_object('{a,b,c,\"d e f\"}','{1,2,3,\"a b c\",g}')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 59, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What are the names of entrepreneurs?", "schema": "CREATE TABLE people (Name VARCHAR, People_ID VARCHAR); CREATE TABLE entrepreneur (People_ID VARCHAR)", "sql": "SELECT T2.Name FROM entrepreneur AS T1 JOIN people AS T2 ON T1.People_ID = T2.People_ID;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 88, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: When the wallington new foresters are in division four what is the season?", "schema": "CREATE TABLE table_24575253_4 (season VARCHAR, division_four VARCHAR)", "sql": "SELECT season FROM table_24575253_4 WHERE division_four = 'Wallington New Foresters';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 85, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: List the rank of the player that left due to right wrist surgery?", "schema": "CREATE TABLE table_24431264_18 (rank VARCHAR, withdrew_due_to VARCHAR)", "sql": "SELECT rank FROM table_24431264_18 WHERE withdrew_due_to = 'right wrist surgery';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 81, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: where did arron oberholser play?", "schema": "CREATE TABLE table_name_84 (country VARCHAR, player VARCHAR)", "sql": "SELECT country FROM table_name_84 WHERE player = 'arron oberholser';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 68, "num_statements": 1} {"question": "PostgreSQL regression test 'misc': Write the SELECT query (example 52).", "schema": null, "sql": "SELECT name(equipment(hobby_construct(text 'skywalking', text 'mer')));", "explanation": "Regression test for Misc in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT name(equipment(hobby_construct(text 'skywalking', text 'mer')))) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 71, "num_statements": 1} {"question": "What is the average temperature required for growing cotton?", "schema": "CREATE TABLE Crop (id INT, name VARCHAR(255), minimum_temperature INT, maximum_temperature INT); INSERT INTO Crop (id, name, minimum_temperature, maximum_temperature) VALUES (1, 'Cotton', 15, 30), (2, 'Soybean', 10, 35), (3, 'Corn', 5, 30);", "sql": "SELECT AVG(Crop.minimum_temperature + (Crop.maximum_temperature - Crop.minimum_temperature)/2) FROM Crop WHERE Crop.name = 'Cotton';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 132, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the highest total with a t12 finish?", "schema": "CREATE TABLE table_name_61 (total INTEGER, finish VARCHAR)", "sql": "SELECT MAX(total) FROM table_name_61 WHERE finish = 't12';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 58, "num_statements": 1} {"question": "What is the total production capacity of all tanks located in the Western region?", "schema": "CREATE TABLE Production (tank VARCHAR(20), capacity INT, location VARCHAR(20)); INSERT INTO Production (tank, capacity, location) VALUES ('Tank7', 200000, 'Western'), ('Tank8', 250000, 'Western');", "sql": "SELECT SUM(capacity) FROM Production WHERE location = 'Western';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 64, "num_statements": 1} {"question": "What is the total investment in network infrastructure for each quarter of the year?", "schema": "CREATE TABLE infrastructure_investments (investment_date DATE, investment_amount DECIMAL(10,2)); INSERT INTO infrastructure_investments (investment_date, investment_amount) VALUES ('2021-01-01', 250000), ('2021-04-01', 300000), ('2021-07-01', 200000), ('2021-10-01', 350000);", "sql": "SELECT EXTRACT(QUARTER FROM investment_date) AS quarter, SUM(investment_amount) FROM infrastructure_investments GROUP BY quarter;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 129, "num_statements": 1} {"question": "What is the total Shariah-compliant loan amount issued per month?", "schema": "CREATE TABLE shariah_compliant_loans_over_time (id INT, loan_date DATE, amount FLOAT); INSERT INTO shariah_compliant_loans_over_time (id, loan_date, amount) VALUES (1, '2021-01-01', 350000), (2, '2021-02-01', 400000), (3, '2021-03-01', 450000), (4, '2021-01-01', 200000), (5, '2021-02-01', 300000);", "sql": "SELECT DATE_FORMAT(loan_date, '%Y-%m') as month, SUM(amount) as total_amount FROM shariah_compliant_loans_over_time GROUP BY month ORDER BY month;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 146, "num_statements": 1} {"question": "What is the total production quantity (in metric tons) of Holmium from the mine with the ID 1 for the year 2017?", "schema": "CREATE TABLE production (id INT, mine_id INT, year INT, element TEXT, production_quantity INT); INSERT INTO production (id, mine_id, year, element, production_quantity) VALUES (1, 1, 2017, 'Holmium', 100), (2, 2, 2017, 'Holmium', 150), (3, 3, 2017, 'Holmium', 200), (4, 1, 2017, 'Dysprosium', 200), (5, 2, 2017, 'Dysprosium', 250), (6, 3, 2017, 'Dysprosium', 300);", "sql": "SELECT SUM(production_quantity) FROM production WHERE mine_id = 1 AND year = 2017 AND element = 'Holmium';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 106, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which Set 2 has a Date of 25 may, and a Set 3 of 21–25?", "schema": "CREATE TABLE table_name_45 (set_2 VARCHAR, date VARCHAR, set_3 VARCHAR)", "sql": "SELECT set_2 FROM table_name_45 WHERE date = '25 may' AND set_3 = '21–25';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 74, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: what's the poles with pts being 81", "schema": "CREATE TABLE table_14139408_1 (poles VARCHAR, pts VARCHAR)", "sql": "SELECT poles FROM table_14139408_1 WHERE pts = '81';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 52, "num_statements": 1} {"question": "PostgreSQL regression test 'join': Write the SELECT query (example 509).", "schema": null, "sql": "select p.* from\n (parent p left join child c on (p.k = c.k)) join parent x on p.k = x.k\n where p.k = 1 and p.k = 2;", "explanation": "Regression test for Join in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select p.* from\n (parent p left join child c on (p.k = c.k)) join parent x on p.k = x.k\n where p.k = 1 and p.k = 2) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 117, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the to par of the player with a 68-67-75=210?", "schema": "CREATE TABLE table_name_99 (to_par VARCHAR, score VARCHAR)", "sql": "SELECT to_par FROM table_name_99 WHERE score = 68 - 67 - 75 = 210;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: what is the minimum voted yes where percent no is 68.2", "schema": "CREATE TABLE table_120778_2 (voted_yes INTEGER, percent_no VARCHAR)", "sql": "SELECT MIN(voted_yes) FROM table_120778_2 WHERE percent_no = '68.2';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 68, "num_statements": 1} {"question": "Which train stations have accessibility features?", "schema": "CREATE TABLE train_stations (station_id INT, station_name TEXT, is_accessible BOOLEAN); INSERT INTO train_stations (station_id, station_name, is_accessible) VALUES (1, 'Union Station', true), (2, 'City Hall', false), (3, 'Downtown Crossing', true);", "sql": "SELECT station_id, station_name, is_accessible FROM train_stations WHERE is_accessible = true;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 98, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: HOW MUCH WAS THE OVERALL FOR ERIK KARLSSON?", "schema": "CREATE TABLE table_11803648_17 (overall INTEGER)", "sql": "SELECT MIN(overall) FROM table_11803648_17;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 43, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the rank of Manuel Cortina Martínez?", "schema": "CREATE TABLE table_name_46 (rank VARCHAR, athletes VARCHAR)", "sql": "SELECT rank FROM table_name_46 WHERE athletes = 'manuel cortina martínez';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 74, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many episodes had their first air date on March 6, 2008?", "schema": "CREATE TABLE table_11220799_2 (episode_titles VARCHAR, first_air_date VARCHAR)", "sql": "SELECT COUNT(episode_titles) FROM table_11220799_2 WHERE first_air_date = 'March 6, 2008';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 90, "num_statements": 1} {"question": "What is the maximum number of military personnel deployed by African Union in peacekeeping operations?", "schema": "CREATE SCHEMA if not exists defense; CREATE TABLE if not exists african_union_pk_operations (id INT PRIMARY KEY, year INT, military_personnel INT); INSERT INTO african_union_pk_operations (id, year, military_personnel) VALUES (1, 2018, 3000), (2, 2019, 3500), (3, 2020, 4000), (4, 2021, 4500);", "sql": "SELECT MAX(military_personnel) FROM defense.african_union_pk_operations;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 72, "num_statements": 1} {"question": "Which players from the 'players' table have the highest average scores in the 'scores' table, and how many high scores did they achieve?", "schema": "CREATE TABLE players (player_id INT, name VARCHAR(50)); INSERT INTO players VALUES (1, 'John'); INSERT INTO players VALUES (2, 'Jane'); CREATE TABLE scores (score_id INT, player_id INT, score INT); INSERT INTO scores VALUES (1, 1, 90); INSERT INTO scores VALUES (2, 1, 95); INSERT INTO scores VALUES (3, 2, 85); INSERT INTO scores VALUES (4, 2, 88);", "sql": "SELECT p.name, AVG(s.score) as avg_score, COUNT(*) as high_scores FROM players p JOIN scores s ON p.player_id = s.player_id WHERE s.score >= (SELECT AVG(score) FROM scores) GROUP BY p.player_id ORDER BY avg_score DESC, high_scores DESC;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 236, "num_statements": 1} {"question": "What is the average number of green buildings per state in the 'green_buildings' table?", "schema": "CREATE TABLE green_buildings (state VARCHAR(255), building_type VARCHAR(255));", "sql": "SELECT state, AVG(cnt) FROM (SELECT state, COUNT(*) AS cnt FROM green_buildings GROUP BY state) AS state_building_counts;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 121, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many people wrote episode number 2 of the season?", "schema": "CREATE TABLE table_25737761_4 (writer VARCHAR, _number VARCHAR)", "sql": "SELECT COUNT(writer) FROM table_25737761_4 WHERE _number = 2;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 61, "num_statements": 1} {"question": "Who are the genetic researchers working on gene editing techniques in the UK?", "schema": "CREATE SCHEMA if not exists genetics; CREATE TABLE if not exists genetics.researchers (id INT, name VARCHAR(100), country VARCHAR(50), expertise VARCHAR(50)); INSERT INTO genetics.researchers (id, name, country, expertise) VALUES (1, 'John Doe', 'UK', 'CRISPR'); INSERT INTO genetics.researchers (id, name, country, expertise) VALUES (2, 'Jane Smith', 'US', 'CRISPR');", "sql": "SELECT name FROM genetics.researchers WHERE country = 'UK' AND expertise = 'CRISPR';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 84, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many 2006 subscribers are named Vodafone?", "schema": "CREATE TABLE table_29395291_2 (subscribers__2006___thousands_ VARCHAR, provider VARCHAR)", "sql": "SELECT COUNT(subscribers__2006___thousands_) FROM table_29395291_2 WHERE provider = 'Vodafone';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 95, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the party where the constituency is 10. Tindivanam?", "schema": "CREATE TABLE table_22753245_1 (party VARCHAR, constituency VARCHAR)", "sql": "SELECT party FROM table_22753245_1 WHERE constituency = '10. Tindivanam';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 73, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is Ips-provectus transmittance/contrast ratio?", "schema": "CREATE TABLE table_name_16 (transmittance__contrast_ratio VARCHAR, name VARCHAR)", "sql": "SELECT transmittance__contrast_ratio FROM table_name_16 WHERE name = 'ips-provectus';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 85, "num_statements": 1} {"question": "What is the minimum number of visits for any exhibition?", "schema": "CREATE TABLE ExhibitionStats (exhibition_id INT, min_visits INT, max_visits INT); INSERT INTO ExhibitionStats (exhibition_id, min_visits, max_visits) VALUES (1, 1000, 2000), (2, 1500, 2500), (3, 2000, 3000);", "sql": "SELECT MIN(min_visits) FROM ExhibitionStats;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 44, "num_statements": 1} {"question": "What is the total volume of freight forwarded from Japan to Singapore?", "schema": "CREATE TABLE Singapore_Freight (id INT, origin_country VARCHAR(50), destination_country VARCHAR(50), volume FLOAT); INSERT INTO Singapore_Freight (id, origin_country, destination_country, volume) VALUES (1, 'Japan', 'Singapore', 123.5), (2, 'Japan', 'Singapore', 234.6), (3, 'Malaysia', 'Singapore', 345.7);", "sql": "SELECT SUM(volume) FROM Singapore_Freight WHERE origin_country = 'Japan' AND destination_country = 'Singapore';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 111, "num_statements": 1} {"question": "Write the PL/pgSQL object from PostgreSQL regression test 'strings' (example 83).", "schema": null, "sql": "--\n-- test SQL string functions\n-- E### and T### are feature reference numbers from SQL99\n--\n\n-- E021-09 trim function\nSELECT TRIM(BOTH FROM ' bunch o blanks ') = 'bunch o blanks' AS \"bunch o blanks\";", "explanation": "PL/pgSQL object from PostgreSQL core test for Strings.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 202, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which Cultural and Educational Panel has a Labour Panel larger than 5, and an Industrial and Commercial Panel larger than 9?", "schema": "CREATE TABLE table_name_9 (cultural_and_educational_panel VARCHAR, labour_panel VARCHAR, industrial_and_commercial_panel VARCHAR)", "sql": "SELECT COUNT(cultural_and_educational_panel) FROM table_name_9 WHERE labour_panel > 5 AND industrial_and_commercial_panel > 9;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 126, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the result for the Green-Communist party when the Left Bloc has 3.0%?", "schema": "CREATE TABLE table_name_87 (green_communist VARCHAR, left_bloc VARCHAR)", "sql": "SELECT green_communist FROM table_name_87 WHERE left_bloc = '3.0%';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 67, "num_statements": 1} {"question": "What is the total recycling rate for each region in the year 2020?", "schema": "CREATE TABLE RecyclingAmount (region VARCHAR(50), year INT, amount FLOAT); INSERT INTO RecyclingAmount (region, year, amount) VALUES ('Region1', 2018, 600000.0), ('Region1', 2019, 700000.0), ('Region1', 2020, 800000.0), ('Region2', 2018, 500000.0), ('Region2', 2019, 600000.0), ('Region2', 2020, 700000.0);", "sql": "SELECT r.region, (SUM(r.amount) / (SELECT SUM(wg.amount) FROM WasteGeneration wg WHERE wg.year = 2020 AND EXISTS (SELECT 1 FROM RecyclingCenters rc WHERE rc.region = wg.city)) * 100) FROM RecyclingAmount r WHERE r.year = 2020 GROUP BY r.region;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 244, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What are the average points that have december 27?", "schema": "CREATE TABLE table_name_36 (points INTEGER, december VARCHAR)", "sql": "SELECT AVG(points) FROM table_name_36 WHERE december = 27;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 58, "num_statements": 1} {"question": "Which countries in Africa have a carbon pricing (in USD/ton) that is lower than the average for the continent?", "schema": "CREATE TABLE africa_carbon_pricing (id INT, country VARCHAR(50), price FLOAT); INSERT INTO africa_carbon_pricing (id, country, price) VALUES (1, 'South Africa', 10.5), (2, 'Egypt', 15.2), (3, 'Nigeria', 5.1);", "sql": "SELECT country, price FROM africa_carbon_pricing WHERE price < (SELECT AVG(price) FROM africa_carbon_pricing);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 110, "num_statements": 1} {"question": "How many rural hospitals are in the \"rural_hospitals_2\" table?", "schema": "CREATE TABLE rural_hospitals_2 (id INT, name TEXT, location TEXT, capacity INT); INSERT INTO rural_hospitals_2 (id, name, location, capacity) VALUES (1, 'Hospital C', 'City3', 75), (2, 'Hospital D', 'City4', 60);", "sql": "SELECT COUNT(*) FROM rural_hospitals_2;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 39, "num_statements": 1} {"question": "What is the total revenue generated by ticket sales for each month and team?", "schema": "CREATE TABLE monthly_ticket_sales (ticket_id INT, team_id INT, date DATE, price INT);", "sql": "SELECT EXTRACT(MONTH FROM date) as month, team_id, SUM(price) as total_revenue FROM monthly_ticket_sales GROUP BY month, team_id;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 129, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: what is the tries against where points is 60?", "schema": "CREATE TABLE table_12828723_4 (tries_against VARCHAR, points VARCHAR)", "sql": "SELECT tries_against FROM table_12828723_4 WHERE points = '60';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 63, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the minimum laid down?", "schema": "CREATE TABLE table_12592074_1 (laid_down INTEGER)", "sql": "SELECT MIN(laid_down) FROM table_12592074_1;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 44, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many states are there?", "schema": "CREATE TABLE area_code_state (Id VARCHAR)", "sql": "SELECT COUNT(*) FROM area_code_state;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 37, "num_statements": 1} {"question": "How many unique fans reside in 'FL' and have an average ticket spending of over $50 in the 'fan_demographics' and 'ticket_sales' tables?", "schema": "CREATE TABLE fan_demographics (fan_id INT, age INT, state VARCHAR(2)); CREATE TABLE ticket_sales (ticket_id INT, fan_id INT, event_id INT, price DECIMAL(5,2));", "sql": "SELECT COUNT(DISTINCT fan_id) FROM fan_demographics fd JOIN ticket_sales ts ON fd.fan_id = ts.fan_id WHERE fd.state = 'FL' AND (ts.price / (SELECT COUNT(*) FROM ticket_sales ts2 WHERE ts.fan_id = ts2.fan_id)) > 50;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 214, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Where did Footscray play as the away team?", "schema": "CREATE TABLE table_name_27 (venue VARCHAR, away_team VARCHAR)", "sql": "SELECT venue FROM table_name_27 WHERE away_team = 'footscray';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 62, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: The wildcats belong to what school?", "schema": "CREATE TABLE table_name_38 (school VARCHAR, team VARCHAR)", "sql": "SELECT school FROM table_name_38 WHERE team = 'wildcats';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 57, "num_statements": 1} {"question": "What is the oldest chemical still in production?", "schema": "CREATE TABLE chemical_lifecycle (id INT PRIMARY KEY, chemical_name VARCHAR(255), year_introduced INT, production_status VARCHAR(255)); INSERT INTO chemical_lifecycle (id, chemical_name, year_introduced, production_status) VALUES (1, 'Hydrochloric Acid', 1950, 'Produced'); INSERT INTO chemical_lifecycle (id, chemical_name, year_introduced, production_status) VALUES (2, 'Sodium Hydroxide', 1980, 'Discontinued');", "sql": "SELECT chemical_name, MIN(year_introduced) AS first_introduced FROM chemical_lifecycle WHERE production_status = 'Produced' GROUP BY chemical_name ORDER BY first_introduced;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 173, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'spgist' (example 1).", "schema": null, "sql": "create table spgist_point_tbl(id int4, p point);", "explanation": "DDL from PostgreSQL core regression test for Spgist.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_table", "is_postgresql_specific": false, "sql_length": 48, "num_statements": 1} {"question": "Find the average age and total salary of employees in the 'mining_operations' table, for employees with the job_title 'Engineer'?", "schema": "CREATE TABLE employees (id INT, first_name VARCHAR(50), last_name VARCHAR(50), job_title VARCHAR(50), department VARCHAR(50), age INT, salary DECIMAL(10,2), PRIMARY KEY (id)); INSERT INTO employees (id, first_name, last_name, job_title, department, age, salary) VALUES (1, 'John', 'Doe', 'Engineer', 'Mining', 35, 80000.00), (2, 'Jane', 'Doe', 'Operator', 'Mining', 28, 60000.00), (3, 'Mike', 'Johnson', 'Manager', 'Environment', 45, 90000.00);", "sql": "SELECT AVG(age), AVG(salary) FROM employees WHERE job_title = 'Engineer';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 73, "num_statements": 1} {"question": "What is the total quantity of resources depleted in each region for the past 5 years?", "schema": "CREATE TABLE resources (id INT, region TEXT, quantity FLOAT); CREATE TABLE depletions (resource_id INT, year INT, quantity FLOAT); INSERT INTO resources (id, region, quantity) VALUES (1, 'Region A', 50000.0), (2, 'Region B', 60000.0); INSERT INTO depletions (resource_id, year, quantity) VALUES (1, 2017, 5000.0), (1, 2018, 5500.0), (1, 2019, 6000.0), (1, 2020, 6500.0), (1, 2021, 7000.0), (2, 2017, 6000.0), (2, 2018, 6500.0), (2, 2019, 7000.0), (2, 2020, 7500.0), (2, 2021, 8000.0);", "sql": "SELECT resources.region, SUM(depletions.quantity) FROM resources INNER JOIN depletions ON resources.id = depletions.resource_id WHERE depletions.year BETWEEN 2017 AND 2021 GROUP BY resources.region;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 198, "num_statements": 1} {"question": "What is the trend in mental health parity violations for each state over the past four years?", "schema": "CREATE TABLE mental_health_parity_trend (state VARCHAR(2), year INT, violations INT); INSERT INTO mental_health_parity_trend (state, year, violations) VALUES ('CA', 2018, 10), ('CA', 2019, 15), ('CA', 2020, 20), ('CA', 2021, 25), ('NY', 2018, 15), ('NY', 2019, 20), ('NY', 2020, 25), ('NY', 2021, 30), ('TX', 2018, 5), ('TX', 2019, 10), ('TX', 2020, 15), ('TX', 2021, 20);", "sql": "SELECT m.state, m.year, m.violations, LAG(m.violations) OVER (PARTITION BY m.state ORDER BY m.year) as prev_year_violations FROM mental_health_parity_trend m;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "query_window_function", "is_postgresql_specific": true, "sql_length": 158, "num_statements": 1} {"question": "What is the total energy consumption per mining company, and the average energy consumption per mining operation?", "schema": "CREATE TABLE mining_companies (company_id INT, company_name TEXT); INSERT INTO mining_companies (company_id, company_name) VALUES (1001, 'Mining Corp A'), (1002, 'Mining Corp B'), (1003, 'Mining Corp C'); CREATE TABLE mining_operations (operation_id INT, operation_name TEXT, country TEXT, energy_consumption FLOAT, company_id INT); INSERT INTO mining_operations (operation_id, operation_name, country, energy_consumption, company_id) VALUES (1, 'Porgera Mine', 'Papua New Guinea', 20000, 1001), (2, 'Cerro Verde Mine', 'Peru', 30000, 1001), (3, 'Gruyere Mine', 'Australia', 40000, 1002), (4, 'Veladero Mine', 'Argentina', 50000, 1002), (5, 'Kidd Mine', 'Canada', 60000, 1003), (6, 'Ducktown Mine', 'USA', 70000, 1003);", "sql": "SELECT mining_companies.company_name, SUM(mining_operations.energy_consumption) AS total_energy_consumption FROM mining_companies JOIN mining_operations ON mining_companies.company_id = mining_operations.company_id GROUP BY mining_companies.company_name; SELECT AVG(energy_consumption) AS average_energy_consumption FROM mining_operations;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 339, "num_statements": 2} {"question": "What is the minimum and maximum water pH for each tank in the 'tank_data' table?", "schema": "CREATE TABLE tank_data (tank_id INT, species VARCHAR(255), water_ph DECIMAL(5,2)); INSERT INTO tank_data (tank_id, species, water_ph) VALUES (1, 'Tilapia', 7.5), (2, 'Salmon', 6.0), (3, 'Tilapia', 7.8), (4, 'Catfish', 7.2), (5, 'Salmon', 6.5);", "sql": "SELECT tank_id, MIN(water_ph) as min_ph, MAX(water_ph) as max_ph FROM tank_data GROUP BY tank_id;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 97, "num_statements": 1} {"question": "What is the yearly growth rate in revenue for the \"hip-hop\" genre in the Asian region?", "schema": "CREATE TABLE YearlyRevenue(id INT, genre VARCHAR(10), region VARCHAR(10), revenue FLOAT, year INT);", "sql": "SELECT (SUM(revenue) OVER (PARTITION BY genre ORDER BY year ROWS BETWEEN 1 FOLLOWING AND UNBOUNDED FOLLOWING) - SUM(revenue))/SUM(revenue) OVER (PARTITION BY genre) AS yearly_growth_rate FROM YearlyRevenue WHERE genre = 'hip-hop' AND region = 'Asian';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "query_window_function", "is_postgresql_specific": true, "sql_length": 251, "num_statements": 1} {"question": "What is the average sales figure for drugs that were approved by the EMA and contain the word 'Plus' in their name?", "schema": "CREATE TABLE drug (id INT, name TEXT, approval_authority TEXT, sales FLOAT); INSERT INTO drug (id, name, approval_authority, sales) VALUES (1, 'DrugA Plus', 'EMA', 30000000); INSERT INTO drug (id, name, approval_authority, sales) VALUES (2, 'DrugB', 'EMA', 40000000); INSERT INTO drug (id, name, approval_authority, sales) VALUES (3, 'PlusDrug', 'EMA', 50000000);", "sql": "SELECT AVG(sales) FROM drug WHERE approval_authority = 'EMA' AND name LIKE '%Plus%';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 84, "num_statements": 1} {"question": "What is the average CO2 emission of each textile mill, grouped by country and displayed in ascending order based on the emission amount?", "schema": "CREATE TABLE CO2Emission (mill TEXT, country TEXT, co2_kg FLOAT); INSERT INTO CO2Emission (mill, country, co2_kg) VALUES ('MillA', 'China', 120.5), ('MillB', 'Bangladesh', 80.3), ('MillC', 'Vietnam', 160.1), ('MillD', 'India', 95.6);", "sql": "SELECT country, AVG(co2_kg) as avg_co2 FROM CO2Emission GROUP BY country ORDER BY avg_co2 ASC;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 94, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the location that was founded 1798", "schema": "CREATE TABLE table_2293402_2 (location VARCHAR, founded VARCHAR)", "sql": "SELECT location FROM table_2293402_2 WHERE founded = 1798;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 58, "num_statements": 1} {"question": "What is the percentage of Indigenous employees in the Mining department?", "schema": "CREATE TABLE Employees (EmployeeID INT, Name VARCHAR(50), Department VARCHAR(50), Gender VARCHAR(50), Community VARCHAR(50)); INSERT INTO Employees (EmployeeID, Name, Department, Gender, Community) VALUES (8, 'Emily Brown', 'Mining', 'Female', 'Indigenous'); INSERT INTO Employees (EmployeeID, Name, Department, Gender, Community) VALUES (9, 'Michael White', 'Mining', 'Male', 'Indigenous');", "sql": "SELECT (COUNT(*) * 100.0 / (SELECT COUNT(*) FROM Employees WHERE Department = 'Mining')) AS Percentage FROM Employees WHERE Department = 'Mining' AND Community = 'Indigenous';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 175, "num_statements": 1} {"question": "What is the distribution of energy sources in rural areas?", "schema": "CREATE TABLE energy_sources (location VARCHAR(50), source VARCHAR(50), percentage FLOAT);", "sql": "SELECT source, SUM(percentage) AS percentage FROM energy_sources WHERE location = 'rural' GROUP BY source;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 106, "num_statements": 1} {"question": "What is the market share of electric vehicles in India?", "schema": "CREATE TABLE VehicleSales (Type VARCHAR(50), Country VARCHAR(50), Sales INT); INSERT INTO VehicleSales (Type, Country, Sales) VALUES ('Electric', 'India', 50000), ('Gasoline', 'India', 2000000), ('Diesel', 'India', 1500000);", "sql": "SELECT (Sales * 100.0 / (SELECT SUM(Sales) FROM VehicleSales WHERE Country = 'India')) FROM VehicleSales WHERE Type = 'Electric' AND Country = 'India';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 151, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the total sum of 50m splits for josefin lillhage in lanes above 8?", "schema": "CREATE TABLE table_name_24 (split__50m_ INTEGER, name VARCHAR, lane VARCHAR)", "sql": "SELECT SUM(split__50m_) FROM table_name_24 WHERE name = 'josefin lillhage' AND lane > 8;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 88, "num_statements": 1} {"question": "Delete records in the CargoTable that have a weight of zero", "schema": "CREATE TABLE CargoTable (CargoId INT PRIMARY KEY, VesselId INT, CargoName VARCHAR(50), Weight INT);", "sql": "DELETE FROM CargoTable WHERE Weight = 0;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_delete", "is_postgresql_specific": false, "sql_length": 40, "num_statements": 1} {"question": "Number of public schools in each region of Brazil in 2021.", "schema": "CREATE TABLE schools (id INT, name VARCHAR(50), region VARCHAR(50), country VARCHAR(50), year INT); INSERT INTO schools (id, name, region, country, year) VALUES (1, 'Escola Estadual', 'Sudeste', 'Brazil', 2021), (2, 'Colégio Municipal', 'Nordeste', 'Brazil', 2021), (3, 'Escola Municipal', 'Centro-Oeste', 'Brazil', 2021), (4, 'Escola Federal', 'Norte', 'Brazil', 2021), (5, 'Escola Particular', 'Sul', 'Brazil', 2021);", "sql": "SELECT region, COUNT(*) FROM schools WHERE country = 'Brazil' GROUP BY region;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 78, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What's the sum of swing to gain with a winning party 2007 of Conservative with a rank smaller than 5?", "schema": "CREATE TABLE table_name_70 (swing_to_gain INTEGER, winning_party_2007 VARCHAR, rank VARCHAR)", "sql": "SELECT SUM(swing_to_gain) FROM table_name_70 WHERE winning_party_2007 = 'conservative' AND rank < 5;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 100, "num_statements": 1} {"question": "Get the total energy storage capacity (MWh) in France", "schema": "CREATE TABLE energy_storage (id INT, country VARCHAR(50), capacity FLOAT); INSERT INTO energy_storage (id, country, capacity) VALUES (1, 'United Kingdom', 3000), (2, 'Germany', 4000), (3, 'France', 2500);", "sql": "SELECT SUM(capacity) FROM energy_storage WHERE country = 'France';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "Which countries participated in space exploration missions to Venus?", "schema": "CREATE TABLE Venus_Missions (Mission_ID INT, Mission_Name VARCHAR(50), Country VARCHAR(50), Launch_Year INT, PRIMARY KEY (Mission_ID)); INSERT INTO Venus_Missions (Mission_ID, Mission_Name, Country, Launch_Year) VALUES (1, 'Venera 7', 'Soviet Union', 1970), (2, 'Magellan', 'United States', 1989), (3, 'Akatsuki', 'Japan', 2010);", "sql": "SELECT DISTINCT Country FROM Venus_Missions;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 44, "num_statements": 1} {"question": "How many unique customers prefer cruelty-free cosmetics and have made a purchase in the last month?", "schema": "CREATE TABLE sales (sale_id INT, customer_id INT, product_id INT, sale_date DATE); INSERT INTO sales (sale_id, customer_id, product_id, sale_date) VALUES (1, 1001, 1, '2022-03-05'), (2, 1002, 2, '2022-03-06'), (3, 1003, 3, '2022-03-07'), (4, 1004, 4, '2022-03-08'), (5, 1001, 5, '2022-04-01'), (6, 1005, 1, '2022-04-02'); CREATE TABLE consumer_preferences (preference_id INT, customer_id INT, preference VARCHAR(255)); INSERT INTO consumer_preferences (preference_id, customer_id, preference) VALUES (1, 1001, 'Cruelty-free'), (2, 1002, 'Vegan'), (3, 1003, 'Natural'), (4, 1004, 'Cruelty-free'), (5, 1005, 'Cruelty-free');", "sql": "SELECT COUNT(DISTINCT sales.customer_id) FROM sales JOIN consumer_preferences ON sales.customer_id = consumer_preferences.customer_id WHERE preference = 'Cruelty-free' AND sale_date >= DATE_SUB(CURRENT_DATE, INTERVAL 1 MONTH);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 226, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'foreign_key' (example 1158).", "schema": null, "sql": "CREATE CONSTRAINT TRIGGER trig_del_fk_parted AFTER DELETE ON fkpart11.fk_parted INITIALLY DEFERRED FOR EACH ROW EXECUTE FUNCTION fkpart11.print_row();", "explanation": "DDL from PostgreSQL core regression test for Foreign Key.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_delete", "is_postgresql_specific": false, "sql_length": 150, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who was the leading scorer on the game played on February 25?", "schema": "CREATE TABLE table_name_28 (leading_scorer VARCHAR, date VARCHAR)", "sql": "SELECT leading_scorer FROM table_name_28 WHERE date = 'february 25';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 68, "num_statements": 1} {"question": "Which museums have the highest number of modern art pieces?", "schema": "CREATE TABLE Museums (id INT, name VARCHAR(50), type VARCHAR(50)); INSERT INTO Museums (id, name, type) VALUES (1, 'Metropolitan Museum', 'Art'), (2, 'Natural History Museum', 'Science'); CREATE TABLE ArtPieces (id INT, title VARCHAR(50), museumId INT, artType VARCHAR(50)); INSERT INTO ArtPieces (id, title, museumId, artType) VALUES (1, 'Mona Lisa', 1, 'Modern'), (2, 'Starry Night', 1, 'Modern'), (3, 'Dinosaur Fossil', 2, 'Ancient');", "sql": "SELECT Museums.name FROM Museums JOIN ArtPieces ON Museums.id = ArtPieces.museumId WHERE ArtPieces.artType = 'Modern' GROUP BY Museums.name ORDER BY COUNT(*) DESC;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 163, "num_statements": 1} {"question": "Update the genre of the artist 'Selena Gomez' to 'Pop-Folk'", "schema": "CREATE TABLE Artists (ArtistID INT PRIMARY KEY, ArtistName VARCHAR(100), Genre VARCHAR(50));", "sql": "UPDATE Artists SET Genre = 'Pop-Folk' WHERE ArtistName = 'Selena Gomez';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 72, "num_statements": 1} {"question": "List the community education programs that have received more than 500 donations.", "schema": "CREATE TABLE if NOT EXISTS community_education (program_id INT, program_name VARCHAR(50), donation_count INT); INSERT INTO community_education (program_id, program_name, donation_count) VALUES (1, 'Wildlife Conservation 101', 500); INSERT INTO community_education (program_id, program_name, donation_count) VALUES (2, 'Endangered Species Awareness', 300); INSERT INTO community_education (program_id, program_name, donation_count) VALUES (3, 'Habitat Protection Techniques', 700);", "sql": "SELECT program_name FROM community_education WHERE donation_count > 500;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 72, "num_statements": 1} {"question": "Add new mobile_subscribers from historically underrepresented communities.", "schema": "CREATE TABLE mobile_subscribers (subscriber_id INT, data_usage FLOAT, community_representation VARCHAR(30)); INSERT INTO mobile_subscribers (subscriber_id, data_usage, community_representation) VALUES (5, 35.6, 'Latinx'), (6, 45.7, 'Black/African American'), (7, 55.8, 'Native American'), (8, 65.9, 'Asian');", "sql": "INSERT INTO mobile_subscribers (subscriber_id, data_usage, community_representation) VALUES (9, 75.1, 'Pacific Islander'), (10, 85.2, 'Two or More Races'), (11, 95.3, 'Decline to State'), (12, 105.4, 'Not Hispanic or Latino');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 226, "num_statements": 1} {"question": "Create a table named 'sales_tax' with columns 'region' and 'tax_percentage'", "schema": "CREATE TABLE sales_tax (region VARCHAR(50), tax_percentage DECIMAL(5,2));", "sql": "CREATE TABLE sales_tax (region VARCHAR(50), tax_percentage DECIMAL(5,2));", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_table", "is_postgresql_specific": false, "sql_length": 73, "num_statements": 1} {"question": "Add a new autonomous bus to the public transportation fleet in Vancouver.", "schema": "CREATE TABLE public_transportation (transport_id INT, type VARCHAR(20), city VARCHAR(20)); INSERT INTO public_transportation (transport_id, type, city) VALUES (1, 'Bus', 'Vancouver'), (2, 'Tram', 'Vancouver'), (3, 'Train', 'Vancouver');", "sql": "INSERT INTO public_transportation (transport_id, type, city) VALUES (4, 'Autonomous Bus', 'Vancouver');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 103, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which CFL team did the player from British Columbia get drafted to", "schema": "CREATE TABLE table_16441561_5 (cfl_team VARCHAR, college VARCHAR)", "sql": "SELECT cfl_team FROM table_16441561_5 WHERE college = 'British Columbia';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 73, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the sum of bronzes for teams with more than 0 silver and a total under 1?", "schema": "CREATE TABLE table_name_22 (bronze INTEGER, silver VARCHAR, total VARCHAR)", "sql": "SELECT SUM(bronze) FROM table_name_22 WHERE silver > 0 AND total < 1;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 69, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Where does CF Pachuca play?", "schema": "CREATE TABLE table_name_64 (game_site VARCHAR, opponent VARCHAR)", "sql": "SELECT game_site FROM table_name_64 WHERE opponent = 'cf pachuca';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the order for a red list of 7 in the didelphidae family?", "schema": "CREATE TABLE table_name_91 (order VARCHAR, red_list VARCHAR, family VARCHAR)", "sql": "SELECT order FROM table_name_91 WHERE red_list = 7 AND family = 'didelphidae';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 78, "num_statements": 1} {"question": "Show the number of workers in each department by gender for the past month.", "schema": "CREATE TABLE Workforce (ID INT, Department VARCHAR(255), Gender VARCHAR(255), HireDate DATE); INSERT INTO Workforce (ID, Department, Gender, HireDate) VALUES (1, 'Mining', 'Male', '2021-12-01'), (2, 'Mining', 'Male', '2021-11-01'), (3, 'Mining', 'Female', '2021-10-01'), (4, 'Maintenance', 'Male', '2021-12-01'), (5, 'Maintenance', 'Female', '2021-11-01'), (6, 'Maintenance', 'Male', '2021-10-01'), (7, 'Environment', 'Female', '2021-12-01'), (8, 'Environment', 'Female', '2021-11-01'), (9, 'Environment', 'Male', '2021-10-01'), (10, 'Safety', 'Male', '2021-12-01'), (11, 'Safety', 'Female', '2021-11-01'), (12, 'Safety', 'Male', '2021-10-01');", "sql": "SELECT Department, Gender, COUNT(*) as Number_of_Workers FROM Workforce WHERE HireDate >= DATEADD(MONTH, -1, GETDATE()) GROUP BY Department, Gender;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 148, "num_statements": 1} {"question": "What are the names of all chemical compounds that have been used in the production of a hazardous product in the past 6 months?", "schema": "CREATE TABLE chemical_compounds (compound_id INT, name TEXT); CREATE TABLE product_compounds (compound_id INT, product_id INT); CREATE TABLE products (product_id INT, hazardous_flag BOOLEAN, production_date DATE);", "sql": "SELECT chemical_compounds.name FROM chemical_compounds INNER JOIN product_compounds ON chemical_compounds.compound_id = product_compounds.compound_id INNER JOIN products ON product_compounds.product_id = products.product_id WHERE products.hazardous_flag = TRUE AND products.production_date > DATEADD(month, -6, GETDATE());", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 322, "num_statements": 1} {"question": "List all exploration projects in the 'Asia-Pacific' region, along with their start dates.", "schema": "CREATE TABLE exploration_projects (project_id INT, project_name VARCHAR(50), region VARCHAR(50), start_date DATE); INSERT INTO exploration_projects (project_id, project_name, region, start_date) VALUES (1, 'Project X', 'Asia-Pacific', '2020-01-01');", "sql": "SELECT project_name, start_date FROM exploration_projects WHERE region = 'Asia-Pacific';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 88, "num_statements": 1} {"question": "What is the total water consumption by all sectors in 2015 and 2016?", "schema": "CREATE TABLE total_consumption (year INT, sector TEXT, consumption FLOAT); INSERT INTO total_consumption (year, sector, consumption) VALUES (2015, 'residential', 123.5), (2015, 'commercial', 234.6), (2016, 'residential', 130.2), (2016, 'commercial', 240.1);", "sql": "SELECT consumption FROM total_consumption WHERE year IN (2015, 2016);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 69, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the Site when the Result was w52-0?", "schema": "CREATE TABLE table_name_92 (site VARCHAR, result VARCHAR)", "sql": "SELECT site FROM table_name_92 WHERE result = 'w52-0';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 54, "num_statements": 1} {"question": "What is the total CO2 emissions for each mining operation in the past 6 months, ordered by the most emitting operation?", "schema": "CREATE TABLE mining_operations (id INT, name TEXT, co2_emissions INT, operation_date DATE); INSERT INTO mining_operations (id, name, co2_emissions, operation_date) VALUES (1, 'Operation X', 12000, '2021-07-01'), (2, 'Operation Y', 15000, '2021-07-01'), (3, 'Operation Z', 18000, '2021-07-01');", "sql": "SELECT name, SUM(co2_emissions) FROM mining_operations WHERE operation_date >= DATEADD(month, -6, GETDATE()) GROUP BY name ORDER BY SUM(co2_emissions) DESC;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 156, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the highest overall pick that has c as the position, with an NFL Draft greater than 1977?", "schema": "CREATE TABLE table_name_27 (overall_pick INTEGER, position VARCHAR, nfl_draft VARCHAR)", "sql": "SELECT MAX(overall_pick) FROM table_name_27 WHERE position = 'c' AND nfl_draft > 1977;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 86, "num_statements": 1} {"question": "List the mining sites in the 'Asia-Pacific' region with environmental impact scores above 85.", "schema": "CREATE TABLE mining_sites (id INT, site_name VARCHAR(50), location VARCHAR(50), environmental_score FLOAT); INSERT INTO mining_sites (id, site_name, location, environmental_score) VALUES (1, 'Site A', 'Australia', 82.50);", "sql": "SELECT site_name, environmental_score FROM mining_sites WHERE location LIKE 'Asia-Pacific' AND environmental_score > 85.00;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 123, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'without_overlaps' (example 311).", "schema": null, "sql": "INSERT INTO temporal_mltrng (id, valid_at) VALUES ('[2,3)', datemultirange(daterange('2005-01-01', '2006-01-01'))) ON CONFLICT DO NOTHING;", "explanation": "DML from PostgreSQL core regression test for Without Overlaps.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "dml_insert", "is_postgresql_specific": true, "sql_length": 138, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the name of team 1 that was after the 2005 season and with a 4-2 score?", "schema": "CREATE TABLE table_name_11 (team_1 VARCHAR, season VARCHAR, score VARCHAR)", "sql": "SELECT team_1 FROM table_name_11 WHERE season > 2005 AND score = '4-2';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 71, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What team was he on when he finished in 11th position?", "schema": "CREATE TABLE table_24491017_1 (team VARCHAR, position VARCHAR)", "sql": "SELECT team FROM table_24491017_1 WHERE position = '11th';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 58, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What To par scored 72-71-68=211?", "schema": "CREATE TABLE table_name_29 (to_par VARCHAR, score VARCHAR)", "sql": "SELECT to_par FROM table_name_29 WHERE score = 72 - 71 - 68 = 211;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "How many deep-sea expeditions were conducted by country?'", "schema": "CREATE TABLE deep_sea_expeditions (expedition_id INT, country VARCHAR(50), year INT);", "sql": "SELECT country, COUNT(expedition_id) AS num_expeditions FROM deep_sea_expeditions GROUP BY country;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 103, "num_statements": 1} {"question": "How many fans attended football games in Los Angeles during 2022?", "schema": "CREATE TABLE games (team VARCHAR(255), city VARCHAR(255), date DATE, attendance INT); INSERT INTO games (team, city, date, attendance) VALUES ('LA Rams', 'Los Angeles', '2022-09-08', 70000), ('LA Chargers', 'Los Angeles', '2022-09-11', 65000);", "sql": "SELECT SUM(attendance) FROM games WHERE city = 'Los Angeles' AND YEAR(date) = 2022 AND sport = 'Football';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 106, "num_statements": 1} {"question": "How many 'public' investors have made investments in the 'healthcare' sector?", "schema": "CREATE TABLE investors (investor_id INT, investor_name VARCHAR(30), investor_type VARCHAR(20)); CREATE TABLE investments (investment_id INT, investor_id INT, sector_id INT);", "sql": "SELECT COUNT(*) FROM investments i INNER JOIN investors j ON i.investor_id = j.investor_id WHERE j.investor_type = 'public' AND i.sector_id IN (SELECT sector_id FROM sectors WHERE sector_name = 'healthcare');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 208, "num_statements": 1} {"question": "PostgreSQL regression test 'text': Write the SELECT query (example 29).", "schema": null, "sql": "select format('Hello %%');", "explanation": "Regression test for Text in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select format('Hello %%')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 26, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the score when the away team was norwich city?", "schema": "CREATE TABLE table_24887326_8 (score_1 VARCHAR, away_team VARCHAR)", "sql": "SELECT score_1 FROM table_24887326_8 WHERE away_team = 'Norwich City';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 70, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the result of the game from September 10, 2000?", "schema": "CREATE TABLE table_name_68 (result VARCHAR, date VARCHAR)", "sql": "SELECT result FROM table_name_68 WHERE date = 'september 10, 2000';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 67, "num_statements": 1} {"question": "What is the average distance traveled per day by all electric vehicles in the city of Seattle?", "schema": "CREATE TABLE ElectricVehicles (id INT, make VARCHAR(50), model VARCHAR(50), daily_distance FLOAT, city VARCHAR(50)); INSERT INTO ElectricVehicles (id, make, model, daily_distance, city) VALUES (1, 'Tesla', 'Model 3', 45.6, 'Seattle'); INSERT INTO ElectricVehicles (id, make, model, daily_distance, city) VALUES (2, 'Chevrolet', 'Bolt', 38.2, 'Seattle');", "sql": "SELECT AVG(daily_distance) FROM ElectricVehicles WHERE city = 'Seattle';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 72, "num_statements": 1} {"question": "Write the SQL definition from PostgreSQL contrib extension 'contrib' (item 315).", "schema": null, "sql": "CREATE OPERATOR <> (\n\tPROCEDURE = isnne,\n\tLEFTARG = ismn,\n\tRIGHTARG = ismn13,\n\tCOMMUTATOR = <>,\n\tNEGATOR = =,\n\tRESTRICT = neqsel,\n\tJOIN = neqjoinsel);", "explanation": "SQL definition from the 'contrib' PostgreSQL contrib extension.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "ddl_advanced", "is_postgresql_specific": true, "sql_length": 150, "num_statements": 1} {"question": "Which team won the most titles in the NFL?", "schema": "CREATE TABLE nfl_titles (team VARCHAR(50), titles INT); INSERT INTO nfl_titles (team, titles) VALUES ('Pittsburgh Steelers', 6), ('New England Patriots', 6), ('Dallas Cowboys', 5);", "sql": "SELECT team, MAX(titles) AS most_titles FROM nfl_titles;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 56, "num_statements": 1} {"question": "What is the minimum ocean acidification level in the Pacific Ocean?", "schema": "CREATE TABLE ocean_acidification (location TEXT, value FLOAT); INSERT INTO ocean_acidification (location, value) VALUES ('Pacific Ocean', 8.1), ('Atlantic Ocean', 8.0);", "sql": "SELECT MIN(value) FROM ocean_acidification WHERE location = 'Pacific Ocean';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 76, "num_statements": 1} {"question": "Write the SQL definition from PostgreSQL contrib extension 'contrib' (item 30).", "schema": null, "sql": "CREATE FUNCTION hs_contains(hstore,hstore)\nRETURNS bool\nAS 'MODULE_PATHNAME','hstore_contains'\nLANGUAGE C STRICT IMMUTABLE PARALLEL SAFE;", "explanation": "SQL definition from the 'contrib' PostgreSQL contrib extension.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 137, "num_statements": 1} {"question": "Calculate the total population of all animals in Australian conservation programs", "schema": "CREATE TABLE conservation_programs (id INT, program_name VARCHAR(255), location VARCHAR(255)); CREATE TABLE animal_populations (id INT, program_id INT, animal_type VARCHAR(255), population INT); INSERT INTO conservation_programs (id, program_name, location) VALUES (1, 'Australian Wildlife Conservancy', 'Australia'), (2, 'Taronga Conservation Society', 'Australia'); INSERT INTO animal_populations (id, program_id, animal_type, population) VALUES (1, 1, 'Kangaroo', 10000), (2, 1, 'Wallaby', 5000), (3, 2, 'Koala', 8000), (4, 2, 'Wombat', 2000);", "sql": "SELECT SUM(animal_populations.population) FROM conservation_programs INNER JOIN animal_populations ON conservation_programs.id = animal_populations.program_id WHERE conservation_programs.location = 'Australia';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 210, "num_statements": 1} {"question": "Delete diversity_metrics records for company_id 102", "schema": "CREATE TABLE diversity_metrics (id INT PRIMARY KEY, company_id INT, gender VARCHAR(50), diversity_score DECIMAL(3,2));", "sql": "DELETE FROM diversity_metrics WHERE company_id = 102;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_delete", "is_postgresql_specific": false, "sql_length": 53, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the date for attendance more than 20,268", "schema": "CREATE TABLE table_name_47 (date VARCHAR, attendance INTEGER)", "sql": "SELECT date FROM table_name_47 WHERE attendance > 20 OFFSET 268;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 64, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the lane for notes Q, SB and time less than 11.22?", "schema": "CREATE TABLE table_name_31 (lane INTEGER, notes VARCHAR, time___sec__ VARCHAR)", "sql": "SELECT SUM(lane) FROM table_name_31 WHERE notes = 'q, sb' AND time___sec__ < 11.22;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 83, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What player has 14 wins?", "schema": "CREATE TABLE table_name_83 (player VARCHAR, wins VARCHAR)", "sql": "SELECT player FROM table_name_83 WHERE wins = 14;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 49, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'partition_prune' (example 9).", "schema": null, "sql": "create table lp_null partition of lp for values in (null);", "explanation": "DDL from PostgreSQL core regression test for Partition Prune.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_table", "is_postgresql_specific": false, "sql_length": 58, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What year and the corresponding ceremony was the english titled movie \"time out\" submitted?", "schema": "CREATE TABLE table_22102732_1 (year__ceremony_ VARCHAR, english_title VARCHAR)", "sql": "SELECT year__ceremony_ FROM table_22102732_1 WHERE english_title = 'Time Out';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 78, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: When dayton is the team what is the record?", "schema": "CREATE TABLE table_29556461_8 (record VARCHAR, team VARCHAR)", "sql": "SELECT record FROM table_29556461_8 WHERE team = 'Dayton';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 58, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the highest November that has a game less than 12, and @ detroit red wings as the opponent?", "schema": "CREATE TABLE table_name_57 (november INTEGER, game VARCHAR, opponent VARCHAR)", "sql": "SELECT MAX(november) FROM table_name_57 WHERE game < 12 AND opponent = '@ detroit red wings';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 93, "num_statements": 1} {"question": "What percentage of faculty members in the Physics department are female?", "schema": "CREATE TABLE faculty (id INT, faculty_name VARCHAR(255), department VARCHAR(255), gender VARCHAR(255)); INSERT INTO faculty (id, faculty_name, department, gender) VALUES (1, 'Faculty1', 'Physics', 'Male'), (2, 'Faculty2', 'Physics', 'Female'), (3, 'Faculty3', 'Physics', 'Male'), (4, 'Faculty4', 'Mathematics', 'Female'), (5, 'Faculty5', 'Mathematics', 'Male');", "sql": "SELECT (COUNT(*) FILTER (WHERE gender = 'Female')) * 100.0 / COUNT(*) as percentage FROM faculty WHERE department = 'Physics';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": true, "sql_length": 126, "num_statements": 1} {"question": "PostgreSQL regression test 'window': Write the SELECT query (example 75).", "schema": null, "sql": "SELECT * FROM v_window;", "explanation": "Regression test for Window in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT * FROM v_window) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 23, "num_statements": 1} {"question": "What is the average checking account balance in the Boston branch?", "schema": "CREATE TABLE accounts (customer_id INT, account_type VARCHAR(20), branch VARCHAR(20), balance DECIMAL(10,2)); INSERT INTO accounts (customer_id, account_type, branch, balance) VALUES (1, 'Savings', 'New York', 5000.00), (2, 'Checking', 'New York', 7000.00), (3, 'Checking', 'Boston', 8000.00), (4, 'Savings', 'Boston', 4000.00);", "sql": "SELECT AVG(balance) FROM accounts WHERE account_type = 'Checking' AND branch = 'Boston';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 88, "num_statements": 1} {"question": "Find the number of sustainable tourism activities in each country.", "schema": "CREATE TABLE SustainableTourismActivities (activity_id INT, activity_name TEXT, country TEXT, local_economic_impact FLOAT); INSERT INTO SustainableTourismActivities (activity_id, activity_name, country, local_economic_impact) VALUES (1, 'Biking Tour', 'Portugal', 12000.0), (2, 'Hiking Adventure', 'Portugal', 15000.0), (3, 'Surfing Lesson', 'Australia', 8000.0);", "sql": "SELECT country, COUNT(*) FROM SustainableTourismActivities GROUP BY country;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 76, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What primary sponsor has the owner Rick Hendrick and their crew chief is Alan Gustafson?", "schema": "CREATE TABLE table_name_50 (primary_sponsor_s_ VARCHAR, owner_s_ VARCHAR, crew_chief VARCHAR)", "sql": "SELECT primary_sponsor_s_ FROM table_name_50 WHERE owner_s_ = 'rick hendrick' AND crew_chief = 'alan gustafson';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 112, "num_statements": 1} {"question": "What is the maximum cargo weight transported by any vessel for each destination?", "schema": "CREATE TABLE cargo_data(id INT, vessel_name VARCHAR(50), destination VARCHAR(50), cargo_weight DECIMAL(5,2)); INSERT INTO cargo_data(id, vessel_name, destination, cargo_weight) VALUES (1, 'Vessel A', 'Port A', 200.0), (2, 'Vessel B', 'Port A', 250.0);", "sql": "SELECT destination, MAX(cargo_weight) FROM cargo_data GROUP BY destination;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 75, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What title runs for 3:22?", "schema": "CREATE TABLE table_name_5 (title VARCHAR, time VARCHAR)", "sql": "SELECT title FROM table_name_5 WHERE time = '3:22';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 51, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What country has a score less than 68, and paul casey as the player?", "schema": "CREATE TABLE table_name_95 (country VARCHAR, score VARCHAR, player VARCHAR)", "sql": "SELECT country FROM table_name_95 WHERE score < 68 AND player = 'paul casey';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 77, "num_statements": 1} {"question": "Which programs have the highest average donation in the 'Programs' and 'Donations' tables?", "schema": "CREATE TABLE Programs (ProgramID INT, ProgramName VARCHAR(50)); CREATE TABLE Donations (DonorID INT, ProgramID INT, Amount DECIMAL(10, 2)); INSERT INTO Programs (ProgramID, ProgramName) VALUES (1, 'Youth Mentoring'), (2, 'Food Bank'), (3, 'Climate Action'); INSERT INTO Donations (DonorID, ProgramID, Amount) VALUES (1, 1, 500.00), (2, 2, 250.00), (3, 1, 1000.00), (4, 3, 750.00);", "sql": "SELECT Programs.ProgramName, AVG(Donations.Amount) as AverageDonation FROM Programs INNER JOIN Donations ON Programs.ProgramID = Donations.ProgramID GROUP BY Programs.ProgramID ORDER BY AverageDonation DESC LIMIT 1;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 215, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which team had a qualifying 2 time of 1:01.093?", "schema": "CREATE TABLE table_name_11 (team VARCHAR, qual_2 VARCHAR)", "sql": "SELECT team FROM table_name_11 WHERE qual_2 = '1:01.093';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 57, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the coronie with a 0.7% nickerie?", "schema": "CREATE TABLE table_name_92 (coronie VARCHAR, nickerie VARCHAR)", "sql": "SELECT coronie FROM table_name_92 WHERE nickerie = '0.7%';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 58, "num_statements": 1} {"question": "What is the total number of healthcare providers in the 'rural_clinic' table who are specialized in cardiology?", "schema": "CREATE TABLE rural_clinic (id INT, name VARCHAR(50), specialty VARCHAR(50)); INSERT INTO rural_clinic (id, name, specialty) VALUES (1, 'John Doe', 'Cardiology'), (2, 'Jane Smith', 'Pediatrics'), (3, 'Michael Brown', 'Cardiology');", "sql": "SELECT COUNT(*) FROM rural_clinic WHERE specialty = 'Cardiology';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 65, "num_statements": 1} {"question": "What is the name and capacity of each sports stadium in the state of Florida?", "schema": "CREATE TABLE states (id INT, name VARCHAR(255)); CREATE TABLE stadiums (id INT, state_id INT, name VARCHAR(255), capacity INT);", "sql": "SELECT name, capacity FROM stadiums WHERE state_id = (SELECT id FROM states WHERE name = 'Florida');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 100, "num_statements": 1} {"question": "Which AI system has the lowest explainability rating in the legal domain?", "schema": "CREATE TABLE explainability_domain (ai_system TEXT, domain TEXT, rating FLOAT); INSERT INTO explainability_domain (ai_system, domain, rating) VALUES ('AI Judge', 'Legal', 0.50), ('Legal Analytics Tool', 'Legal', 0.85), ('AI Contract Review', 'Legal', 0.75);", "sql": "SELECT ai_system, MIN(rating) FROM explainability_domain WHERE domain = 'Legal';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 80, "num_statements": 1} {"question": "What is the total number of military technology patents filed by Asian countries?", "schema": "CREATE TABLE MilitaryPatents (id INT PRIMARY KEY, country VARCHAR(50), technology VARCHAR(50), date DATE); INSERT INTO MilitaryPatents (id, country, technology, date) VALUES (1, 'China', 'Stealth Technology', '2020-01-01'), (2, 'Japan', 'Artificial Intelligence', '2019-12-15'), (3, 'South Korea', 'Cyber Security', '2020-03-02');", "sql": "SELECT country, COUNT(*) as total_patents FROM MilitaryPatents WHERE country IN ('China', 'Japan', 'South Korea') GROUP BY country;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 131, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many goals when the points 1 is 38 and the played number is less than 42?", "schema": "CREATE TABLE table_name_24 (goals_for INTEGER, points_1 VARCHAR, played VARCHAR)", "sql": "SELECT SUM(goals_for) FROM table_name_24 WHERE points_1 = 38 AND played < 42;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 77, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which current venues location is Mason, Ohio?", "schema": "CREATE TABLE table_14903081_1 (current_venue VARCHAR, location VARCHAR)", "sql": "SELECT current_venue FROM table_14903081_1 WHERE location = 'Mason, Ohio';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 74, "num_statements": 1} {"question": "What is the total number of climate mitigation projects in Asia that were completed before 2015?", "schema": "CREATE TABLE climate_mitigation_projects (project_id INT, project_name TEXT, location TEXT, project_type TEXT, start_date DATE, end_date DATE);", "sql": "SELECT COUNT(project_id) FROM climate_mitigation_projects WHERE location LIKE '%Asia%' AND end_date < '2015-01-01';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 115, "num_statements": 1} {"question": "Insert a new record for a donation of $500 made by a corporate donor named \"ABC Corp\" on March 15, 2022.", "schema": "CREATE TABLE donations (id INT, donor_id INT, donation_date DATE, amount_donated DECIMAL(10,2)); CREATE TABLE donors (id INT, name TEXT, donor_type TEXT);", "sql": "INSERT INTO donations (id, donor_id, donation_date, amount_donated) VALUES (1, (SELECT id FROM donors WHERE name = 'ABC Corp' AND donor_type = 'Corporate' LIMIT 1), '2022-03-15', 500); INSERT INTO donors (id, name, donor_type) VALUES (1, 'ABC Corp', 'Corporate') ON DUPLICATE KEY UPDATE id = id;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 295, "num_statements": 2} {"question": "Show a query using PostgreSQL contrib extension 'pgcrypto' (example 3).", "schema": null, "sql": "select encrypt('foo', '01234589', 'des');", "explanation": "Example query from the 'pgcrypto' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 41, "num_statements": 1} {"question": "Which drugs were tested in clinical trials and approved, grouped by approval status?", "schema": "CREATE TABLE ClinicalTrials (trial_id INT, drug_name VARCHAR(255), trial_status VARCHAR(255)); INSERT INTO ClinicalTrials (trial_id, drug_name, trial_status) VALUES (1, 'DrugD', 'Completed'), (2, 'DrugD', 'Failed'), (3, 'DrugE', 'Completed'), (4, 'DrugF', 'In Progress'); CREATE TABLE DrugApproval (drug_name VARCHAR(255), approval_date DATE); INSERT INTO DrugApproval (drug_name, approval_date) VALUES ('DrugD', '2021-03-10'), ('DrugE', '2020-11-25');", "sql": "SELECT ct.trial_status, ct.drug_name FROM ClinicalTrials ct JOIN DrugApproval da ON ct.drug_name = da.drug_name GROUP BY ct.trial_status, ct.drug_name;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 151, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'pageinspect' (example 37).", "schema": null, "sql": "SELECT * FROM hash_page_items(get_raw_page('test_hash_a_idx', 4));", "explanation": "Example query from the 'pageinspect' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "Show teacher_id, training_type, and completed_date from the teacher_training table", "schema": "CREATE TABLE teacher_training (id INT PRIMARY KEY, teacher_id INT, training_type VARCHAR(255), completed_date DATE);", "sql": "SELECT teacher_id, training_type, completed_date FROM teacher_training;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 71, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the name of the tournament played 13 November 2000?", "schema": "CREATE TABLE table_name_35 (tournament VARCHAR, date VARCHAR)", "sql": "SELECT tournament FROM table_name_35 WHERE date = '13 november 2000';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 69, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: From what country is the player with a score of 68-71-76=215?", "schema": "CREATE TABLE table_name_70 (country VARCHAR, score VARCHAR)", "sql": "SELECT country FROM table_name_70 WHERE score = 68 - 71 - 76 = 215;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 67, "num_statements": 1} {"question": "How many solar energy projects are there in Canada with a commissioned date after 2015?", "schema": "CREATE TABLE solar_energy (project_id INT, project_name VARCHAR(255), country VARCHAR(255), commissioned_date DATE);", "sql": "SELECT COUNT(*) FROM solar_energy WHERE country = 'Canada' AND commissioned_date > '2015-12-31';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 96, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which Visitor played on January 17?", "schema": "CREATE TABLE table_name_67 (visitor VARCHAR, date VARCHAR)", "sql": "SELECT visitor FROM table_name_67 WHERE date = 'january 17';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 60, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the 54 holes for The Open Championship (4)?", "schema": "CREATE TABLE table_name_7 (championship VARCHAR)", "sql": "SELECT 54 AS _holes FROM table_name_7 WHERE championship = 'the open championship (4)';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 87, "num_statements": 1} {"question": "PostgreSQL regression test 'indexing': Write the SELECT query (example 230).", "schema": null, "sql": "select c.relname, pg_get_indexdef(indexrelid)\n from pg_class c join pg_index i on c.oid = i.indexrelid\n where indrelid::regclass::text like 'idxpart%'\n order by indexrelid::regclass::text collate \"C\";", "explanation": "Regression test for Indexing in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select c.relname, pg_get_indexdef(indexrelid)\n from pg_class c join pg_index i on c.oid = i.indexrelid\n where indrelid::regclass::text like 'idxpart%'\n order by indexrelid::regclass::text collate \"C\") AS _sub;", "source": "postgresql_regression_tests", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": true, "sql_length": 203, "num_statements": 1} {"question": "What are the names of all satellites launched by India?", "schema": "CREATE TABLE Satellites (Id INT, Name VARCHAR(50), LaunchYear INT, Country VARCHAR(50)); INSERT INTO Satellites (Id, Name, LaunchYear, Country) VALUES (1, 'Sat1', 2018, 'USA'), (2, 'Sat2', 2019, 'USA'), (3, 'Sat3', 2020, 'USA'), (4, 'Sat4', 2020, 'China'), (5, 'Sat5', 2020, 'Russia'), (6, 'Sat6', 2018, 'Germany'), (7, 'Sat7', 2019, 'India'), (8, 'Sat8', 2020, 'India'), (9, 'Sat9', 2020, 'India');", "sql": "SELECT Name FROM Satellites WHERE Country = 'India';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 52, "num_statements": 1} {"question": "What is the average salary of workers in the manufacturing industry, grouped by their job role and location?", "schema": "CREATE TABLE salaries (worker_id INT, job_role VARCHAR(255), location VARCHAR(255), salary FLOAT);", "sql": "SELECT location, job_role, AVG(salary) FROM salaries GROUP BY location, job_role;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 81, "num_statements": 1} {"question": "What is the average age of astronauts at their first space mission?", "schema": "CREATE TABLE astronauts(id INT, name VARCHAR(50), age INT, first_mission_year INT); INSERT INTO astronauts VALUES(1, 'Yang Liwei', 38, 2003), (2, 'Valentina Tereshkova', 26, 1963);", "sql": "SELECT AVG(age - first_mission_year) FROM astronauts;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 53, "num_statements": 1} {"question": "PostgreSQL regression test 'horology': Write the SELECT query (example 131).", "schema": null, "sql": "SELECT (timestamp without time zone 'tomorrow' > 'now') as \"True\";", "explanation": "Regression test for Horology in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT (timestamp without time zone 'tomorrow' > 'now') as \"True\") AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the high points for record 9-4", "schema": "CREATE TABLE table_22654073_6 (high_points VARCHAR, record VARCHAR)", "sql": "SELECT high_points FROM table_22654073_6 WHERE record = '9-4';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 62, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the score in the final for runner-up and hard surface with opponents being michaël llodra nenad zimonjić", "schema": "CREATE TABLE table_name_1 (score_in_the_final VARCHAR, opponents_in_the_final VARCHAR, outcome VARCHAR, surface VARCHAR)", "sql": "SELECT score_in_the_final FROM table_name_1 WHERE outcome = 'runner-up' AND surface = 'hard' AND opponents_in_the_final = 'michaël llodra nenad zimonjić';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 154, "num_statements": 1} {"question": "Insert a new product with an ID of 5, a department of 'home_decor', and a price of 14.99.", "schema": "CREATE TABLE products (product_id INT, department VARCHAR(20), price DECIMAL(5,2));", "sql": "INSERT INTO products (product_id, department, price) VALUES (5, 'home_decor', 14.99);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 85, "num_statements": 1} {"question": "What is the number of donors and total donation amount for each program?", "schema": "CREATE TABLE donations (donor_id INT, program_id VARCHAR(20), amount DECIMAL(10,2)); INSERT INTO donations (donor_id, program_id, amount) VALUES (1, 'Education', 500.00), (2, 'Health', 300.00), (3, 'Education', 250.00);", "sql": "SELECT program_id, COUNT(DISTINCT donor_id) AS num_donors, SUM(amount) AS total_donations FROM donations GROUP BY program_id;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 125, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many FA cups for the player with under 5 champs, 0 league cups, and over 3 total?", "schema": "CREATE TABLE table_name_13 (fa_cup INTEGER, total VARCHAR, championship VARCHAR, league_cup VARCHAR)", "sql": "SELECT SUM(fa_cup) FROM table_name_13 WHERE championship < 5 AND league_cup = 0 AND total > 3;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 94, "num_statements": 1} {"question": "Show me the cybersecurity policies that were dropped in the last week.", "schema": "CREATE TABLE cybersecurity_policies (id INT, name VARCHAR(50), description TEXT, date DATE); INSERT INTO cybersecurity_policies (id, name, description, date) VALUES (1, 'Incident response policy', 'Outlines the process for responding to security incidents', '2022-04-15'), (2, 'Access control policy', 'Defines who has access to what resources', '2022-05-05'), (3, 'Password policy', 'Defines password complexity and length requirements', '2022-05-12');", "sql": "SELECT * FROM cybersecurity_policies WHERE date >= DATEADD(week, -1, GETDATE()) AND name = 'Password policy';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 109, "num_statements": 1} {"question": "Which country has the most exploration projects?", "schema": "CREATE TABLE exploration_projects (project_id INT, project_name VARCHAR(50), country VARCHAR(50)); INSERT INTO exploration_projects (project_id, project_name, country) VALUES (1, 'Project X', 'Brazil'), (2, 'Project Y', 'Nigeria'), (3, 'Project Z', 'Brazil');", "sql": "SELECT countries.country_name, COUNT(exploration_projects.project_id) FROM exploration_projects INNER JOIN countries ON exploration_projects.country = countries.country_name GROUP BY countries.country_name ORDER BY COUNT(exploration_projects.project_id) DESC LIMIT 1;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 267, "num_statements": 1} {"question": "PostgreSQL regression test 'misc_functions': Write the SELECT query (example 124).", "schema": null, "sql": "SELECT * FROM pg_split_walfile_name('invalid');", "explanation": "Regression test for Misc Functions in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT * FROM pg_split_walfile_name('invalid')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 47, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the lowest cuts made that had a Top-25 less than 6 and wins greater than 0?", "schema": "CREATE TABLE table_name_63 (cuts_made INTEGER, top_25 VARCHAR, wins VARCHAR)", "sql": "SELECT MIN(cuts_made) FROM table_name_63 WHERE top_25 < 6 AND wins < 0;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 71, "num_statements": 1} {"question": "What is the percentage of employees from underrepresented racial or ethnic groups, by department, for the entire company?", "schema": "CREATE TABLE employees (id INT, name VARCHAR(50), department VARCHAR(50), race VARCHAR(50)); INSERT INTO employees (id, name, department, race) VALUES (1, 'John Doe', 'IT', 'Caucasian'); INSERT INTO employees (id, name, department, race) VALUES (2, 'Jane Smith', 'HR', 'African American');", "sql": "SELECT department, SUM(CASE WHEN race IN ('African American', 'Hispanic', 'Native American', 'Asian', 'Pacific Islander') THEN 1 ELSE 0 END) / COUNT(*) * 100 AS pct_underrepresented FROM employees GROUP BY department;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 217, "num_statements": 1} {"question": "PostgreSQL regression test 'hash_func': Write the SELECT query (example 17).", "schema": null, "sql": "SELECT v as value, hash_array(v)::bit(32) as standard,\n hash_array_extended(v, 0)::bit(32) as extended0,\n hash_array_extended(v, 1)::bit(32) as extended1\nFROM (VALUES ('{0}'::int4[]), ('{0,1,2,3,4}'), ('{17,18,19,20}'),\n ('{42,34,65,98}'), ('{550273,590027, 870273}'),\n ('{207112489, 807112489}')) x(v)\nWHERE hash_array(v)::bit(32) != hash_array_extended(v, 0)::bit(32)\n OR hash_array(v)::bit(32) = hash_array_extended(v, 1)::bit(32);", "explanation": "Regression test for Hash Func in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT v as value, hash_array(v)::bit(32) as standard,\n hash_array_extended(v, 0)::bit(32) as extended0,\n hash_array_extended(v, 1)::bit(32) as extended1\nFROM (VALUES ('{0}'::int4[]), ('{0,1,2,3,4}'), ('{17,18,19,20}'),\n ('{42,34,65,98}'), ('{550273,590027, 870273}'),\n ('{207112489, 807112489}')) x(v)\nWHERE hash_array(v)::bit(32) != hash_array_extended(v, 0)::bit(32)\n OR hash_array(v)::bit(32) = hash_array_extended(v, 1)::bit(32)) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 469, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: In the tournament that has 15 events, and less than 6 top-25's, how many top-5's did he have?", "schema": "CREATE TABLE table_name_76 (top_5 INTEGER, events VARCHAR, top_25 VARCHAR)", "sql": "SELECT SUM(top_5) FROM table_name_76 WHERE events = 15 AND top_25 < 6;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 70, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the average rank of the province alborz, which had more than 14526 in 1956?", "schema": "CREATE TABLE table_name_95 (rank INTEGER, province VARCHAR)", "sql": "SELECT AVG(rank) FROM table_name_95 WHERE province = 'alborz' AND 1956 > 14526;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 79, "num_statements": 1} {"question": "pgTAP test for Partitions (assertion 38).", "schema": null, "sql": "SELECT * FROM check_test(\n partitions_are( 'public', 'parted', '{part1,part2,hide.part3}'::name[] ),\n true,\n 'partitions_are( sch, tab, parts )',\n 'Table public.parted should have the correct partitions',\n ''\n);", "explanation": "SQL assertion from pgTAP test suite for Partitions.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 226, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: When did the king who entered office in 1012 leave office?", "schema": "CREATE TABLE table_name_34 (left_office VARCHAR, entered_office VARCHAR)", "sql": "SELECT left_office FROM table_name_34 WHERE entered_office = '1012';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 68, "num_statements": 1} {"question": "PostgreSQL regression test 'strings': Write the SELECT query (example 475).", "schema": null, "sql": "SELECT '\\x80000000'::bytea::int4 AS \"-2147483648\", '\\x7FFFFFFF'::bytea::int4 AS \"2147483647\";", "explanation": "Regression test for Strings in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT '\\x80000000'::bytea::int4 AS \"-2147483648\", '\\x7FFFFFFF'::bytea::int4 AS \"2147483647\") AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 93, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'btree_gin' (example 11).", "schema": null, "sql": "SELECT * FROM test_timestamp WHERE i<'2004-10-27'::date ORDER BY i;", "explanation": "Example query from the 'btree_gin' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 67, "num_statements": 1} {"question": "Obtain threat intelligence metrics related to cyber attacks on defense networks", "schema": "CREATE TABLE threat_intelligence (threat_id INT, threat_source VARCHAR(50), attack_vector VARCHAR(50), network_impact FLOAT, date DATE); INSERT INTO threat_intelligence (threat_id, threat_source, attack_vector, network_impact, date) VALUES (1, 'APT10', 'Phishing', 75, '2020-01-05'); INSERT INTO threat_intelligence (threat_id, threat_source, attack_vector, network_impact, date) VALUES (2, 'APT28', 'Malware', 90, '2020-04-10');", "sql": "SELECT threat_source, attack_vector, network_impact, date FROM threat_intelligence;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 83, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Bronze of 2, and a Silver smaller than 0 then what is the sum of the gold?", "schema": "CREATE TABLE table_name_99 (gold INTEGER, bronze VARCHAR, silver VARCHAR)", "sql": "SELECT SUM(gold) FROM table_name_99 WHERE bronze = 2 AND silver < 0;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 68, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many bullet tips colors had other features of a blue band on case base?", "schema": "CREATE TABLE table_1036189_1 (bullet_tip_color VARCHAR, other_features VARCHAR)", "sql": "SELECT COUNT(bullet_tip_color) FROM table_1036189_1 WHERE other_features = 'Blue band on case base';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 100, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many different jockeys ran on 17 Feb 2007?", "schema": "CREATE TABLE table_14981555_3 (jockey VARCHAR, date VARCHAR)", "sql": "SELECT COUNT(jockey) FROM table_14981555_3 WHERE date = '17 Feb 2007';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 70, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: how many county with per capita income being $20,101", "schema": "CREATE TABLE table_1350350_2 (county VARCHAR, per_capita_income VARCHAR)", "sql": "SELECT COUNT(county) FROM table_1350350_2 WHERE per_capita_income = '$20,101';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 78, "num_statements": 1} {"question": "Which biosensor samples have expression levels higher than 5 for gene 'XYZ'?", "schema": "CREATE SCHEMA if not exists biosensor; USE biosensor; CREATE TABLE if not exists gene_expression (sample_id INT, gene_name VARCHAR(255), expression DECIMAL(5,2)); INSERT INTO gene_expression (sample_id, gene_name, expression) VALUES (1, 'ABC', 3.45), (2, 'ABC', 3.56), (3, 'XYZ', 5.12), (4, 'DEF', 2.98), (5, 'XYZ', 6.25), (6, 'GHI', 4.02);", "sql": "SELECT sample_id, gene_name, expression FROM biosensor.gene_expression WHERE gene_name = 'XYZ' AND expression > 5;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 114, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'rowsecurity' (example 847).", "schema": null, "sql": "CREATE TABLE tbl1 (c) AS VALUES ('bar'::text);", "explanation": "DDL from PostgreSQL core regression test for Rowsecurity.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_table", "is_postgresql_specific": false, "sql_length": 46, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the Gecko value for the item that has a Prince XML value of 'no' and a KHTML value of 'yes'?", "schema": "CREATE TABLE table_name_6 (gecko VARCHAR, prince_xml VARCHAR, khtml VARCHAR)", "sql": "SELECT gecko FROM table_name_6 WHERE prince_xml = 'yes' AND khtml = 'yes';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 74, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the season for runner up of judean rebels", "schema": "CREATE TABLE table_name_56 (season VARCHAR, runner_up VARCHAR)", "sql": "SELECT season FROM table_name_56 WHERE runner_up = 'judean rebels';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 67, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who was in lane 6 with a mark of 2:05.58 SB?", "schema": "CREATE TABLE table_name_84 (name VARCHAR, lane VARCHAR, mark VARCHAR)", "sql": "SELECT name FROM table_name_84 WHERE lane = 6 AND mark = '2:05.58 sb';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 70, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the Runner-up on April 11?", "schema": "CREATE TABLE table_name_36 (runner_up VARCHAR, week VARCHAR)", "sql": "SELECT runner_up FROM table_name_36 WHERE week = 'april 11';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 60, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who did the Blue Jays play against on April 11?", "schema": "CREATE TABLE table_name_52 (opponent VARCHAR, date VARCHAR)", "sql": "SELECT opponent FROM table_name_52 WHERE date = 'april 11';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 59, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'pageinspect' (example 9).", "schema": null, "sql": "SELECT * FROM brin_revmap_data(get_raw_page('test1_a_idx', 0)) LIMIT 5;", "explanation": "Example query from the 'pageinspect' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 71, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who is the Winnning driver in which lorenzo bandini has the fastest lap as well as the Pole position?", "schema": "CREATE TABLE table_name_21 (winning_driver VARCHAR, fastest_lap VARCHAR, pole_position VARCHAR)", "sql": "SELECT winning_driver FROM table_name_21 WHERE fastest_lap = 'lorenzo bandini' AND pole_position = 'lorenzo bandini';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 117, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the rank of the park that had a value of 5,040,000 in 2010?", "schema": "CREATE TABLE table_name_30 (rank VARCHAR)", "sql": "SELECT rank FROM table_name_30 WHERE 2010 = '5,040,000';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 56, "num_statements": 1} {"question": "How many public meetings have taken place in the Finance department since 2016?", "schema": "CREATE TABLE meetings (id INT, department TEXT, date DATE); INSERT INTO meetings (id, department, date) VALUES (1, 'Education', '2015-01-01'); INSERT INTO meetings (id, department, date) VALUES (2, 'Education', '2016-01-01'); INSERT INTO meetings (id, department, date) VALUES (3, 'Finance', '2015-01-01');", "sql": "SELECT COUNT(*) FROM meetings WHERE department = 'Finance' AND date >= '2016-01-01';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 84, "num_statements": 1} {"question": "How many network infrastructure investments were made in a specific country in the last year?", "schema": "CREATE TABLE network_investments (investment_id INT, investment_date DATE, country VARCHAR(50), investment_amount INT);", "sql": "SELECT country, COUNT(investment_id) FROM network_investments WHERE investment_date >= DATE_SUB(CURRENT_DATE, INTERVAL 1 YEAR) GROUP BY country;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 144, "num_statements": 1} {"question": "PostgreSQL regression test 'join': Write the SELECT query (example 36).", "schema": null, "sql": "SELECT tx.ii, tx.jj, tx.kk\n FROM (J1_TBL t1 (a, b, c) CROSS JOIN J2_TBL t2 (d, e))\n AS tx (ii, jj, tt, ii2, kk);", "explanation": "Regression test for Join in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT tx.ii, tx.jj, tx.kk\n FROM (J1_TBL t1 (a, b, c) CROSS JOIN J2_TBL t2 (d, e))\n AS tx (ii, jj, tt, ii2, kk)) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 116, "num_statements": 1} {"question": "What is the maximum recorded depth for a marine species habitat?", "schema": "CREATE TABLE marine_species (id INT, name VARCHAR(255), habitat_type VARCHAR(255), average_depth FLOAT); INSERT INTO marine_species (id, name, habitat_type, average_depth) VALUES (1, 'Clownfish', 'Coral Reef', 20.0); INSERT INTO marine_species (id, name, habitat_type, average_depth) VALUES (2, 'Blue Whale', 'Open Ocean', 200.0); CREATE TABLE ocean_depths (location VARCHAR(255), depth FLOAT); INSERT INTO ocean_depths (location, depth) VALUES ('Mariana Trench', 10994.0); INSERT INTO ocean_depths (location, depth) VALUES ('Sierra Leone Rise', 5791.0);", "sql": "SELECT MAX(od.depth) as max_depth FROM marine_species ms JOIN ocean_depths od ON ms.habitat_type = od.location;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 111, "num_statements": 1} {"question": "How many geopolitical risk assessments were conducted for Brazil in 2019 and 2020?", "schema": "CREATE TABLE Geopolitical_Risk_Assessments (assessment_id INT, assessment_date DATE, country VARCHAR(50)); INSERT INTO Geopolitical_Risk_Assessments (assessment_id, assessment_date, country) VALUES (1, '2019-05-12', 'Brazil'), (2, '2020-07-03', 'Brazil'), (3, '2021-11-28', 'Brazil');", "sql": "SELECT COUNT(assessment_id) FROM Geopolitical_Risk_Assessments WHERE country = 'Brazil' AND YEAR(assessment_date) IN (2019, 2020);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 130, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What country is David Graham from?", "schema": "CREATE TABLE table_name_94 (country VARCHAR, player VARCHAR)", "sql": "SELECT country FROM table_name_94 WHERE player = 'david graham';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 64, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the average number of goals conceded where more than 19 goals were scored, the team had 31 points, and more than 7 draws?", "schema": "CREATE TABLE table_name_80 (goals_conceded__gc_ INTEGER, draw__pe_ VARCHAR, goals_scored__gf_ VARCHAR, points__pts_ VARCHAR)", "sql": "SELECT AVG(goals_conceded__gc_) FROM table_name_80 WHERE goals_scored__gf_ > 19 AND points__pts_ = 31 AND draw__pe_ > 7;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 120, "num_statements": 1} {"question": "Show a SQL definition from the postgrest project (schema, item 49).", "schema": null, "sql": "create function change_role_statement_timeout(timeout text) returns void as $_$\nbegin\n execute format($$\n alter role current_user set statement_timeout = %L;\n $$, timeout);", "explanation": "SQL definition from the open-source postgrest PostgreSQL project.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "plpgsql_function", "is_postgresql_specific": false, "sql_length": 177, "num_statements": 2} {"question": "Write the DML statement from PostgreSQL regression test 'oid' (example 17).", "schema": null, "sql": "INSERT INTO OID_TBL(f1) VALUES (' - 500');", "explanation": "DML from PostgreSQL core regression test for Oid.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 42, "num_statements": 1} {"question": "Which counties in 'voting_data' table have less than 10,000 registered voters?", "schema": "CREATE TABLE voting_data (county VARCHAR(255), num_voters INT);", "sql": "SELECT county FROM voting_data WHERE num_voters < 10000;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 56, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'join' (example 808).", "schema": null, "sql": "insert into j2 values(1),(2),(3);", "explanation": "DML from PostgreSQL core regression test for Join.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 33, "num_statements": 1} {"question": "PostgreSQL regression test 'sqljson_queryfuncs': Write the SELECT query (example 181).", "schema": null, "sql": "SELECT * FROM unnest((JSON_QUERY(jsonb '{\"jsa\": [{\"a\": 1, \"b\": [\"foo\"]}, {\"a\": 2, \"c\": {}}, 123]}', '$' RETURNING sqljsonb_rec)).jsa);", "explanation": "Regression test for Sqljson Queryfuncs in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT * FROM unnest((JSON_QUERY(jsonb '{\"jsa\": [{\"a\": 1, \"b\": [\"foo\"]}, {\"a\": 2, \"c\": {}}, 123]}', '$' RETURNING sqljsonb_rec)).jsa)) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": true, "sql_length": 135, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which team was the opponent on December 30?", "schema": "CREATE TABLE table_name_17 (team VARCHAR, date VARCHAR)", "sql": "SELECT team FROM table_name_17 WHERE date = 'december 30';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 58, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many times did li ju win the womens singles and wang liqin win the mens singles?", "schema": "CREATE TABLE table_28138035_6 (womens_doubles VARCHAR, womens_singles VARCHAR, mens_singles VARCHAR)", "sql": "SELECT COUNT(womens_doubles) FROM table_28138035_6 WHERE womens_singles = 'Li Ju' AND mens_singles = 'Wang Liqin';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 114, "num_statements": 1} {"question": "What is the maximum calorie burn during 'Cardio' workouts for members residing in 'California'?", "schema": "CREATE TABLE Workouts (MemberID INT, State VARCHAR(20), WorkoutType VARCHAR(20), CaloriesBurned INT); INSERT INTO Workouts (MemberID, State, WorkoutType, CaloriesBurned) VALUES (1, 'California', 'Cardio', 300), (2, 'New York', 'Strength', 250), (3, 'California', 'Cardio', 350);", "sql": "SELECT MAX(CaloriesBurned) FROM Workouts WHERE State = 'California' AND WorkoutType = 'Cardio';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 95, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: For the game ending with a score of 28-43, what is the listed as the final record?", "schema": "CREATE TABLE table_name_86 (record VARCHAR, score VARCHAR)", "sql": "SELECT record FROM table_name_86 WHERE score = '28-43';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 55, "num_statements": 1} {"question": "Create a view named \"health_trends\" with columns \"metric_name\", \"latest_value\", and \"one_year_ago_value\". Only include metrics with measurement dates within the last year.", "schema": "CREATE TABLE ocean_health_metrics (metric_id INT PRIMARY KEY, metric_name VARCHAR(255), metric_value FLOAT, measurement_date DATE); INSERT INTO ocean_health_metrics (metric_id, metric_name, metric_value, measurement_date) VALUES (1, 'Dissolved oxygen', 6.5, '2022-01-01'), (2, 'pH', 8.1, '2022-01-02'), (3, 'Temperature', 15.2, '2022-01-03'), (4, 'Salinity', 34.8, '2022-01-04');", "sql": "CREATE VIEW health_trends AS SELECT metric_name, MAX(metric_value) AS latest_value, (SELECT metric_value FROM ocean_health_metrics ohm2 WHERE ohm2.metric_name = ohm.metric_name AND ohm2.measurement_date = DATE_SUB(CURDATE(), INTERVAL 1 YEAR)) AS one_year_ago_value FROM ocean_health_metrics ohm WHERE measurement_date >= DATE_SUB(CURDATE(), INTERVAL 1 YEAR) GROUP BY metric_name;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "ddl_view", "is_postgresql_specific": false, "sql_length": 379, "num_statements": 1} {"question": "What is the total installed capacity (in kW) of renewable energy projects for each city?", "schema": "CREATE TABLE renewable_projects (id INT, project_name VARCHAR(255), city VARCHAR(255), installed_capacity FLOAT);", "sql": "SELECT city, SUM(installed_capacity) FROM renewable_projects GROUP BY city;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 75, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the sum of gold medals for the United States with silver medal count greater than 3?", "schema": "CREATE TABLE table_name_26 (gold INTEGER, nation VARCHAR, silver VARCHAR)", "sql": "SELECT SUM(gold) FROM table_name_26 WHERE nation = 'united states' AND silver > 3;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 82, "num_statements": 1} {"question": "How many dispensaries exist in Colorado with a valid license in 2023?", "schema": "CREATE TABLE dispensaries (id INT, name TEXT, state TEXT, license_expiry DATE); INSERT INTO dispensaries (id, name, state, license_expiry) VALUES (1, 'Dispensary C', 'Colorado', '2023-05-01');", "sql": "SELECT COUNT(*) as num_dispensaries FROM dispensaries WHERE state = 'Colorado' AND license_expiry >= '2023-01-01';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 114, "num_statements": 1} {"question": "What is the percentage change in water consumption for households in Tokyo from 2017 to 2018?", "schema": "CREATE TABLE Household_Water_Usage (Household_ID INT, City VARCHAR(20), Year INT, Water_Consumption FLOAT); INSERT INTO Household_Water_Usage (Household_ID, City, Year, Water_Consumption) VALUES (1, 'Tokyo', 2017, 120.5), (2, 'Tokyo', 2018, 110.2);", "sql": "SELECT (COUNT(*) * 100.0 / (SELECT COUNT(*) FROM Household_Water_Usage WHERE City = 'Tokyo' AND Year = 2017)) - 100.0 AS Percentage_Change FROM Household_Water_Usage WHERE City = 'Tokyo' AND Year = 2018;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 203, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What college did the player for the Hamilton Tiger-Cats go to?", "schema": "CREATE TABLE table_21321804_3 (college VARCHAR, cfl_team VARCHAR)", "sql": "SELECT college FROM table_21321804_3 WHERE cfl_team = 'Hamilton Tiger-Cats';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 76, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the Cover Date of the Story Title Spacehikers (Part 2)?", "schema": "CREATE TABLE table_name_59 (cover_date VARCHAR, story_title VARCHAR)", "sql": "SELECT cover_date FROM table_name_59 WHERE story_title = 'spacehikers (part 2)';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 80, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'rowsecurity' (example 90).", "schema": null, "sql": "CREATE TABLE uaccount (\n pguser name primary key,\n seclv int\n);", "explanation": "DDL from PostgreSQL core regression test for Rowsecurity.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_table", "is_postgresql_specific": false, "sql_length": 80, "num_statements": 1} {"question": "How many donors have donated to each cause?", "schema": "CREATE TABLE Donors (DonorID INT, DonorName VARCHAR(50), DonationAmount DECIMAL(10,2), CauseID INT);CREATE TABLE Causes (CauseID INT, CauseName VARCHAR(50));", "sql": "SELECT C.CauseName, COUNT(D.DonorID) FROM Donors D JOIN Causes C ON D.CauseID = C.CauseID GROUP BY C.CauseName;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 111, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many countries were sampled for the index in 2nd place in the LA ranking and 23rd in the world ranking?", "schema": "CREATE TABLE table_19948664_1 (countries_sampled VARCHAR, ranking_la__2_ VARCHAR, world_ranking__1_ VARCHAR)", "sql": "SELECT countries_sampled FROM table_19948664_1 WHERE ranking_la__2_ = '2nd' AND world_ranking__1_ = '23rd';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 107, "num_statements": 1} {"question": "What is the number of disaster preparedness workshops held in Houston and their respective attendance?\"", "schema": "CREATE TABLE houston_disaster_preparedness (id INT, workshop_name VARCHAR(255), city VARCHAR(255), attendance INT); INSERT INTO houston_disaster_preparedness (id, workshop_name, city, attendance) VALUES (1, 'Hurricane Preparedness', 'Houston', 30);", "sql": "SELECT workshop_name, SUM(attendance) as total_attendance FROM houston_disaster_preparedness WHERE city = 'Houston' GROUP BY workshop_name;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 139, "num_statements": 1} {"question": "What is the minimum energy storage capacity of pumped hydro storage plants in Japan?", "schema": "CREATE TABLE pumped_hydro_storage (id INT, name TEXT, country TEXT, capacity FLOAT); INSERT INTO pumped_hydro_storage (id, name, country, capacity) VALUES (1, 'Kannagawa', 'Japan', 245), (2, 'Okuyoshino', 'Japan', 270), (3, 'Shimizu', 'Japan', 300), (4, 'Okutataragi', 'Japan', 336);", "sql": "SELECT MIN(capacity) FROM pumped_hydro_storage WHERE country = 'Japan';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 71, "num_statements": 1} {"question": "What is the maximum labor practice rating for products in each category, by brand?", "schema": "CREATE TABLE Brands (id INT, brand VARCHAR(255)); INSERT INTO Brands (id, brand) VALUES (1, 'BrandA'), (2, 'BrandB'), (3, 'BrandC'); CREATE TABLE Products (id INT, product VARCHAR(255), category VARCHAR(255), brand_id INT, labor_practice_rating DECIMAL(3, 2)); INSERT INTO Products (id, product, category, brand_id, labor_practice_rating) VALUES (1, 'Product1', 'CategoryA', 1, 4.50), (2, 'Product2', 'CategoryA', 1, 4.75), (3, 'Product3', 'CategoryB', 2, 3.25), (4, 'Product4', 'CategoryB', 2, 3.50), (5, 'Product5', 'CategoryC', 3, 4.00), (6, 'Product6', 'CategoryC', 3, 4.25);", "sql": "SELECT b.brand, p.category, MAX(p.labor_practice_rating) AS max_rating FROM Products p JOIN Brands b ON p.brand_id = b.id GROUP BY b.brand, p.category;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 151, "num_statements": 1} {"question": "What is the average years of experience for investigative journalists in the \"investigative_journalists\" table who are from India?", "schema": "CREATE TABLE investigative_journalists (id INT, name VARCHAR(50), country VARCHAR(50), years_of_experience INT); INSERT INTO investigative_journalists (id, name, country, years_of_experience) VALUES (1, 'John Doe', 'USA', 10), (2, 'Jane Smith', 'Canada', 12), (3, 'Pedro Martinez', 'Mexico', 8), (4, 'Rajesh Patel', 'India', 15), (5, 'Anna Lee', 'India', 20);", "sql": "SELECT AVG(years_of_experience) FROM investigative_journalists WHERE country = 'India';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 87, "num_statements": 1} {"question": "What is the average CO2 emission reduction (in metric tons) per green building in the 'green_buildings' and 'carbon_offset' schemas, grouped by city?", "schema": "CREATE TABLE green_buildings.green_buildings_data (city VARCHAR(20), green_building BOOLEAN); CREATE TABLE carbon_offset.offset_initiatives (city VARCHAR(20), co2_reduction_tons INT);", "sql": "SELECT city, AVG(co2_reduction_tons / (SELECT COUNT(*) FROM green_buildings.green_buildings_data WHERE green_building = TRUE)) AS avg_co2_reduction_per_green_building FROM carbon_offset.offset_initiatives WHERE green_building = TRUE GROUP BY city;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 247, "num_statements": 1} {"question": "What is the average amount of socially responsible loans issued by financial institutions in Africa?", "schema": "CREATE TABLE financial_institutions (institution_id INT, institution_name TEXT, region TEXT);CREATE TABLE loans (loan_id INT, institution_id INT, loan_amount DECIMAL, is_socially_responsible BOOLEAN);", "sql": "SELECT AVG(loan_amount) FROM loans JOIN financial_institutions ON loans.institution_id = financial_institutions.institution_id WHERE is_socially_responsible = TRUE AND region = 'Africa';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 186, "num_statements": 1} {"question": "What is the average water temperature in January for all salmon farms in Norway?", "schema": "CREATE TABLE salmon_farms (id INT, name TEXT, country TEXT, latitude DECIMAL(9,6), longitude DECIMAL(9,6)); INSERT INTO salmon_farms (id, name, country, latitude, longitude) VALUES (1, 'Farm A', 'Norway', 60.123456, 7.123456), (2, 'Farm B', 'Norway', 62.123456, 8.123456); CREATE TABLE temperature_readings (id INT, farm_id INT, date DATE, temperature DECIMAL(5,2)); INSERT INTO temperature_readings (id, farm_id, date, temperature) VALUES (1, 1, '2022-01-01', 8.5), (2, 1, '2022-01-02', 9.0), (3, 2, '2022-01-01', 7.0), (4, 2, '2022-01-02', 7.5);", "sql": "SELECT AVG(temperature) FROM temperature_readings tr JOIN salmon_farms sf ON tr.farm_id = sf.id WHERE sf.country = 'Norway' AND MONTH(tr.date) = 1;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 147, "num_statements": 1} {"question": "What is the average price of cotton textiles sourced from India?", "schema": "CREATE TABLE textile_sourcing (id INT, material VARCHAR(20), country VARCHAR(20), price DECIMAL(5,2)); INSERT INTO textile_sourcing (id, material, country, price) VALUES (1, 'cotton', 'India', 3.50), (2, 'silk', 'China', 15.00), (3, 'wool', 'Australia', 12.00);", "sql": "SELECT AVG(price) FROM textile_sourcing WHERE material = 'cotton' AND country = 'India';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 88, "num_statements": 1} {"question": "How many different types of crops were grown in each region over the past 5 years?", "schema": "CREATE TABLE Crops (date DATE, crop_type VARCHAR(20), region VARCHAR(20));", "sql": "SELECT region, COUNT(DISTINCT crop_type) OVER(PARTITION BY region ORDER BY region ROWS BETWEEN 5 PRECEDING AND CURRENT ROW) as crop_types FROM Crops;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "query_window_function", "is_postgresql_specific": true, "sql_length": 149, "num_statements": 1} {"question": "What is the total number of police officers and firefighters in each city district?", "schema": "CREATE TABLE districts (did INT, name VARCHAR(255)); CREATE TABLE police_officers (oid INT, did INT, rank VARCHAR(255)); CREATE TABLE firefighters (fid INT, did INT, rank VARCHAR(255)); INSERT INTO districts VALUES (1, 'Downtown'), (2, 'Uptown'); INSERT INTO police_officers VALUES (1, 1, 'Captain'), (2, 2, 'Lieutenant'); INSERT INTO firefighters VALUES (1, 1, 'Captain'), (2, 2, 'Lieutenant');", "sql": "SELECT d.name, COUNT(po.oid) + COUNT(f.fid) as total_employees FROM districts d LEFT JOIN police_officers po ON d.did = po.did LEFT JOIN firefighters f ON d.did = f.did GROUP BY d.did;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 184, "num_statements": 1} {"question": "What was the average cost of vegetarian menu items?", "schema": "CREATE TABLE menu_items (item VARCHAR(50), type VARCHAR(15), cost DECIMAL(10,2)); INSERT INTO menu_items (item, type, cost) VALUES ('Pizza Margherita', 'Vegetarian', 30.00), ('Caesar Salad', 'Vegetarian', 15.00); CREATE VIEW veg_menu_items AS SELECT item, cost FROM menu_items WHERE type = 'Vegetarian';", "sql": "SELECT AVG(cost) FROM veg_menu_items;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 37, "num_statements": 1} {"question": "pgTAP test for Cmpok (assertion 10).", "schema": null, "sql": "SELECT * FROM check_test(\n isa_ok( ''::text, 'text', 'an empty string' ),\n true,\n 'isa_ok(\"\", text, desc)',\n 'an empty string isa text',\n ''\n);", "explanation": "SQL assertion from pgTAP test suite for Cmpok.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 158, "num_statements": 1} {"question": "What is the total waste quantity generated and the total number of circular economy initiatives, for each location and material, for the fourth quarter of 2024?", "schema": "CREATE TABLE WasteGeneration (Date date, Location text, Material text, Quantity integer);CREATE TABLE CircularEconomyInitiatives (Location text, Initiative text, StartDate date);", "sql": "SELECT wg.Location, wg.Material, SUM(wg.Quantity) as TotalWasteQuantity, COUNT(DISTINCT cei.Initiative) as NumberOfInitiatives FROM WasteGeneration wg LEFT JOIN CircularEconomyInitiatives cei ON wg.Location = cei.Location WHERE wg.Date >= '2024-10-01' AND wg.Date < '2025-01-01' GROUP BY wg.Location, wg.Material;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 313, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the home team on March 7?", "schema": "CREATE TABLE table_name_61 (home VARCHAR, date VARCHAR)", "sql": "SELECT home FROM table_name_61 WHERE date = 'march 7';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 54, "num_statements": 1} {"question": "What is the maximum number of followers for users from India?", "schema": "CREATE TABLE users (id INT, name VARCHAR(50), country VARCHAR(2), followers INT); INSERT INTO users (id, name, country, followers) VALUES (1, 'Alice', 'US', 1000), (2, 'Bob', 'IN', 2000), (3, 'Charlie', 'CA', 1500);", "sql": "SELECT MAX(users.followers) as max_followers FROM users WHERE users.country = 'IN';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 83, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which Points have an Opponent of vancouver canucks, and a November smaller than 11?", "schema": "CREATE TABLE table_name_42 (points INTEGER, opponent VARCHAR, november VARCHAR)", "sql": "SELECT AVG(points) FROM table_name_42 WHERE opponent = 'vancouver canucks' AND november < 11;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 93, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the party of the youngest people?", "schema": "CREATE TABLE people (Party VARCHAR, Age VARCHAR)", "sql": "SELECT Party FROM people ORDER BY Age LIMIT 1;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 46, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which location includes Coast Mountains with a rank less than 18 at Skihist Mountain?", "schema": "CREATE TABLE table_name_41 (location VARCHAR, mountain_peak VARCHAR, mountain_range VARCHAR, rank VARCHAR)", "sql": "SELECT location FROM table_name_41 WHERE mountain_range = 'coast mountains' AND rank < 18 AND mountain_peak = 'skihist mountain';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 129, "num_statements": 1} {"question": "Add a new rural healthcare facility to the 'rural_facilities' table", "schema": "CREATE TABLE rural_facilities (id INT PRIMARY KEY, name VARCHAR(255), type VARCHAR(50), capacity INT, location VARCHAR(255));", "sql": "INSERT INTO rural_facilities (id, name, type, capacity, location) VALUES (1, 'Rural Health Clinic', 'Clinic', 20, '789 Elm St, Ruralville, CA 93420');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 150, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Tell me the date for dinah pfizenmaier anna zaja and winner", "schema": "CREATE TABLE table_name_46 (date VARCHAR, outcome VARCHAR, opponents VARCHAR)", "sql": "SELECT date FROM table_name_46 WHERE outcome = 'winner' AND opponents = 'dinah pfizenmaier anna zaja';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 102, "num_statements": 1} {"question": "What is the total number of space missions launched before 1999?", "schema": "CREATE TABLE Missions (id INT, name VARCHAR(50), launch_year INT); INSERT INTO Missions (id, name, launch_year) VALUES (1, 'Mission1', 2000), (2, 'Mission2', 1999), (3, 'Mission3', 2001);", "sql": "SELECT COUNT(*) FROM Missions WHERE launch_year < 1999;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 55, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who won the bronze medal in Hiroshima?", "schema": "CREATE TABLE table_name_90 (bronze VARCHAR, location VARCHAR)", "sql": "SELECT bronze FROM table_name_90 WHERE location = 'hiroshima';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 62, "num_statements": 1} {"question": "What is the total number of orders placed by customers in the 'extra-large' size range?", "schema": "CREATE TABLE customer_size(customer_id INT, size VARCHAR(10)); INSERT INTO customer_size(customer_id, size) VALUES(1, 'large'), (2, 'medium'), (3, 'extra-large'), (4, 'small');", "sql": "SELECT COUNT(*) FROM customer_size WHERE size = 'extra-large';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 62, "num_statements": 1} {"question": "Delete all records in the 'digital_divide_stats' table where the 'year' is before 2010", "schema": "CREATE TABLE digital_divide_stats (id INT PRIMARY KEY, country VARCHAR(255), year INT, internet_users INT, total_population INT);", "sql": "WITH deleted_data AS (DELETE FROM digital_divide_stats WHERE year < 2010 RETURNING *) SELECT * FROM deleted_data;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "dml_delete", "is_postgresql_specific": true, "sql_length": 113, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the President who has a Treasurer of james davidson, and a Season of 2006–2007?", "schema": "CREATE TABLE table_name_26 (president VARCHAR, treasurer VARCHAR, season VARCHAR)", "sql": "SELECT president FROM table_name_26 WHERE treasurer = 'james davidson' AND season = '2006–2007';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 96, "num_statements": 1} {"question": "PostgreSQL regression test 'jsonb_jsonpath': Write the SELECT query (example 393).", "schema": null, "sql": "select jsonb_path_query('\"Yes\"', '$.boolean().type()');", "explanation": "Regression test for Jsonb Jsonpath in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select jsonb_path_query('\"Yes\"', '$.boolean().type()')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "advanced", "category": "query_select", "is_postgresql_specific": false, "sql_length": 55, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'matview' (example 64).", "schema": null, "sql": "CREATE VIEW mvtest_vt2 AS SELECT moo, 2*moo FROM mvtest_vt1 UNION ALL SELECT moo, 3*moo FROM mvtest_vt1;", "explanation": "DDL from PostgreSQL core regression test for Matview.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "ddl_view", "is_postgresql_specific": false, "sql_length": 104, "num_statements": 1} {"question": "What is the capacity of landfills that serve cities with a population above 500,000?", "schema": "CREATE TABLE Cities (CityID INT, CityName VARCHAR(50), Population INT, LandfillID INT); INSERT INTO Cities VALUES (1, 'CityA', 600000, 1), (2, 'CityB', 800000, 2), (3, 'CityC', 550000, 3); CREATE TABLE Landfills (LandfillID INT, LandfillName VARCHAR(50), Capacity FLOAT); INSERT INTO Landfills VALUES (1, 'Landfill1', 2000000), (2, 'Landfill2', 3000000), (3, 'Landfill3', 2500000);", "sql": "SELECT LandfillName, Capacity FROM Landfills WHERE LandfillID IN (SELECT Cities.LandfillID FROM Cities WHERE Population > 500000);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 130, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the L2 cache of the microprocessor with model number pentium dual-core t3400?", "schema": "CREATE TABLE table_name_91 (l2_cache VARCHAR, model_number VARCHAR)", "sql": "SELECT l2_cache FROM table_name_91 WHERE model_number = 'pentium dual-core t3400';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 82, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What are the processors supported by a ddr2 memory and the nforce 550 model?", "schema": "CREATE TABLE table_name_91 (processors_supported VARCHAR, memory VARCHAR, model VARCHAR)", "sql": "SELECT processors_supported FROM table_name_91 WHERE memory = 'ddr2' AND model = 'nforce 550';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 94, "num_statements": 1} {"question": "List the unique medical conditions of astronauts who have participated in space missions longer than 30 days.", "schema": "CREATE TABLE Astronaut_Medical (Astronaut_ID INT, Medical_Condition VARCHAR(50), Mission_Duration INT); INSERT INTO Astronaut_Medical (Astronaut_ID, Medical_Condition, Mission_Duration) VALUES (1, 'Motion Sickness', 300); INSERT INTO Astronaut_Medical (Astronaut_ID, Medical_Condition, Mission_Duration) VALUES (2, 'Space Adaptation Syndrome', 35); INSERT INTO Astronaut_Medical (Astronaut_ID, Medical_Condition, Mission_Duration) VALUES (3, 'Back Pain', 250);", "sql": "SELECT DISTINCT Medical_Condition FROM Astronaut_Medical WHERE Mission_Duration > 30;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 85, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was Stockholm's score when Malmo scored 2?", "schema": "CREATE TABLE table_name_54 (stockholm VARCHAR, malmö VARCHAR)", "sql": "SELECT stockholm FROM table_name_54 WHERE malmö = '2';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 54, "num_statements": 1} {"question": "PL/pgSQL test: Plperl Array (example 4).", "schema": null, "sql": "select plperl_sum_array('{{1,2,3}, {4,5,6}}');", "explanation": "PL/pgSQL example from PostgreSQL source test for Plperl Array.", "validation_query": null, "source": "plpgsql_source", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 46, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who owned winner Blueeyesintherein after 2009?", "schema": "CREATE TABLE table_name_75 (owner VARCHAR, year VARCHAR, winner VARCHAR)", "sql": "SELECT owner FROM table_name_75 WHERE year > 2009 AND winner = 'blueeyesintherein';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 83, "num_statements": 1} {"question": "PostgreSQL regression test 'jsonb_jsonpath': Write the SELECT query (example 56).", "schema": null, "sql": "select jsonb_path_query('1', 'strict $[1]');", "explanation": "Regression test for Jsonb Jsonpath in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select jsonb_path_query('1', 'strict $[1]')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "advanced", "category": "query_select", "is_postgresql_specific": false, "sql_length": 44, "num_statements": 1} {"question": "How many operators were hired in the last month in each factory?", "schema": "CREATE TABLE factories(id INT, name TEXT, location TEXT);CREATE TABLE operators(id INT, factory_id INT, hire_date DATE);INSERT INTO factories(id, name, location) VALUES (1, 'Factory A', 'Location A'), (2, 'Factory B', 'Location B'); INSERT INTO operators(id, factory_id, hire_date) VALUES (1, 1, '2021-04-01'), (2, 1, '2021-05-01'), (3, 2, '2021-03-15');", "sql": "SELECT factory_id, COUNT(*) as new_hires FROM operators WHERE hire_date >= DATE_SUB(CURRENT_DATE, INTERVAL 1 MONTH) GROUP BY factory_id;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 136, "num_statements": 1} {"question": "Write the PL/pgSQL object from PostgreSQL regression test 'foreign_key' (example 578).", "schema": null, "sql": "\\d fk_notpartitioned_pk\n\n-- Check the existing FK trigger\nSELECT conname, tgrelid::regclass as tgrel, regexp_replace(tgname, '[0-9]+', 'N') as tgname, tgtype\nFROM pg_trigger t JOIN pg_constraint c ON (t.tgconstraint = c.oid)\nWHERE tgrelid IN (SELECT relid FROM pg_partition_tree('fk_partitioned_fk'::regclass)\n\t\t\t\t UNION ALL SELECT 'fk_notpartitioned_pk'::regclass)\nORDER BY tgrelid, tgtype;", "explanation": "PL/pgSQL object from PostgreSQL core test for Foreign Key.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 392, "num_statements": 1} {"question": "Which organizations have a location in 'Australia' and are of type 'Government'?", "schema": "CREATE TABLE organizations (id INT PRIMARY KEY, name VARCHAR(50), type VARCHAR(50), location VARCHAR(50)); INSERT INTO organizations (id, name, type, location) VALUES (1, 'Australian Renewable Energy Agency', 'Government', 'Australia'); INSERT INTO organizations (id, name, type, location) VALUES (2, 'Sustainable Energy Authority of Ireland', 'Government', 'Ireland');", "sql": "SELECT organizations.name, organizations.type, organizations.location FROM organizations WHERE organizations.location = 'Australia' AND organizations.type = 'Government';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 170, "num_statements": 1} {"question": "What was the average price of electric vehicle charging in Paris per kWh in Q2 2022?", "schema": "CREATE TABLE EV_Charging_Prices (city VARCHAR(20), quarter INT, year INT, avg_price DECIMAL(5,2)); INSERT INTO EV_Charging_Prices (city, quarter, year, avg_price) VALUES ('Paris', 2, 2022, 0.25), ('Paris', 3, 2022, 0.26), ('Berlin', 2, 2022, 0.28), ('Berlin', 3, 2022, 0.29);", "sql": "SELECT AVG(avg_price) FROM EV_Charging_Prices WHERE city = 'Paris' AND quarter = 2 AND year = 2022;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 99, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the A330 for A310 B10?", "schema": "CREATE TABLE table_name_62 (a330 VARCHAR, a310 VARCHAR)", "sql": "SELECT a330 FROM table_name_62 WHERE a310 = 'b10';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 50, "num_statements": 1} {"question": "How many investments were made in total in Q2 2021?", "schema": "CREATE TABLE investments (id INT, region VARCHAR(20), date DATE); INSERT INTO investments (id, region, date) VALUES (1, 'Asia-Pacific', '2021-01-05'), (2, 'Europe', '2021-02-10'), (3, 'Asia-Pacific', '2021-03-25'), (4, 'Africa', '2021-04-15'), (5, 'Europe', '2021-06-01');", "sql": "SELECT COUNT(*) FROM investments WHERE date BETWEEN '2021-04-01' AND '2021-06-30';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 82, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the tier IV year for the tournament held in Tampa?", "schema": "CREATE TABLE table_20630462_1 (tier_iv_in VARCHAR, tournament VARCHAR)", "sql": "SELECT tier_iv_in FROM table_20630462_1 WHERE tournament = 'Tampa';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 67, "num_statements": 1} {"question": "What is the total area of sustainable forests in square kilometers?", "schema": "CREATE TABLE forest (id INT, name TEXT, area_sqkm FLOAT, is_sustainable BOOLEAN);", "sql": "SELECT SUM(area_sqkm) FROM forest WHERE is_sustainable = TRUE;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 62, "num_statements": 1} {"question": "What are the regulatory frameworks for 'Country4' and 'Country5'?", "schema": "CREATE TABLE countries (id INT, name VARCHAR(255)); INSERT INTO countries (id, name) VALUES (1, 'Country1'); INSERT INTO countries (id, name) VALUES (2, 'Country2'); INSERT INTO countries (id, name) VALUES (3, 'Country3'); INSERT INTO countries (id, name) VALUES (4, 'Country4'); INSERT INTO countries (id, name) VALUES (5, 'Country5'); CREATE TABLE regulatory_frameworks (id INT, country_id INT, name VARCHAR(255)); INSERT INTO regulatory_frameworks (id, country_id, name) VALUES (1, 1, 'Framework1'); INSERT INTO regulatory_frameworks (id, country_id, name) VALUES (2, 1, 'Framework2'); INSERT INTO regulatory_frameworks (id, country_id, name) VALUES (3, 2, 'Framework3'); INSERT INTO regulatory_frameworks (id, country_id, name) VALUES (4, 4, 'Framework4'); INSERT INTO regulatory_frameworks (id, country_id, name) VALUES (5, 4, 'Framework5'); INSERT INTO regulatory_frameworks (id, country_id, name) VALUES (6, 5, 'Framework6');", "sql": "SELECT name FROM regulatory_frameworks WHERE country_id IN (4, 5);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "List the banks with the lowest number of financial capability programs offered in Q2 2022, in ascending order?", "schema": "CREATE TABLE FINANCIAL_CAPABILITY_PROGRAMS (BANK_NAME VARCHAR(50), PROGRAM_NAME VARCHAR(50), START_DATE DATE); INSERT INTO FINANCIAL_CAPABILITY_PROGRAMS VALUES ('Bank F', 'Program I', '2022-04-15'); INSERT INTO FINANCIAL_CAPABILITY_PROGRAMS VALUES ('Bank G', 'Program J', '2022-05-20'); INSERT INTO FINANCIAL_CAPABILITY_PROGRAMS VALUES ('Bank F', 'Program K', '2022-06-05'); INSERT INTO FINANCIAL_CAPABILITY_PROGRAMS VALUES ('Bank H', 'Program L', '2022-04-01');", "sql": "SELECT BANK_NAME, COUNT(*) TOTAL_PROGRAMS FROM FINANCIAL_CAPABILITY_PROGRAMS WHERE START_DATE >= '2022-04-01' AND START_DATE < '2022-07-01' GROUP BY BANK_NAME ORDER BY TOTAL_PROGRAMS ASC;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 187, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which League showed 7,975 for an average attendance?", "schema": "CREATE TABLE table_name_20 (league VARCHAR, attendance_average VARCHAR)", "sql": "SELECT league FROM table_name_20 WHERE attendance_average = '7,975';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 68, "num_statements": 1} {"question": "What is the total number of public transportation users in New York, London, and Paris in 2020?", "schema": "CREATE TABLE CityTransport (city VARCHAR(30), users INT, year INT); INSERT INTO CityTransport (city, users, year) VALUES ('New York', 1000000, 2020), ('London', 1200000, 2020), ('Paris', 1100000, 2020);", "sql": "SELECT SUM(users) FROM CityTransport WHERE city IN ('New York', 'London', 'Paris') AND year = 2020;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 99, "num_statements": 1} {"question": "What is the minimum fare for a trolleybus in the 'Delhi' region?", "schema": "CREATE TABLE trolleybuses (id INT, region VARCHAR(20), fare DECIMAL(5,2)); INSERT INTO trolleybuses (id, region, fare) VALUES (1, 'Delhi', 15.00), (2, 'Delhi', 20.00), (3, 'Mumbai', 12.00);", "sql": "SELECT MIN(fare) FROM trolleybuses WHERE region = 'Delhi';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 58, "num_statements": 1} {"question": "What is the average number of games played by players from the United States?", "schema": "CREATE TABLE Players (PlayerID INT, PlayerName VARCHAR(50), Age INT, Country VARCHAR(50)); INSERT INTO Players (PlayerID, PlayerName, Age, Country) VALUES (1, 'John Doe', 25, 'USA'); INSERT INTO Players (PlayerID, PlayerName, Age, Country) VALUES (2, 'Jane Smith', 30, 'Canada');", "sql": "SELECT AVG(Age) FROM Players WHERE Country = 'USA';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 51, "num_statements": 1} {"question": "How many matches did each player participate in during the 2020 tennis season?", "schema": "CREATE TABLE tennis (player VARCHAR(255), match_id INT); INSERT INTO tennis (player, match_id) VALUES ('Federer', 1), ('Federer', 2), ('Federer', 3), ('Djokovic', 4), ('Djokovic', 5);", "sql": "SELECT player, COUNT(*) FROM tennis GROUP BY player;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 52, "num_statements": 1} {"question": "What is the average fine for each violation in 'regulatory_compliance' table?", "schema": "CREATE TABLE regulatory_compliance (id INT, dispensary VARCHAR(255), fine FLOAT, violation DATE);", "sql": "SELECT violation, AVG(fine) as avg_fine FROM regulatory_compliance GROUP BY violation;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 86, "num_statements": 1} {"question": "List all managers and their respective departments who have not completed diversity and inclusion training.", "schema": "CREATE TABLE Employees (EmployeeID INT, FirstName VARCHAR(50), LastName VARCHAR(50), Department VARCHAR(50), Position VARCHAR(50)); CREATE TABLE DiversityTraining (EmployeeID INT, TrainingID INT, Completed DATE); INSERT INTO Employees (EmployeeID, FirstName, LastName, Department, Position) VALUES (1, 'John', 'Doe', 'IT', 'Manager'), (2, 'Jane', 'Smith', 'HR', 'Manager'); INSERT INTO DiversityTraining (EmployeeID, TrainingID, Completed) VALUES (1, 1, '2021-06-01');", "sql": "SELECT Employees.FirstName, Employees.LastName, Employees.Department FROM Employees LEFT JOIN DiversityTraining ON Employees.EmployeeID = DiversityTraining.EmployeeID WHERE Employees.Position = 'Manager' AND DiversityTraining.Completed IS NULL;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 244, "num_statements": 1} {"question": "Which department has the highest number of security incidents in the 'security_incidents' table?", "schema": "CREATE TABLE security_incidents (id INT, department VARCHAR(50), date DATE);", "sql": "SELECT department, COUNT(*) as incident_count FROM security_incidents GROUP BY department ORDER BY incident_count DESC LIMIT 1;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 127, "num_statements": 1} {"question": "PostgreSQL regression test 'without_overlaps': Write the SELECT query (example 233).", "schema": null, "sql": "SELECT * FROM tp1 ORDER BY id, valid_at;", "explanation": "Regression test for Without Overlaps in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT * FROM tp1 ORDER BY id, valid_at) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 40, "num_statements": 1} {"question": "What are the product names and their ratings for products with a rating greater than 4.5?", "schema": "CREATE TABLE products (product_id INT, product_name TEXT, rating FLOAT); INSERT INTO products (product_id, product_name, rating) VALUES (1, 'Product A', 4.5), (2, 'Product B', 4.2), (3, 'Product C', 4.8);", "sql": "SELECT product_name, rating FROM products WHERE rating > 4.5;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 61, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'ltree' (example 46).", "schema": null, "sql": "SELECT ''::ltree || 'Top.Child1.Child2'::ltree;", "explanation": "Example query from the 'ltree' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 47, "num_statements": 1} {"question": "What was the average project timeline for sustainable building projects in New York?", "schema": "CREATE TABLE project_timelines (id INT PRIMARY KEY, project_name VARCHAR(255), state VARCHAR(255), start_date DATE, end_date DATE, sustainable VARCHAR(5));", "sql": "SELECT AVG(DATEDIFF(end_date, start_date)) FROM project_timelines WHERE state = 'New York' AND sustainable = 'yes';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 115, "num_statements": 1} {"question": "How many inclusive housing units are in New York and Los Angeles combined?", "schema": "CREATE TABLE inclusive_housing (units INT, city VARCHAR(20));", "sql": "SELECT SUM(units) FROM inclusive_housing WHERE city IN ('New York', 'Los Angeles');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 83, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What team had the record asscoiated with the 9th inning?", "schema": "CREATE TABLE table_name_54 (team VARCHAR, inn VARCHAR)", "sql": "SELECT team FROM table_name_54 WHERE inn = '9th';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 49, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which province has evening gown score of 7.61", "schema": "CREATE TABLE table_15081939_4 (province VARCHAR, evening_gown VARCHAR)", "sql": "SELECT province FROM table_15081939_4 WHERE evening_gown = '7.61';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the average Goals/Games for Rummenigge, Karl-Heinz, with Goals less than 162?", "schema": "CREATE TABLE table_name_66 (games VARCHAR, goals INTEGER, name VARCHAR)", "sql": "SELECT AVG(goals) / games FROM table_name_66 WHERE name = 'rummenigge, karl-heinz' AND goals < 162;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 99, "num_statements": 1} {"question": "Which countries have no renewable energy projects in the renewable_projects table?", "schema": "CREATE TABLE renewable_projects (id INT, project_name VARCHAR(100), country VARCHAR(50)); INSERT INTO renewable_projects (id, project_name, country) VALUES (1, 'Renewable Project 1', 'Germany'), (2, 'Renewable Project 2', 'Sweden');", "sql": "SELECT rp.country FROM renewable_projects rp GROUP BY rp.country HAVING COUNT(*) = 0;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 85, "num_statements": 1} {"question": "What is the total quantity of each ingredient used, broken down by day?", "schema": "CREATE TABLE ingredient_usage (ingredient_name VARCHAR(50), sale_date DATE, quantity INT); INSERT INTO ingredient_usage (ingredient_name, sale_date, quantity) VALUES ('Lettuce', '2023-03-01', 200), ('Tomatoes', '2023-03-01', 300), ('Cheese', '2023-03-01', 400), ('Lettuce', '2023-03-02', 250), ('Tomatoes', '2023-03-02', 350), ('Cheese', '2023-03-02', 450);", "sql": "SELECT sale_date, ingredient_name, SUM(quantity) FROM ingredient_usage GROUP BY sale_date, ingredient_name;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 107, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the score of the match that took place in the playoff round?", "schema": "CREATE TABLE table_name_31 (score VARCHAR, round VARCHAR)", "sql": "SELECT score FROM table_name_31 WHERE round = 'playoff';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 56, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is Place, when To Par is less than 15, and when Score is 76-72-75-71=294?", "schema": "CREATE TABLE table_name_59 (place VARCHAR, to_par VARCHAR, score VARCHAR)", "sql": "SELECT place FROM table_name_59 WHERE to_par < 15 AND score = 76 - 72 - 75 - 71 = 294;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 86, "num_statements": 1} {"question": "PostgreSQL regression test 'interval': Write the SELECT query (example 406).", "schema": null, "sql": "SELECT to_char('infinity'::interval, 'YYYY');", "explanation": "Regression test for Interval in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT to_char('infinity'::interval, 'YYYY')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 45, "num_statements": 1} {"question": "What is the count of new hires in the last 3 months, by recruiter and source?", "schema": "CREATE TABLE Recruitment (RecruiterID int, RecruiterName varchar(50), CandidateSource varchar(50), NewHire bit, HireDate date); INSERT INTO Recruitment (RecruiterID, RecruiterName, CandidateSource, NewHire, HireDate) VALUES (1, 'Alice', 'Job Board', 1, '2022-01-01'), (2, 'Bob', 'Referral', 1, '2022-02-01'), (3, 'Charlie', 'Career Fair', 0, '2022-03-01');", "sql": "SELECT Recruitment.RecruiterName, Recruitment.CandidateSource, COUNT(CASE WHEN Recruitment.NewHire = 1 AND Recruitment.HireDate >= DATEADD(month, -3, GETDATE()) THEN 1 ELSE NULL END) as Count_of_New_Hires FROM Recruitment GROUP BY Recruitment.RecruiterName, Recruitment.CandidateSource;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 286, "num_statements": 1} {"question": "Which underwriters have processed fewer than 100 policies in the last month?", "schema": "CREATE TABLE underwriter (underwriter_id INT, name VARCHAR(50)); CREATE TABLE policy (policy_id INT, underwriter_id INT, processing_date DATE);", "sql": "SELECT underwriter.name FROM underwriter LEFT JOIN (SELECT underwriter_id, COUNT(*) as policy_count FROM policy WHERE processing_date >= DATEADD(MONTH, -1, GETDATE()) GROUP BY underwriter_id) AS policy_count ON underwriter.underwriter_id = policy_count.underwriter_id WHERE policy_count.policy_count IS NULL OR policy_count.policy_count < 100;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 343, "num_statements": 1} {"question": "PostgreSQL regression test 'timestamp': Write the SELECT query (example 170).", "schema": null, "sql": "select age(timestamp 'infinity', timestamp 'infinity');", "explanation": "Regression test for Timestamp in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select age(timestamp 'infinity', timestamp 'infinity')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 55, "num_statements": 1} {"question": "Write the PL/pgSQL object from PostgreSQL regression test 'rangefuncs' (example 56).", "schema": null, "sql": "-- function in subselect\nselect * from rngfunc2 where f2 in (select f2 from rngfunct(rngfunc2.rngfuncid) z where z.rngfuncid = rngfunc2.rngfuncid) ORDER BY 1,2;", "explanation": "PL/pgSQL object from PostgreSQL core test for Rangefuncs.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 160, "num_statements": 1} {"question": "What is the total amount of socially responsible loans issued by AltruisticBank in Q1 2021?", "schema": "CREATE TABLE AltruisticBank (id INT, loan_type VARCHAR(20), loan_amount INT, issue_date DATE); INSERT INTO AltruisticBank (id, loan_type, loan_amount, issue_date) VALUES (1, 'Socially Responsible', 7000, '2021-01-05');", "sql": "SELECT SUM(loan_amount) FROM AltruisticBank WHERE loan_type = 'Socially Responsible' AND QUARTER(issue_date) = 1 AND YEAR(issue_date) = 2021;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 141, "num_statements": 1} {"question": "What vehicles passed the 'Pedestrian Safety Test' in the SafetyTesting table?", "schema": "CREATE TABLE SafetyTesting (Id INT, Vehicle VARCHAR(50), Test VARCHAR(50), Result VARCHAR(50)); INSERT INTO SafetyTesting (Id, Vehicle, Test, Result) VALUES (1, 'Volvo XC60', 'Frontal Crash Test', 'Passed'), (2, 'Nissan Leaf', 'Pedestrian Safety Test', 'Passed');", "sql": "SELECT Vehicle FROM SafetyTesting WHERE Test = 'Pedestrian Safety Test' AND Result = 'Passed';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 94, "num_statements": 1} {"question": "PostgreSQL regression test 'jsonb': Write the SELECT query (example 361).", "schema": null, "sql": "select jsonb_populate_record_valid(NULL::jsb_ia, '{\"a\": [1, 2]}');", "explanation": "Regression test for Jsonb in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select jsonb_populate_record_valid(NULL::jsb_ia, '{\"a\": [1, 2]}')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Show the id, the account name, and other account details for all accounts by the customer with first name 'Meaghan'.", "schema": "CREATE TABLE Accounts (account_id VARCHAR, date_account_opened VARCHAR, account_name VARCHAR, other_account_details VARCHAR, customer_id VARCHAR); CREATE TABLE Customers (customer_id VARCHAR, customer_first_name VARCHAR)", "sql": "SELECT T1.account_id, T1.date_account_opened, T1.account_name, T1.other_account_details FROM Accounts AS T1 JOIN Customers AS T2 ON T1.customer_id = T2.customer_id WHERE T2.customer_first_name = 'Meaghan';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 205, "num_statements": 1} {"question": "How many Arctic fox sightings have been recorded each year for the last 5 years?", "schema": "CREATE TABLE arctic_fox_sightings (id INT, year INT, sightings INT);", "sql": "SELECT year, SUM(sightings) FROM arctic_fox_sightings WHERE year BETWEEN (YEAR(CURRENT_DATE) - 5) AND YEAR(CURRENT_DATE) GROUP BY year;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 135, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which Margin has a Dist (f) larger than 10, and a Race of king george vi & queen elizabeth stakes?", "schema": "CREATE TABLE table_name_33 (margin VARCHAR, dist__f_ VARCHAR, race VARCHAR)", "sql": "SELECT margin FROM table_name_33 WHERE dist__f_ > 10 AND race = 'king george vi & queen elizabeth stakes';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 106, "num_statements": 1} {"question": "Insert new record for student 'Sara Lee' with ID 456 and disability 'Physical'", "schema": "CREATE TABLE Students (StudentID INT PRIMARY KEY, Name VARCHAR(50), Disability VARCHAR(20));", "sql": "INSERT INTO Students (StudentID, Name, Disability) VALUES (456, 'Sara Lee', 'Physical');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 88, "num_statements": 1} {"question": "Which mine has the lowest labor productivity?", "schema": "CREATE TABLE mines (mine_id INT, name TEXT, location TEXT, productivity FLOAT); INSERT INTO mines (mine_id, name, location, productivity) VALUES (1, 'ABC Mine', 'USA', 1200), (2, 'DEF Mine', 'Canada', 800);", "sql": "SELECT name, MIN(productivity) FROM mines;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 42, "num_statements": 1} {"question": "What is the average production budget for action movies released between 2000 and 2010?", "schema": "CREATE TABLE movies (id INT, title VARCHAR(100), genre VARCHAR(50), release_year INT, production_budget INT); INSERT INTO movies (id, title, genre, release_year, production_budget) VALUES (1, 'MovieA', 'Action', 2005, 15000000); INSERT INTO movies (id, title, genre, release_year, production_budget) VALUES (2, 'MovieB', 'Action', 2002, 20000000);", "sql": "SELECT AVG(production_budget) FROM movies WHERE genre = 'Action' AND release_year BETWEEN 2000 AND 2010;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 104, "num_statements": 1} {"question": "PL/pgSQL test: Plpython Spi (example 41).", "schema": null, "sql": "CREATE OR REPLACE FUNCTION plan_composite_args() RETURNS test_composite_type AS $$\nplan = plpy.prepare(\"select $1 as c1\", [\"test_composite_type\"])\nres = plpy.execute(plan, [{\"a1\": 3, \"a2\": \"label\"}])\nreturn res[0][\"c1\"]\n$$ LANGUAGE plpython3u;", "explanation": "PL/pgSQL example from PostgreSQL source test for Plpython Spi.", "validation_query": null, "source": "plpgsql_source", "difficulty": "advanced", "category": "plpgsql_function", "is_postgresql_specific": true, "sql_length": 243, "num_statements": 1} {"question": "pgTAP test for Pgtap--Unpackaged--0.91.0 (assertion 573).", "schema": null, "sql": "ALTER EXTENSION pgtap ADD FUNCTION findfuncs( NAME, TEXT );", "explanation": "SQL assertion from pgTAP test suite for Pgtap--Unpackaged--0.91.0.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 59, "num_statements": 1} {"question": "What is the daily revenue for each menu category in the last 30 days?", "schema": "CREATE TABLE daily_sales (sale_date DATE, menu_category VARCHAR(255), revenue INT);", "sql": "SELECT sale_date, menu_category, SUM(revenue) as daily_revenue FROM daily_sales WHERE sale_date BETWEEN DATEADD(day, -30, GETDATE()) AND GETDATE() GROUP BY sale_date, menu_category;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 181, "num_statements": 1} {"question": "What is the most common type of crime in Paris, and how many times did it occur?", "schema": "CREATE TABLE crimes (id INT, city VARCHAR(255), date DATE, type VARCHAR(255), description TEXT); INSERT INTO crimes (id, city, date, type, description) VALUES (1, 'Paris', '2022-01-01', 'Theft', 'Bicycle theft'), (2, 'Paris', '2022-02-01', 'Vandalism', 'Graffiti');", "sql": "SELECT type, COUNT(*) FROM crimes WHERE city = 'Paris' GROUP BY type ORDER BY COUNT(*) DESC LIMIT 1;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 100, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many different results are there for the battles?", "schema": "CREATE TABLE battle (RESULT VARCHAR)", "sql": "SELECT COUNT(DISTINCT RESULT) FROM battle;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 42, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What location is listed from 2005-2010?", "schema": "CREATE TABLE table_26476336_2 (location VARCHAR, years VARCHAR)", "sql": "SELECT location FROM table_26476336_2 WHERE years = '2005-2010';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 64, "num_statements": 1} {"question": "How many players are in each team?", "schema": "CREATE TABLE EsportsTeams (TeamID INT, TeamName TEXT, Location TEXT); CREATE TABLE EsportsPlayers (PlayerID INT, PlayerName TEXT, TeamID INT); INSERT INTO EsportsTeams (TeamID, TeamName, Location) VALUES (1, 'TeamA', 'CityA'), (2, 'TeamB', 'CityB'), (3, 'TeamC', 'CityC'); INSERT INTO EsportsPlayers (PlayerID, PlayerName, TeamID) VALUES (1, 'Player1', 1), (2, 'Player2', 1), (3, 'Player3', 2), (4, 'Player4', 3);", "sql": "SELECT TeamID, COUNT(*) FROM EsportsPlayers GROUP BY TeamID;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 60, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What european country had more than 14 goals, and 52 538 avg att?", "schema": "CREATE TABLE table_name_83 (europe VARCHAR, goals VARCHAR, avgatt VARCHAR)", "sql": "SELECT europe FROM table_name_83 WHERE goals > 14 AND avgatt = '52 538';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 72, "num_statements": 1} {"question": "List the states with the most and fewest number of workplaces that have experienced labor disputes in the past year.", "schema": "CREATE TABLE WorkplaceDisputes (State TEXT, Disputes INT); INSERT INTO WorkplaceDisputes (State, Disputes) VALUES ('California', 150), ('Texas', 100), ('New York', 200), ('Florida', 50);", "sql": "SELECT State, Disputes FROM WorkplaceDisputes WHERE Disputes = (SELECT MAX(Disputes) FROM WorkplaceDisputes) OR Disputes = (SELECT MIN(Disputes) FROM WorkplaceDisputes);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 169, "num_statements": 1} {"question": "How many permits were issued for commercial buildings in Los Angeles between 2018 and 2020?", "schema": "CREATE TABLE building_permits (permit_id INT, building_type VARCHAR(20), city VARCHAR(20), issue_date DATE); INSERT INTO building_permits (permit_id, building_type, city, issue_date) VALUES (4, 'Commercial', 'Los Angeles', '2018-04-01'), (5, 'Residential', 'Los Angeles', '2019-07-15'), (6, 'Commercial', 'Los Angeles', '2020-11-05');", "sql": "SELECT COUNT(*) FROM building_permits WHERE building_type = 'Commercial' AND city = 'Los Angeles' AND issue_date BETWEEN '2018-01-01' AND '2020-12-31';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 151, "num_statements": 1} {"question": "What is the minimum and maximum salary for each position in the Mining department?", "schema": "CREATE TABLE Employees(id INT, name VARCHAR(50), department VARCHAR(50), position VARCHAR(50), salary FLOAT, full_time BOOLEAN, gender VARCHAR(50), start_date DATE);", "sql": "SELECT position, MIN(salary) AS Min_Salary, MAX(salary) AS Max_Salary FROM Employees WHERE department = 'Mining' GROUP BY position;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 131, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What player has the score of 67-72=139?", "schema": "CREATE TABLE table_name_58 (player VARCHAR, score VARCHAR)", "sql": "SELECT player FROM table_name_58 WHERE score = 67 - 72 = 139;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 61, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What rank was the team from Australia?", "schema": "CREATE TABLE table_name_77 (rank VARCHAR, country VARCHAR)", "sql": "SELECT COUNT(rank) FROM table_name_77 WHERE country = 'australia';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "Find the digital assets that were launched most recently, along with the country they were launched in, in descending order.", "schema": "CREATE TABLE DigitalAssets (AssetID int, AssetName varchar(50), LaunchDate date); INSERT INTO DigitalAssets (AssetID, AssetName, LaunchDate) VALUES (1, 'Asset1', '2022-01-01'), (2, 'Asset2', '2022-02-01'), (3, 'Asset3', '2022-03-01');", "sql": "SELECT AssetName, Country, LaunchDate FROM (SELECT AssetName, Country, LaunchDate, ROW_NUMBER() OVER (ORDER BY LaunchDate DESC) as Rank FROM DigitalAssets) as RankedAssets WHERE Rank = 1;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_window_function", "is_postgresql_specific": true, "sql_length": 187, "num_statements": 1} {"question": "List the carbon pricing schemes and their corresponding carbon prices for the year 2021, sorted by carbon price in descending order.", "schema": "CREATE TABLE carbon_pricing (scheme VARCHAR(255), year INT, carbon_price FLOAT); INSERT INTO carbon_pricing (scheme, year, carbon_price) VALUES ('ETS', 2021, 30.56);", "sql": "SELECT * FROM carbon_pricing WHERE year = 2021 ORDER BY carbon_price DESC;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 74, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the title of series number 56?", "schema": "CREATE TABLE table_2468961_4 (title VARCHAR, no_in_series VARCHAR)", "sql": "SELECT title FROM table_2468961_4 WHERE no_in_series = 56;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 58, "num_statements": 1} {"question": "What is the maximum number of visitors for an exhibition in Chicago in 2020?", "schema": "CREATE TABLE ExhibitionsChicago (id INT, exhibition_name VARCHAR(30), city VARCHAR(20), year INT, visitor_count INT); INSERT INTO ExhibitionsChicago (id, exhibition_name, city, year, visitor_count) VALUES (1, 'Van Gogh in Chicago', 'Chicago', 2020, 100000), (2, 'Impressionism in Motion', 'Chicago', 2020, 85000), (3, 'Ancient Egypt', 'Chicago', 2020, 120000);", "sql": "SELECT exhibition_name, MAX(visitor_count) FROM ExhibitionsChicago WHERE city = 'Chicago' AND year = 2020;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 106, "num_statements": 1} {"question": "What is the number of military personnel in each branch of the Chinese military?", "schema": "CREATE TABLE MilitaryPersonnel (id INT, name VARCHAR(255), branch VARCHAR(255), personnel_count INT); INSERT INTO MilitaryPersonnel (id, name, branch, personnel_count) VALUES (1, 'Li Wei', 'Ground Forces', 800000), (2, 'Zhang Li', 'Air Force', 450000), (3, 'Wang Xiao', 'Navy', 300000);", "sql": "SELECT branch, personnel_count FROM MilitaryPersonnel WHERE branch IN ('Ground Forces', 'Air Force', 'Navy') GROUP BY branch;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 125, "num_statements": 1} {"question": "What is the total fare collected for each month?", "schema": "CREATE TABLE trip (trip_id INT, fare DECIMAL(10,2), trip_date DATE); INSERT INTO trip (trip_id, fare, trip_date) VALUES (1, 2.00, '2022-01-01'), (2, 3.00, '2022-01-02'), (3, 4.00, '2022-02-01'), (4, 5.00, '2022-02-02');", "sql": "SELECT EXTRACT(MONTH FROM trip_date) AS month, SUM(fare) AS total_fare FROM trip GROUP BY month;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 96, "num_statements": 1} {"question": "What is the total number of hours of professional development per instructor per district?", "schema": "CREATE TABLE development_hours (teacher_id INT, district_id INT, hours_developed INT);", "sql": "SELECT d.district_id, t.instructor_id, SUM(d.hours_developed) as total_hours FROM development_hours d INNER JOIN teachers t ON d.teacher_id = t.teacher_id GROUP BY d.district_id, t.instructor_id;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 195, "num_statements": 1} {"question": "What is the average budget allocated for disability support programs per region, ordered by the highest average budget?", "schema": "CREATE TABLE Disability_Support_Programs (Region VARCHAR(50), Budget NUMERIC(10,2)); INSERT INTO Disability_Support_Programs VALUES ('Northeast', 500000), ('Southeast', 600000), ('Midwest', 400000), ('Southwest', 700000), ('West', 550000);", "sql": "SELECT Region, AVG(Budget) as Avg_Budget FROM Disability_Support_Programs GROUP BY Region ORDER BY Avg_Budget DESC;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 115, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: List the opposing team on february 15, 2003.", "schema": "CREATE TABLE table_26360571_2 (opponent VARCHAR, date VARCHAR)", "sql": "SELECT opponent FROM table_26360571_2 WHERE date = 'February 15, 2003';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 71, "num_statements": 1} {"question": "What is the average carbon offset by project?", "schema": "CREATE TABLE carbon_offsets (project_id INT, carbon_offsets FLOAT); INSERT INTO carbon_offsets (project_id, carbon_offsets) VALUES (1, 1200.5), (2, 1800.75), (3, 2500.33);", "sql": "SELECT AVG(carbon_offsets) FROM carbon_offsets;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 47, "num_statements": 1} {"question": "Calculate the total water usage for all mining operations, per month", "schema": "CREATE TABLE WaterUsage (SiteID INT, UsageDate DATE, AmountUsed INT); INSERT INTO WaterUsage (SiteID, UsageDate, AmountUsed) VALUES (1, '2021-01-01', 500), (1, '2021-01-15', 700);", "sql": "SELECT DATE_FORMAT(UsageDate, '%Y-%m') as Month, SUM(AmountUsed) as TotalWaterUsage FROM WaterUsage GROUP BY Month;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 115, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the position with height of 2.09", "schema": "CREATE TABLE table_name_10 (position VARCHAR, height VARCHAR)", "sql": "SELECT position FROM table_name_10 WHERE height = 2.09;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 55, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which engine did dr ing f porsche kg use with the porsche rsk (f2) chassis?", "schema": "CREATE TABLE table_name_77 (engine VARCHAR, entrant VARCHAR, chassis VARCHAR)", "sql": "SELECT engine FROM table_name_77 WHERE entrant = 'dr ing f porsche kg' AND chassis = 'porsche rsk (f2)';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 104, "num_statements": 1} {"question": "PL/pgSQL test: Plpython Trigger (example 116).", "schema": null, "sql": "-- check that using a function as a trigger over two tables works correctly\nCREATE FUNCTION trig1234() RETURNS trigger LANGUAGE plpython3u AS $$\n TD[\"new\"][\"data\"] = '1234'\n return 'MODIFY'\n$$;", "explanation": "PL/pgSQL example from PostgreSQL source test for Plpython Trigger.", "validation_query": null, "source": "plpgsql_source", "difficulty": "basic", "category": "plpgsql_trigger", "is_postgresql_specific": false, "sql_length": 199, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How much Latitude has a Water (sqmi) smaller than 0?", "schema": "CREATE TABLE table_name_69 (latitude VARCHAR, water__sqmi_ INTEGER)", "sql": "SELECT COUNT(latitude) FROM table_name_69 WHERE water__sqmi_ < 0;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 65, "num_statements": 1} {"question": "What is the percentage of policy advocacy efforts focused on mental health in each continent?", "schema": "CREATE TABLE continent (continent_id INT, continent_name VARCHAR(50), country_code VARCHAR(5)); INSERT INTO continent (continent_id, continent_name, country_code) VALUES (1, 'North America', 'USA'), (2, 'Europe', 'FRA'); CREATE TABLE policy_advocacy (policy_id INT, policy_name VARCHAR(50), continent_id INT, focus_area VARCHAR(50)); INSERT INTO policy_advocacy (policy_id, policy_name, continent_id, focus_area) VALUES (1, 'Mental Health Parity Act', 1, 'Mental Health'), (2, 'Accessibility for Ontarians with Disabilities Act', 2, 'Physical Accessibility');", "sql": "SELECT C.continent_name, COUNT(PA.policy_id) * 100.0 / (SELECT COUNT(*) FROM policy_advocacy WHERE focus_area = 'Mental Health') as mental_health_percentage FROM policy_advocacy PA JOIN continent C ON PA.continent_id = C.continent_id WHERE PA.focus_area = 'Mental Health' GROUP BY C.continent_name;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 298, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: When was J.P. Viernes' last performance?", "schema": "CREATE TABLE table_name_26 (last_performance VARCHAR, name VARCHAR)", "sql": "SELECT last_performance FROM table_name_26 WHERE name = 'j.p. viernes';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "advanced", "category": "plpgsql", "is_postgresql_specific": false, "sql_length": 71, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: The golfer Jeff Sluman golfs for what country?", "schema": "CREATE TABLE table_name_13 (country VARCHAR, player VARCHAR)", "sql": "SELECT country FROM table_name_13 WHERE player = 'jeff sluman';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 63, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who was the runner up before 2007?", "schema": "CREATE TABLE table_name_93 (runner_up VARCHAR, season INTEGER)", "sql": "SELECT runner_up FROM table_name_93 WHERE season < 2007;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 56, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: After 1971, what is the Rank with a Height ft (m) of 19.0 477 (145) and less than 35 Floors?", "schema": "CREATE TABLE table_name_90 (rank VARCHAR, height_ft__m_ VARCHAR, year VARCHAR, floors VARCHAR)", "sql": "SELECT rank FROM table_name_90 WHERE year > 1971 AND floors < 35 AND height_ft__m_ = '19.0 477 (145)';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 102, "num_statements": 1} {"question": "PostgreSQL regression test 'strings': Write the SELECT query (example 181).", "schema": null, "sql": "SELECT regexp_instr('abcabcabc', 'a.c', 1, 1, -1);", "explanation": "Regression test for Strings in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT regexp_instr('abcabcabc', 'a.c', 1, 1, -1)) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 50, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Party of republican, and a District of 7th is what elected?", "schema": "CREATE TABLE table_name_81 (elected VARCHAR, party VARCHAR, district VARCHAR)", "sql": "SELECT elected FROM table_name_81 WHERE party = 'republican' AND district = '7th';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 82, "num_statements": 1} {"question": "What is the maximum max speed of vessels that have a type of 'Passenger'?", "schema": "CREATE TABLE Vessel (vessel_id INT, name VARCHAR(255), type VARCHAR(255), max_speed DECIMAL(5,2)); INSERT INTO Vessel (vessel_id, name, type, max_speed) VALUES (1, 'Test Vessel 1', 'Cargo', 20.5), (2, 'Test Vessel 2', 'Tanker', 15.2), (3, 'Test Vessel 3', 'Passenger', 30.7), (4, 'Test Vessel 4', 'Passenger', 35.6);", "sql": "SELECT MAX(v.max_speed) FROM Vessel v WHERE v.type = 'Passenger';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 65, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'postgres_fdw' (example 1224).", "schema": null, "sql": "SELECT tableoid::regclass, * FROM batch_cp_upd_test ORDER BY 1;", "explanation": "Example query from the 'postgres_fdw' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 63, "num_statements": 1} {"question": "How many public schools are there in the capital city?", "schema": "CREATE TABLE cities (city_id INT, city_name VARCHAR(255), capital_city BOOLEAN); INSERT INTO cities (city_id, city_name, capital_city) VALUES (1, 'Capital City', TRUE), (2, 'Second City', FALSE), (3, 'Third Town', FALSE); CREATE TABLE schools (school_id INT, school_name VARCHAR(255), city_id INT); INSERT INTO schools (school_id, school_name, city_id) VALUES (1, 'Capital High', 1), (2, 'North Secondary', 2), (3, 'East Elementary', 3);", "sql": "SELECT COUNT(*) FROM schools WHERE city_id = (SELECT city_id FROM cities WHERE capital_city = TRUE);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 100, "num_statements": 1} {"question": "What is the average cargo handling time in hours for the 'handling_events' table?", "schema": "CREATE TABLE handling_events (event_id INT, port_id INT, event_time TIME); INSERT INTO handling_events (event_id, port_id, event_time) VALUES (1, 1, '12:30:00'), (2, 2, '10:00:00'), (3, 3, '14:00:00');", "sql": "SELECT AVG(TIME_TO_SEC(event_time) / 3600) FROM handling_events;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 64, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: what is the english translation when the artist is ann christine?", "schema": "CREATE TABLE table_name_10 (english_translation VARCHAR, artist VARCHAR)", "sql": "SELECT english_translation FROM table_name_10 WHERE artist = 'ann christine';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 77, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is Assembled, when Summoned is \"6 October 1297\"?", "schema": "CREATE TABLE table_name_55 (assembled VARCHAR, summoned VARCHAR)", "sql": "SELECT assembled FROM table_name_55 WHERE summoned = '6 october 1297';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 70, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Find the names of all swimmers, sorted by their 100 meter scores in ascending order.", "schema": "CREATE TABLE swimmer (name VARCHAR, meter_100 VARCHAR)", "sql": "SELECT name FROM swimmer ORDER BY meter_100;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 44, "num_statements": 1} {"question": "PL/pgSQL test: Plperl Array (example 27).", "schema": null, "sql": "select plperl_arrays_inout_l('{{1}, {2}, {3}}');", "explanation": "PL/pgSQL example from PostgreSQL source test for Plperl Array.", "validation_query": null, "source": "plpgsql_source", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 48, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'btree_gin' (example 30).", "schema": null, "sql": "SELECT * FROM test_timestamp WHERE i<='infinity'::date ORDER BY i;", "explanation": "Example query from the 'btree_gin' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "What is the average sugar content in organic vegan desserts?", "schema": "CREATE TABLE products (id INT, category TEXT, is_organic BOOLEAN, is_vegan BOOLEAN, sugar_grams FLOAT); INSERT INTO products (id, category, is_organic, is_vegan, sugar_grams) VALUES (1, 'dessert', true, true, 12.5), (2, 'dessert', false, true, 15.0), (3, 'dessert', true, false, 8.0), (4, 'dessert', false, false, 10.0);", "sql": "SELECT AVG(sugar_grams) FROM products WHERE is_organic = true AND is_vegan = true;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 82, "num_statements": 1} {"question": "Calculate the percentage of time each type of equipment was in use in the past month.", "schema": "CREATE TABLE equipment_usage (id INT, equipment_type VARCHAR(255), usage_duration INT, timestamp DATETIME); INSERT INTO equipment_usage (id, equipment_type, usage_duration, timestamp) VALUES (1, 'Tractor', 120, '2022-01-01 10:00:00');", "sql": "SELECT equipment_type, 100.0 * SUM(usage_duration) / SUM(CASE WHEN equipment_type = 'Tractor' THEN usage_duration ELSE 0 END) as usage_percentage FROM equipment_usage WHERE timestamp >= DATE_SUB(CURRENT_TIMESTAMP(), INTERVAL 1 MONTH) GROUP BY equipment_type;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 258, "num_statements": 1} {"question": "What is the maximum sea level rise recorded in the Atlantic region?", "schema": "CREATE TABLE sea_level_data (id INT, region VARCHAR(50), sea_level_rise DECIMAL); INSERT INTO sea_level_data (id, region, sea_level_rise) VALUES (1, 'Pacific', 0.3); INSERT INTO sea_level_data (id, region, sea_level_rise) VALUES (2, 'Atlantic', 0.5);", "sql": "SELECT MAX(sea_level_rise) FROM sea_level_data WHERE region = 'Atlantic';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 73, "num_statements": 1} {"question": "What is the total income of clients in the Philippines who are under 35?", "schema": "CREATE TABLE clients (client_id INT, name VARCHAR(100), age INT, country VARCHAR(50), income DECIMAL(10,2)); INSERT INTO clients (client_id, name, age, country, income) VALUES (7, 'Maria Santos', 30, 'Philippines', 35000);", "sql": "SELECT SUM(income) FROM clients WHERE country = 'Philippines' AND age < 35;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 75, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: In what week was the away team Auckland?", "schema": "CREATE TABLE table_name_92 (week VARCHAR, away_team VARCHAR)", "sql": "SELECT week FROM table_name_92 WHERE away_team = 'auckland';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 60, "num_statements": 1} {"question": "pgTAP test for Pgtap--0.94.0--0.95.0 (assertion 42).", "schema": null, "sql": "-- col_is_unique( scheam, table, column )\nCREATE OR REPLACE FUNCTION col_is_unique ( NAME, NAME, NAME )\nRETURNS TEXT AS $$\n SELECT col_is_unique( $1, $2, ARRAY[$3], 'Column ' || quote_ident($2) || '(' || quote_ident($3) || ') should have a unique constraint' );\n$$ LANGUAGE sql;", "explanation": "SQL assertion from pgTAP test suite for Pgtap--0.94.0--0.95.0.", "validation_query": null, "source": "pgtap_tests", "difficulty": "advanced", "category": "plpgsql_function", "is_postgresql_specific": true, "sql_length": 281, "num_statements": 2} {"question": "What is the maximum cost of devices for users in urban areas?", "schema": "CREATE TABLE devices (device_id INT, device_cost FLOAT, user_location VARCHAR(10)); INSERT INTO devices VALUES (1, 300, 'rural'), (2, 500, 'urban'), (3, 400, 'rural');", "sql": "SELECT MAX(device_cost) FROM devices WHERE user_location = 'urban';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 67, "num_statements": 1} {"question": "Update the climate finance data to reflect the current inflation rates, using the 'inflation_rates' table.", "schema": "CREATE TABLE climate_finance (project VARCHAR(50), country VARCHAR(50), amount FLOAT, date DATE); CREATE TABLE inflation_rates (country VARCHAR(50), rate FLOAT, date DATE); INSERT INTO climate_finance (project, country, amount, date) VALUES ('Green City', 'USA', 5000000, '2020-01-01'); INSERT INTO inflation_rates (country, rate, date) VALUES ('USA', 1.02, '2020-01-01');", "sql": "UPDATE climate_finance SET amount = amount * (SELECT rate FROM inflation_rates WHERE climate_finance.country = inflation_rates.country AND climate_finance.date = inflation_rates.date);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 184, "num_statements": 1} {"question": "Select names of all sustainable suppliers", "schema": "CREATE TABLE supplier_info (id INT PRIMARY KEY, name VARCHAR(255), country VARCHAR(255), sustainable_practices BOOLEAN); CREATE VIEW sustainable_suppliers AS SELECT * FROM supplier_info WHERE sustainable_practices = TRUE;", "sql": "SELECT name FROM sustainable_suppliers;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 39, "num_statements": 1} {"question": "PostgreSQL regression test 'date': Write the SELECT query (example 196).", "schema": null, "sql": "SELECT EXTRACT(TIMEZONE_M FROM DATE '2020-08-11');", "explanation": "Regression test for Date in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT EXTRACT(TIMEZONE_M FROM DATE '2020-08-11')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 53, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which week was the team's bye week?", "schema": "CREATE TABLE table_name_78 (week INTEGER, attendance VARCHAR)", "sql": "SELECT MAX(week) FROM table_name_78 WHERE attendance = 'bye';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 61, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which translation was published in 1986?", "schema": "CREATE TABLE table_name_78 (translation VARCHAR, date VARCHAR)", "sql": "SELECT translation FROM table_name_78 WHERE date = 1986;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 56, "num_statements": 1} {"question": "What is the total number of 'recycling' facilities in 'Japan'?", "schema": "CREATE TABLE facilities (id INT, name TEXT, type TEXT, location TEXT); INSERT INTO facilities (id, name, type, location) VALUES (1, 'recycling plant', 'recycling', 'Japan'), (2, 'waste treatment plant', 'waste', 'Japan'), (3, 'recycling plant', 'recycling', 'China');", "sql": "SELECT COUNT(*) FROM facilities WHERE type = 'recycling' AND location = 'Japan';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 80, "num_statements": 1} {"question": "What is the total number of shipments for each warehouse, excluding cancelled shipments?", "schema": "CREATE TABLE shipments (shipment_id INT, warehouse_id VARCHAR(5), quantity INT, cancelled BOOLEAN); CREATE TABLE warehouses (warehouse_id VARCHAR(5), city VARCHAR(5), state VARCHAR(3)); INSERT INTO shipments VALUES (1, 'LAX', 200, FALSE), (2, 'NYC', 300, TRUE), (3, 'LAX', 100, FALSE), (4, 'JFK', 50, FALSE); INSERT INTO warehouses VALUES ('LAX', 'Los', ' Angeles'), ('NYC', 'New', ' York'), ('JFK', 'New', ' York');", "sql": "SELECT warehouses.warehouse_id, COUNT(shipments.shipment_id) FROM warehouses LEFT JOIN shipments ON warehouses.warehouse_id = shipments.warehouse_id WHERE NOT shipments.cancelled GROUP BY warehouses.warehouse_id;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 212, "num_statements": 1} {"question": "Show the total number of vehicles in 'Tokyo', 'Delhi', and 'Sao Paulo'", "schema": "CREATE TABLE public.vehicles (id INT, type VARCHAR(20), city VARCHAR(20)); INSERT INTO public.vehicles (id, type, city) VALUES (1, 'electric_car', 'Tokyo'), (2, 'conventional_car', 'Tokyo'), (3, 'autonomous_bus', 'Delhi'), (4, 'conventional_car', 'Delhi'), (5, 'electric_bus', 'Sao Paulo'), (6, 'conventional_bus', 'Sao Paulo');", "sql": "SELECT SUM(cnt) FROM (SELECT city, COUNT(*) AS cnt FROM public.vehicles WHERE city IN ('Tokyo', 'Delhi', 'Sao Paulo') GROUP BY city) AS subquery;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 145, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What event had 1 round and a record of 6-2?", "schema": "CREATE TABLE table_name_9 (event VARCHAR, round VARCHAR, record VARCHAR)", "sql": "SELECT event FROM table_name_9 WHERE round = 1 AND record = '6-2';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What lane did the swimmer with a time of 52.84 have?", "schema": "CREATE TABLE table_name_43 (lane INTEGER, time VARCHAR)", "sql": "SELECT MAX(lane) FROM table_name_43 WHERE time = 52.84;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 55, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the sum of year with the local host Sai?", "schema": "CREATE TABLE table_name_37 (year INTEGER, local_host_s_ VARCHAR)", "sql": "SELECT SUM(year) FROM table_name_37 WHERE local_host_s_ = 'sai';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 64, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is Hungary's highest Rank?", "schema": "CREATE TABLE table_name_38 (rank INTEGER, country VARCHAR)", "sql": "SELECT MAX(rank) FROM table_name_38 WHERE country = 'hungary';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 62, "num_statements": 1} {"question": "Insert a new record into the customers table", "schema": "CREATE TABLE customers (customer_id INT, first_name VARCHAR(50), last_name VARCHAR(50), email VARCHAR(100), phone_number VARCHAR(15), created_at TIMESTAMP);", "sql": "INSERT INTO customers (customer_id, first_name, last_name, email, phone_number, created_at) VALUES (1001, 'Jamal', 'Lewis', 'jamal.lewis@mail.com', '5551234567', '2022-01-01 10:30:00');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 185, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the latest year joined with a Conference championships of 5, and an Institution of university of north carolina?", "schema": "CREATE TABLE table_name_84 (joined INTEGER, conference_championships VARCHAR, institution VARCHAR)", "sql": "SELECT MAX(joined) FROM table_name_84 WHERE conference_championships = 5 AND institution = 'university of north carolina';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 122, "num_statements": 1} {"question": "Show the number of customer complaints regarding mobile and broadband services, pivoted by the service type and month.", "schema": "CREATE TABLE customer_complaints (complaint_id INT, complaint_date DATE, service_type VARCHAR(50)); INSERT INTO customer_complaints (complaint_id, complaint_date, service_type) VALUES (1, '2022-01-01', 'Mobile'), (2, '2022-02-01', 'Broadband'), (3, '2022-01-01', 'Broadband');", "sql": "SELECT EXTRACT(MONTH FROM complaint_date) as month, service_type, COUNT(*) as complaints FROM customer_complaints GROUP BY month, service_type;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 143, "num_statements": 1} {"question": "Find the maximum number of attendees in virtual tours across Asian countries, in the last 6 months.", "schema": "CREATE TABLE virtual_tours (id INT, location TEXT, attendees INT, tour_date DATE); INSERT INTO virtual_tours (id, location, attendees, tour_date) VALUES (1, 'Tokyo', 25, '2022-01-01'), (2, 'Seoul', 30, '2022-02-10');", "sql": "SELECT MAX(attendees) FROM virtual_tours WHERE location LIKE '%Asia%' AND tour_date >= DATE_SUB(NOW(), INTERVAL 6 MONTH);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 121, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Result of win, and a Score of 33-22 involved what event?", "schema": "CREATE TABLE table_name_4 (event VARCHAR, result VARCHAR, score VARCHAR)", "sql": "SELECT event FROM table_name_4 WHERE result = 'win' AND score = '33-22';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 72, "num_statements": 1} {"question": "What is the total number of eco-certified destinations in Oceania?", "schema": "CREATE TABLE destinations (destination_id INT, name VARCHAR(50), country_id INT, is_eco_certified BOOLEAN); INSERT INTO destinations (destination_id, name, country_id, is_eco_certified) VALUES (11, 'Great Barrier Reef', 14, true); INSERT INTO destinations (destination_id, name, country_id, is_eco_certified) VALUES (12, 'Fiordland National Park', 15, true);", "sql": "SELECT COUNT(*) FROM destinations d WHERE d.is_eco_certified = true AND d.country_id IN (SELECT country_id FROM countries WHERE continent = 'Oceania');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 151, "num_statements": 1} {"question": "Update the R&D expenditure for 'DrugB' to $2,500,000 in Q3 2019.", "schema": "CREATE TABLE rd_expenditure (drug_name TEXT, quarter INTEGER, year INTEGER, amount INTEGER); INSERT INTO rd_expenditure (drug_name, quarter, year, amount) VALUES ('DrugB', 3, 2019, 2000000);", "sql": "UPDATE rd_expenditure SET amount = 2500000 WHERE drug_name = 'DrugB' AND quarter = 3 AND year = 2019;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 101, "num_statements": 1} {"question": "What is the total number of defense diplomacy events in the diplomacy_2020 table, grouped by region?", "schema": "CREATE TABLE diplomacy_2020 (id INT, region VARCHAR(255), event VARCHAR(255)); INSERT INTO diplomacy_2020 VALUES (1, 'Asia', 'Summit A'), (2, 'Africa', 'Summit B'), (3, 'Asia', 'Summit C');", "sql": "SELECT region, COUNT(*) FROM diplomacy_2020 GROUP BY region;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 60, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: name the team for 36-29 record", "schema": "CREATE TABLE table_27902171_8 (team VARCHAR, record VARCHAR)", "sql": "SELECT team FROM table_27902171_8 WHERE record = '36-29';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 57, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the country that has a place of T6, a socre of 69-69=138, and where Niclas Fasth played?", "schema": "CREATE TABLE table_name_17 (country VARCHAR, player VARCHAR, place VARCHAR, score VARCHAR)", "sql": "SELECT country FROM table_name_17 WHERE place = 't6' AND score = 69 - 69 = 138 AND player = 'niclas fasth';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 107, "num_statements": 1} {"question": "pgTAP test for Proctap (assertion 72).", "schema": null, "sql": "SELECT * FROM check_test(\n isnt_procedure( 'argpubfunc' ),\n true,\n 'isnt_procedure(func)',\n 'Function argpubfunc() should not be a procedure',\n ''\n);", "explanation": "SQL assertion from pgTAP test suite for Proctap.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 164, "num_statements": 1} {"question": "What is the circular economy initiative in the city of London?", "schema": "CREATE TABLE circular_economy (city VARCHAR(255), initiative VARCHAR(255)); INSERT INTO circular_economy (city, initiative) VALUES ('London', 'Waste to Wealth');", "sql": "SELECT initiative FROM circular_economy WHERE city = 'London';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 62, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the Rd. Time for October 3, 2009?", "schema": "CREATE TABLE table_name_86 (rd VARCHAR, _time VARCHAR, date VARCHAR)", "sql": "SELECT rd, _time FROM table_name_86 WHERE date = 'october 3, 2009';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 67, "num_statements": 1} {"question": "PostgreSQL Rules: show example 35.", "schema": null, "sql": "INSERT INTO shoelace_log VALUES ( shoelace_data.sl_name, 6, current_user, current_timestamp ) FROM shoelace_data WHERE 6 <> shoelace_data.sl_avail AND shoelace_data.sl_name = 'sl7'; UPDATE shoelace_data SET sl_avail = 6 WHERE sl_name = 'sl7';", "explanation": "Example from PostgreSQL documentation on Rules.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 242, "num_statements": 2} {"question": "Write the DML statement from PostgreSQL regression test 'numeric' (example 227).", "schema": null, "sql": "INSERT INTO num_exp_div VALUES (5,3,'3804.41728329466357308584');", "explanation": "DML from PostgreSQL core regression test for Numeric.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 65, "num_statements": 1} {"question": "What is the total number of mobile and broadband subscribers for each technology type?", "schema": "CREATE TABLE mobile_subscribers (subscriber_id INT, technology VARCHAR(20)); CREATE TABLE broadband_subscribers (subscriber_id INT, technology VARCHAR(20)); INSERT INTO mobile_subscribers (subscriber_id, technology) VALUES (1, '4G'), (2, '5G'), (3, '3G'); INSERT INTO broadband_subscribers (subscriber_id, technology) VALUES (4, 'Fiber'), (5, 'Cable'), (6, 'DSL');", "sql": "SELECT 'Mobile' as source, technology, COUNT(*) as total FROM mobile_subscribers GROUP BY technology UNION ALL SELECT 'Broadband' as source, technology, COUNT(*) as total FROM broadband_subscribers GROUP BY technology;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 218, "num_statements": 1} {"question": "Show the number of dams in each state", "schema": "CREATE TABLE Dams (id INT, state VARCHAR(50)); INSERT INTO Dams (id, state) VALUES (1, 'California'), (2, 'Texas');", "sql": "SELECT state, COUNT(*) FROM Dams GROUP BY state;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 48, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the highest numbered event?", "schema": "CREATE TABLE table_30060356_3 (event INTEGER)", "sql": "SELECT MAX(event) FROM table_30060356_3;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 40, "num_statements": 1} {"question": "What is the average time taken for cases to be resolved for each ethnicity of judges?", "schema": "CREATE TABLE public.judges (id SERIAL PRIMARY KEY, name VARCHAR(255), age INT, ethnicity VARCHAR(255), appointment_date DATE); CREATE TABLE public.cases (id SERIAL PRIMARY KEY, judge_id INT, case_number VARCHAR(255), case_date DATE, case_type VARCHAR(255), court_location VARCHAR(255));", "sql": "SELECT j.ethnicity, AVG(c.case_date - j.appointment_date) as average_time_to_resolve FROM public.judges j JOIN public.cases c ON j.id = c.judge_id GROUP BY j.ethnicity;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": true, "sql_length": 168, "num_statements": 1} {"question": "Identify the agricultural innovation metrics that have the lowest average score in Central America and the Caribbean.", "schema": "CREATE TABLE innovation_metrics (id INT, name TEXT, score INT, region TEXT); INSERT INTO innovation_metrics (id, name, score, region) VALUES (1, 'Soil Monitoring', 7, 'Central America'), (2, 'Irrigation', 6, 'Caribbean'), (3, 'Crop Yield', 8, 'Central America'), (4, 'Livestock Management', 9, 'Caribbean');", "sql": "SELECT name, AVG(score) as avg_score FROM innovation_metrics WHERE region IN ('Central America', 'Caribbean') GROUP BY name ORDER BY avg_score LIMIT 1;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 151, "num_statements": 1} {"question": "What was the total donation amount by individuals in Canada in Q1 2021?", "schema": "CREATE TABLE Donations (id INT, donor_name VARCHAR(255), donation_amount DECIMAL(10,2), donation_date DATE); INSERT INTO Donations (id, donor_name, donation_amount, donation_date) VALUES (1, 'John Smith', 50.00, '2021-01-10'), (2, 'Emily Johnson', 75.00, '2021-03-15');", "sql": "SELECT SUM(donation_amount) FROM Donations WHERE donor_name NOT LIKE '%org%' AND donation_date BETWEEN '2021-01-01' AND '2021-03-31';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 133, "num_statements": 1} {"question": "What is the number of unique donors per country, for countries that have received donations?", "schema": "CREATE TABLE donors (id INT, name TEXT, organization TEXT, country TEXT);CREATE TABLE donations (id INT, donor_id INT, amount DECIMAL(10,2)); INSERT INTO donors (id, name, organization, country) VALUES (1, 'Donor A', 'Organization 1', 'Country A'), (2, 'Donor B', 'Organization 2', 'Country A'), (3, 'Donor C', 'Organization 3', 'Country B'), (4, 'Donor D', 'Organization 4', 'Country C'); INSERT INTO donations (id, donor_id, amount) VALUES (1, 1, 500.00), (2, 1, 750.00), (3, 2, 300.00), (4, 3, 1000.00), (5, 4, 250.00);", "sql": "SELECT donors.country, COUNT(DISTINCT donors.id) FROM donors INNER JOIN donations ON donors.id = donations.donor_id GROUP BY donors.country;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 140, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Show party names and the number of events for each party.", "schema": "CREATE TABLE party (party_name VARCHAR, party_id VARCHAR); CREATE TABLE party_events (party_id VARCHAR)", "sql": "SELECT T2.party_name, COUNT(*) FROM party_events AS T1 JOIN party AS T2 ON T1.party_id = T2.party_id GROUP BY T1.party_id;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 122, "num_statements": 1} {"question": "Which classical models were developed for explainable AI?", "schema": "CREATE TABLE Explainable_Models (Model_Type VARCHAR(20), Model_Name VARCHAR(30)); INSERT INTO Explainable_Models (Model_Type, Model_Name) VALUES ('Classical', 'Decision Trees'), ('Classical', 'Logistic Regression');", "sql": "SELECT Model_Name FROM Explainable_Models WHERE Model_Type = 'Classical';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 73, "num_statements": 1} {"question": "pgTAP test for Functap (assertion 39).", "schema": null, "sql": "SELECT * FROM check_test(\n hasnt_function( 'pg_catalog', 'now', 'whatever' ),\n false,\n 'simple schema.func with desc',\n 'whatever',\n ''\n);", "explanation": "SQL assertion from pgTAP test suite for Functap.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": true, "sql_length": 153, "num_statements": 1} {"question": "Which restorative justice programs have the highest participant satisfaction rates by facilitator?", "schema": "CREATE TABLE participants (participant_id INT, participant_satisfaction INT, program_id INT); CREATE TABLE programs (program_id INT, facilitator_id INT, program_type VARCHAR(255));", "sql": "SELECT facilitator_name, MAX(AVG(participant_satisfaction)) as avg_satisfaction FROM participants JOIN programs ON programs.program_id = participants.program_id JOIN facilitators ON programs.facilitator_id = facilitators.facilitator_id GROUP BY facilitator_name HAVING program_type = 'Restorative Justice';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 306, "num_statements": 1} {"question": "PL/pgSQL test: Pltcl Queries (example 14).", "schema": null, "sql": "-- test some error cases\ncreate function tcl_error(out a int, out b int) as $$returm 1$$ language pltcl;\nselect tcl_error();\n\ncreate function bad_record(out a text, out b text) as $$return [list a]$$ language pltcl;", "explanation": "PL/pgSQL example from PostgreSQL source test for Pltcl Queries.", "validation_query": null, "source": "plpgsql_source", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 215, "num_statements": 3} {"question": "Generate PostgreSQL SQL for: What is the week 12 opponent for the year that had a week 3 opponent of South Florida (3-0)?", "schema": "CREATE TABLE table_name_6 (week_12_nov_16 VARCHAR, week_3_sept_14 VARCHAR)", "sql": "SELECT week_12_nov_16 FROM table_name_6 WHERE week_3_sept_14 = 'south florida (3-0)';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 85, "num_statements": 1} {"question": "What is the maximum age of players who have played VR games?", "schema": "CREATE TABLE Players (PlayerID INT, Age INT, Gender VARCHAR(10), HasPlayedVR BOOLEAN); INSERT INTO Players (PlayerID, Age, Gender, HasPlayedVR) VALUES (1, 25, 'Male', true), (2, 30, 'Female', false), (3, 22, 'Male', true);", "sql": "SELECT MAX(Age) FROM Players WHERE HasPlayedVR = true;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 54, "num_statements": 1} {"question": "Delete the 'peacekeeping_view' view", "schema": "CREATE VIEW peacekeeping_view AS SELECT operation_id, name, location FROM peacekeeping_operations", "sql": "DROP VIEW peacekeeping_view;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "other", "is_postgresql_specific": false, "sql_length": 28, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What player that has a position of middle blocker, a Nationality of Turkey, and shirt no is 8?", "schema": "CREATE TABLE table_name_36 (player VARCHAR, shirt_no VARCHAR, position VARCHAR, nationality VARCHAR)", "sql": "SELECT player FROM table_name_36 WHERE position = 'middle blocker' AND nationality = 'turkey' AND shirt_no = 8;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 111, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is Result, when Date is \"June 11, 1994\", and when Venue is \"Miami, United States\"?", "schema": "CREATE TABLE table_name_58 (result VARCHAR, date VARCHAR, venue VARCHAR)", "sql": "SELECT result FROM table_name_58 WHERE date = 'june 11, 1994' AND venue = 'miami, united states';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 97, "num_statements": 1} {"question": "Get the number of volleyball games played in 2022", "schema": "CREATE TABLE volleyball_games (game_date DATE, team1 VARCHAR(255), team2 VARCHAR(255)); INSERT INTO volleyball_games (game_date, team1, team2) VALUES ('2022-01-01', 'Brazil', 'Russia'); INSERT INTO volleyball_games (game_date, team1, team2) VALUES ('2022-01-02', 'Italy', 'USA');", "sql": "SELECT COUNT(*) FROM volleyball_games WHERE YEAR(game_date) = 2022;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 67, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which Year is the highest one that has a Bronze of south korea, and a Silver of philippines?", "schema": "CREATE TABLE table_name_27 (year INTEGER, bronze VARCHAR, silver VARCHAR)", "sql": "SELECT MAX(year) FROM table_name_27 WHERE bronze = 'south korea' AND silver = 'philippines';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 92, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What Away team had an Attendance of 3,395?", "schema": "CREATE TABLE table_name_68 (away_team VARCHAR, attendance VARCHAR)", "sql": "SELECT away_team FROM table_name_68 WHERE attendance = '3,395';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 63, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who was the away team playing against collingwood?", "schema": "CREATE TABLE table_name_15 (away_team VARCHAR, home_team VARCHAR)", "sql": "SELECT away_team FROM table_name_15 WHERE home_team = 'collingwood';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 68, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Phil Mickelson has what To par?", "schema": "CREATE TABLE table_name_47 (to_par VARCHAR, player VARCHAR)", "sql": "SELECT to_par FROM table_name_47 WHERE player = 'phil mickelson';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 65, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What type of surface was played on when the score was 2–6, 6–1, [10–5]?", "schema": "CREATE TABLE table_name_7 (surface VARCHAR, score VARCHAR)", "sql": "SELECT surface FROM table_name_7 WHERE score = '2–6, 6–1, [10–5]';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "pgTAP test for Pgtap--Unpackaged--0.91.0 (assertion 386).", "schema": null, "sql": "ALTER EXTENSION pgtap ADD FUNCTION cast_context_is( NAME, NAME, TEXT );", "explanation": "SQL assertion from pgTAP test suite for Pgtap--Unpackaged--0.91.0.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 71, "num_statements": 1} {"question": "How many customers from each country have an account balance greater than 10000?", "schema": "CREATE TABLE customers (id INT, name VARCHAR(255), country VARCHAR(255)); INSERT INTO customers (id, name, country) VALUES (1, 'John Doe', 'USA'), (2, 'Jane Smith', 'Canada'), (3, 'Marie Lee', 'France'); CREATE TABLE accounts (id INT, customer_id INT, balance DECIMAL(10, 2)); INSERT INTO accounts (id, customer_id, balance) VALUES (1, 1, 12000.00), (2, 1, 18000.00), (3, 2, 6000.00), (4, 3, 1500.00);", "sql": "SELECT customers.country, COUNT(DISTINCT customers.id) FROM customers INNER JOIN accounts ON customers.id = accounts.customer_id WHERE accounts.balance > 10000 GROUP BY customers.country;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 187, "num_statements": 1} {"question": "Update the price of a menu item", "schema": "CREATE TABLE menu_items (item_id INT, item_name VARCHAR(255), price DECIMAL(5,2));", "sql": "UPDATE menu_items SET price = 15.99 WHERE item_id = 678;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 56, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the score for the team with a time of 2:44?", "schema": "CREATE TABLE table_name_8 (score VARCHAR, time VARCHAR)", "sql": "SELECT score FROM table_name_8 WHERE time = '2:44';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 51, "num_statements": 1} {"question": "What is the minimum number of accommodations provided, by accommodation type, for each region?", "schema": "CREATE TABLE Accommodations (ID INT PRIMARY KEY, Region VARCHAR(50), AccommodationType VARCHAR(50), Quantity INT); INSERT INTO Accommodations (ID, Region, AccommodationType, Quantity) VALUES (1, 'North America', 'Sign Language Interpretation', 300), (2, 'North America', 'Wheelchair Ramp', 250), (3, 'South America', 'Assistive Listening Devices', 150), (4, 'Asia', 'Mobility Assistance', 200), (5, 'Europe', 'Sign Language Interpretation', 400), (6, 'Africa', 'Wheelchair Ramp', 100);", "sql": "SELECT Region, AccommodationType, MIN(Quantity) as Minimum FROM Accommodations GROUP BY Region, AccommodationType;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 114, "num_statements": 1} {"question": "Update the total donation amount for donor 'Pedro Garcia' to $4500.", "schema": "CREATE TABLE donors (donor_id INT, donor_name TEXT, country TEXT, total_donation_amount FLOAT); INSERT INTO donors (donor_id, donor_name, country, total_donation_amount) VALUES (1, 'Juan Rodriguez', 'Mexico', 4000.00), (2, 'Natalia Ivanova', 'Russia', 5000.00), (3, 'Pedro Garcia', 'Brazil', 2500.00);", "sql": "WITH updated_pedro_garcia AS (UPDATE donors SET total_donation_amount = 4500.00 WHERE donor_name = 'Pedro Garcia' AND country = 'Brazil' RETURNING *) SELECT * FROM updated_pedro_garcia;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "dml_update", "is_postgresql_specific": true, "sql_length": 185, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the English word for twie?", "schema": "CREATE TABLE table_name_43 (english VARCHAR, twie VARCHAR, Limburgish VARCHAR)", "sql": "SELECT english FROM table_name_43 WHERE DUTCH(Limburgish) = twie;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 65, "num_statements": 1} {"question": "Who are the construction workers in Washington with a salary higher than the average salary?", "schema": "CREATE TABLE construction_workers (id INT, name VARCHAR(50), salary DECIMAL(10, 2), state VARCHAR(10)); INSERT INTO construction_workers (id, name, salary, state) VALUES (1, 'John Doe', 60000, 'Washington'); INSERT INTO construction_workers (id, name, salary, state) VALUES (2, 'Jane Smith', 55000, 'Washington');", "sql": "SELECT * FROM construction_workers WHERE state = 'Washington' AND salary > (SELECT AVG(salary) FROM construction_workers WHERE state = 'Washington');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 149, "num_statements": 1} {"question": "What is the minimum depth of marine protected areas in the Arctic Ocean region?", "schema": "CREATE TABLE arctic_marine_protected_areas (id INT, name TEXT, region TEXT, min_depth FLOAT); INSERT INTO arctic_marine_protected_areas (id, name, region, min_depth) VALUES (1, 'Norwegian Trench', 'Arctic', 3000.0), (2, 'Fram Strait', 'Arctic', 2500.0);", "sql": "SELECT MIN(min_depth) FROM arctic_marine_protected_areas WHERE region = 'Arctic';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 81, "num_statements": 1} {"question": "Which regions have the highest and lowest donation amounts?", "schema": "CREATE TABLE Donations (id INT, region VARCHAR(20), amount FLOAT); INSERT INTO Donations (id, region, amount) VALUES (1, 'Northeast', 25000.00), (2, 'Southeast', 30000.00), (3, 'Midwest', 20000.00), (4, 'Southwest', 15000.00), (5, 'Northwest', 35000.00), (6, 'Asia', 40000.00), (7, 'Africa', 10000.00), (8, 'Europe', 27000.00), (9, 'South America', 22000.00), (10, 'Australia', 32000.00);", "sql": "SELECT region, amount FROM Donations ORDER BY amount DESC LIMIT 1;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the name of the Representative Work in a year later than 2005 with a Result of nominated, and an Award of best variety show host?", "schema": "CREATE TABLE table_name_20 (representative_work VARCHAR, award VARCHAR, year VARCHAR, result VARCHAR)", "sql": "SELECT representative_work FROM table_name_20 WHERE year > 2005 AND result = 'nominated' AND award = 'best variety show host';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 126, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'pg_stat_statements' (example 43).", "schema": null, "sql": "SELECT * FROM test_squash WHERE id = ANY (ARRAY[1, 2, 3, 4, 5, 6, 7, 8, 9, 10])\n AND data = ANY (ARRAY[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]);", "explanation": "Example query from the 'pg_stat_statements' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": true, "sql_length": 138, "num_statements": 1} {"question": "Insert new records into the museum_operations table for a new exhibit.", "schema": "CREATE TABLE museum_operations (exhibit_id INT, exhibit_name TEXT, start_date DATE, end_date DATE, daily_visitors INT);", "sql": "INSERT INTO museum_operations (exhibit_id, exhibit_name, start_date, end_date, daily_visitors) VALUES (1001, 'Contemporary Art from Japan', '2023-03-01', '2023-05-31', 500);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 173, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'box' (example 8).", "schema": null, "sql": "INSERT INTO BOX_TBL (f1) VALUES ('[1, 2, 3, 4)');", "explanation": "DML from PostgreSQL core regression test for Box.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 49, "num_statements": 1} {"question": "Update the temperature records for Svalbard with the new measurements.", "schema": "CREATE TABLE arctic_regions (region_id INT, region_name VARCHAR(50)); CREATE TABLE weather (weather_id INT, region_id INT, measurement_date DATE, temperature DECIMAL(5,2)); INSERT INTO arctic_regions (region_id, region_name) VALUES (1, 'Alaska'), (2, 'Greenland'), (3, 'Svalbard'); INSERT INTO weather (weather_id, region_id, measurement_date, temperature) VALUES (1, 1, '2017-01-01', -10.5), (2, 1, '2017-12-31', 15.2), (3, 2, '2017-01-01', -25.6), (4, 2, '2017-12-31', -5.2), (5, 3, '2017-01-01', -12.2);", "sql": "UPDATE weather SET temperature = -3.0 WHERE region_id = (SELECT region_id FROM arctic_regions WHERE region_name = 'Svalbard') AND measurement_date = '2017-01-01'; UPDATE weather SET temperature = -4.2 WHERE region_id = (SELECT region_id FROM arctic_regions WHERE region_name = 'Svalbard') AND measurement_date = '2017-12-31';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 325, "num_statements": 2} {"question": "List all military technologies that were used in the last 2 military conflicts, including the technology type and conflict date.", "schema": "CREATE TABLE military_tech_usage (id INT, tech_type TEXT, tech_usage_date DATE, conflict TEXT); INSERT INTO military_tech_usage (id, tech_type, tech_usage_date, conflict) VALUES (1, 'Drones', '2020-02-01', 'Conflict A'), (2, 'Armored Vehicles', '2019-11-15', 'Conflict B');", "sql": "SELECT mt.tech_type, mt.tech_usage_date FROM military_tech_usage mt WHERE mt.tech_usage_date >= '2019-01-01';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 109, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What country has a loan as the type?", "schema": "CREATE TABLE table_name_6 (country VARCHAR, type VARCHAR)", "sql": "SELECT country FROM table_name_6 WHERE type = 'loan';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 53, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many players had a best winning average of 20?", "schema": "CREATE TABLE table_27533947_1 (games_won VARCHAR, best_winning_average VARCHAR)", "sql": "SELECT COUNT(games_won) FROM table_27533947_1 WHERE best_winning_average = '20';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 80, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the Date of Tie no 3?", "schema": "CREATE TABLE table_name_67 (date VARCHAR, tie_no VARCHAR)", "sql": "SELECT date FROM table_name_67 WHERE tie_no = '3';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 50, "num_statements": 1} {"question": "Determine the monthly sales growth of eco-friendly makeup products in the last year.", "schema": "CREATE TABLE MakeupSales (ProductID INT, ProductType VARCHAR(20), IsEcoFriendly BOOLEAN, Revenue DECIMAL(10,2), SaleDate DATE); INSERT INTO MakeupSales (ProductID, ProductType, IsEcoFriendly, Revenue, SaleDate) VALUES (1, 'Lipstick', TRUE, 50.00, '2022-01-15'); INSERT INTO MakeupSales (ProductID, ProductType, IsEcoFriendly, Revenue, SaleDate) VALUES (2, 'Eyeshadow', TRUE, 75.00, '2022-02-20'); INSERT INTO MakeupSales (ProductID, ProductType, IsEcoFriendly, Revenue, SaleDate) VALUES (3, 'Foundation', TRUE, 60.00, '2022-03-05'); INSERT INTO MakeupSales (ProductID, ProductType, IsEcoFriendly, Revenue, SaleDate) VALUES (4, 'Blush', TRUE, 80.00, '2022-04-10');", "sql": "SELECT EXTRACT(MONTH FROM SaleDate) AS Month, AVG(Revenue) AS AverageRevenue, LAG(AVG(Revenue)) OVER (ORDER BY EXTRACT(MONTH FROM SaleDate)) AS PreviousMonthAverage FROM MakeupSales WHERE ProductType = 'Makeup' AND IsEcoFriendly = TRUE GROUP BY EXTRACT(MONTH FROM SaleDate) ORDER BY EXTRACT(MONTH FROM SaleDate);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_window_function", "is_postgresql_specific": true, "sql_length": 312, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What season had an acquisition of free agency, and was higher than 9?", "schema": "CREATE TABLE table_name_89 (season VARCHAR, acquisition_via VARCHAR, number VARCHAR)", "sql": "SELECT season FROM table_name_89 WHERE acquisition_via = 'free agency' AND number > 9;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 86, "num_statements": 1} {"question": "What is the total number of factories in the workforce development sector that have implemented workforce diversity initiatives and have a workforce size above 500?", "schema": "CREATE TABLE factories (factory_id INT, sector VARCHAR(255), has_workforce_diversity_initiatives BOOLEAN, workforce_size INT); INSERT INTO factories (factory_id, sector, has_workforce_diversity_initiatives, workforce_size) VALUES (1, 'Workforce Development', TRUE, 600), (2, 'Workforce Development', TRUE, 400), (3, 'Workforce Development', FALSE, 500);", "sql": "SELECT COUNT(*) FROM factories WHERE sector = 'Workforce Development' AND has_workforce_diversity_initiatives = TRUE AND workforce_size > 500;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 142, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'eager_aggregate' (example 3).", "schema": null, "sql": "CREATE TABLE eager_agg_t3 (a int, b int, c double precision);", "explanation": "DDL from PostgreSQL core regression test for Eager Aggregate.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_table", "is_postgresql_specific": false, "sql_length": 61, "num_statements": 1} {"question": "What was the average price per gram of cannabis flower sold by each dispensary in the city of Toronto in the month of February 2022?", "schema": "CREATE TABLE Dispensaries (id INT, name VARCHAR(255), city VARCHAR(255), state VARCHAR(255));CREATE TABLE Inventory (id INT, dispensary_id INT, price DECIMAL(10, 2), product_type VARCHAR(255), grams INT, month INT, year INT);INSERT INTO Dispensaries (id, name, city, state) VALUES (1, 'CannaCorp', 'Toronto', 'ON');INSERT INTO Inventory (id, dispensary_id, price, product_type, grams, month, year) VALUES (1, 1, 20, 'flower', 3.5, 2, 2022);", "sql": "SELECT d.name, AVG(i.price/i.grams) as avg_price_per_gram FROM Dispensaries d JOIN Inventory i ON d.id = i.dispensary_id WHERE d.city = 'Toronto' AND i.product_type = 'flower' AND i.month = 2 AND i.year = 2022 GROUP BY d.name;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 226, "num_statements": 1} {"question": "What is the minimum travel time for an autonomous ride-hailing service in Singapore?", "schema": "CREATE TABLE autonomous_ride_hailing (ride_id INT, travel_time FLOAT, city VARCHAR(50));", "sql": "SELECT MIN(travel_time) FROM autonomous_ride_hailing WHERE city = 'Singapore';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 78, "num_statements": 1} {"question": "Write the SQL definition from PostgreSQL contrib extension 'contrib' (item 26).", "schema": null, "sql": "-- create the operator class for gist\nCREATE OPERATOR CLASS gist_trgm_ops\nFOR TYPE text USING gist\nAS\n OPERATOR 1 % (text, text),\n FUNCTION 1 gtrgm_consistent (internal, text, smallint, oid, internal),\n FUNCTION 2 gtrgm_union (internal, internal),\n FUNCTION 3 gtrgm_compress (internal),\n FUNCTION 4 gtrgm_decompress (internal),\n FUNCTION 5 gtrgm_penalty (internal, internal, internal),\n FUNCTION 6 gtrgm_picksplit (internal, internal),\n FUNCTION 7 gtrgm_same (gtrgm, gtrgm, internal),\n STORAGE gtrgm;", "explanation": "SQL definition from the 'contrib' PostgreSQL contrib extension.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "ddl_advanced", "is_postgresql_specific": true, "sql_length": 675, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'timestamptz' (example 39).", "schema": null, "sql": "INSERT INTO TIMESTAMPTZ_TBL VALUES ('1997-02-10 17:32:01-0800');", "explanation": "DML from PostgreSQL core regression test for Timestamptz.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 64, "num_statements": 1} {"question": "What is the total revenue for each restaurant, including the sum of sales for all menu items and additional charges?", "schema": "CREATE TABLE Restaurants (RestaurantID int, RestaurantName varchar(255)); CREATE TABLE MenuItems (MenuID int, MenuName varchar(255), RestaurantID int, Sales int); CREATE TABLE AdditionalCharges (ChargeID int, ChargeName varchar(255), RestaurantID int, ChargeAmt int);", "sql": "SELECT R.RestaurantName, SUM(M.Sales + AC.ChargeAmt) as TotalRevenue FROM Restaurants R INNER JOIN MenuItems M ON R.RestaurantID = M.RestaurantID INNER JOIN AdditionalCharges AC ON R.RestaurantID = AC.RestaurantID GROUP BY R.RestaurantName;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 240, "num_statements": 1} {"question": "What is the average budget allocated per public service in the education sector?", "schema": "CREATE TABLE EducationBudget (ID INT, Service VARCHAR(255), Budget INT); INSERT INTO EducationBudget (ID, Service, Budget) VALUES (1, 'Primary Education', 5000000), (2, 'Secondary Education', 6000000), (3, 'Tertiary Education', 8000000);", "sql": "SELECT AVG(Budget) FROM EducationBudget WHERE Service LIKE 'Education%';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 72, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many parties is the incumbent Bob Brady a member of?", "schema": "CREATE TABLE table_1341423_38 (party VARCHAR, incumbent VARCHAR)", "sql": "SELECT COUNT(party) FROM table_1341423_38 WHERE incumbent = 'Bob Brady';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 72, "num_statements": 1} {"question": "What is the change in monthly water usage for each household in Austin from 2020 to 2021?", "schema": "CREATE TABLE Household_Water_Usage (ID INT, Household VARCHAR(20), Year INT, Month INT, Usage FLOAT);", "sql": "SELECT h20_21.Household, AVG(h20_21.Usage - h20_20.Usage) as Avg_Change FROM Household_Water_Usage h20_21, Household_Water_Usage h20_20 WHERE h20_21.Household = h20_20.Household AND h20_20.Year = 2020 AND h20_21.Year = 2021 GROUP BY h20_21.Household;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 250, "num_statements": 1} {"question": "Insert a new artifact 'Roman Coin' with ArtifactID 4, type 'Coin', quantity 20, and belonging to site 'Pompeii' (SiteID 3).", "schema": "CREATE TABLE ExcavationSites (SiteID INT, SiteName TEXT, Country TEXT); CREATE TABLE Artifacts (ArtifactID INT, SiteID INT, ArtifactName TEXT, ArtifactType TEXT, Quantity INT);", "sql": "INSERT INTO Artifacts (ArtifactID, SiteID, ArtifactName, ArtifactType, Quantity) VALUES (4, 3, 'Roman Coin', 'Coin', 20);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 121, "num_statements": 1} {"question": "Present the types of military equipment from the USA", "schema": "CREATE TABLE military_equipment (id INT PRIMARY KEY, name VARCHAR(255), type VARCHAR(255), country VARCHAR(255));", "sql": "SELECT type FROM military_equipment WHERE country = 'USA';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 58, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many ends were won where the blank ends are smaller than 3.0?", "schema": "CREATE TABLE table_25718552_2 (Ends VARCHAR, blank_ends INTEGER)", "sql": "SELECT Ends AS won FROM table_25718552_2 WHERE blank_ends < 3.0;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 64, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who was the opponent when the record was 11-6-2?", "schema": "CREATE TABLE table_name_25 (opponent VARCHAR, record VARCHAR)", "sql": "SELECT opponent FROM table_name_25 WHERE record = '11-6-2';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 59, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the Branding with a Callsign DWLL?", "schema": "CREATE TABLE table_name_99 (branding VARCHAR, callsign VARCHAR)", "sql": "SELECT branding FROM table_name_99 WHERE callsign = 'dwll';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 59, "num_statements": 1} {"question": "What are the crops planted by farmers aged 40 or older?", "schema": "CREATE TABLE farmers (id INT PRIMARY KEY, name VARCHAR(50), age INT, location VARCHAR(50)); INSERT INTO farmers (id, name, age, location) VALUES (1, 'John Doe', 35, 'New York'); INSERT INTO farmers (id, name, age, location) VALUES (2, 'Jane Smith', 40, 'Los Angeles'); CREATE TABLE crops (id INT PRIMARY KEY, name VARCHAR(50), growth_season VARCHAR(50), planted_by INT, FOREIGN KEY (planted_by) REFERENCES farmers(id)); INSERT INTO crops (id, name, growth_season, planted_by) VALUES (1, 'Corn', 'Summer', 1); INSERT INTO crops (id, name, growth_season, planted_by) VALUES (2, 'Carrots', 'Winter', 2);", "sql": "SELECT crops.name FROM crops INNER JOIN farmers ON crops.planted_by = farmers.id WHERE farmers.age >= 40;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 105, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: When vp8 ( webm ) is 4.4, how much is vp9 ( webm )", "schema": "CREATE TABLE table_26099252_1 (vp9___webm__ VARCHAR, vp8___webm__ VARCHAR)", "sql": "SELECT vp9___webm__ FROM table_26099252_1 WHERE vp8___webm__ = '4.4';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 69, "num_statements": 1} {"question": "What's the minimum donation amount made by donors from California in the year 2021?", "schema": "CREATE TABLE Donors (DonorID INT, DonorName VARCHAR(100), DonationAmount DECIMAL(10,2), DonationDate DATE, DonorState VARCHAR(50));", "sql": "SELECT MIN(DonationAmount) FROM Donors WHERE DonorState = 'California' AND YEAR(DonationDate) = 2021;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 101, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Show the station name with at least two trains.", "schema": "CREATE TABLE station (name VARCHAR, station_id VARCHAR); CREATE TABLE train_station (station_id VARCHAR)", "sql": "SELECT T2.name FROM train_station AS T1 JOIN station AS T2 ON T1.station_id = T2.station_id GROUP BY T1.station_id HAVING COUNT(*) >= 2;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 136, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'dblink' (example 24).", "schema": null, "sql": "INSERT INTO foo_1 VALUES (0,'a','{\"a0\",\"b0\",\"c0\"}');", "explanation": "Example query from the 'dblink' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 52, "num_statements": 1} {"question": "PostgreSQL regression test 'jsonb': Write the SELECT query (example 234).", "schema": null, "sql": "select '[{\"b\": \"c\"}, {\"b\": \"cc\"}]'::jsonb #> array['1','b'];", "explanation": "Regression test for Jsonb in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select '[{\"b\": \"c\"}, {\"b\": \"cc\"}]'::jsonb #> array['1','b']) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": true, "sql_length": 60, "num_statements": 1} {"question": "Count the number of unique esports events where at least one player from Asia participated, and the number of unique FPS games played in these events.", "schema": "CREATE TABLE EsportsEvents (EventID INT, EventName VARCHAR(50)); CREATE TABLE Players (PlayerID INT, Age INT, Gender VARCHAR(10), Region VARCHAR(20)); CREATE TABLE PlayerEvent (PlayerID INT, EventID INT); CREATE TABLE Games (GameID INT, GameName VARCHAR(50), Genre VARCHAR(20)); CREATE TABLE GameEvent (GameID INT, EventID INT, GameType VARCHAR(10)); CREATE TABLE VR_Games (GameID INT, IsVR INT);", "sql": "SELECT COUNT(DISTINCT EsportsEvents.EventID), COUNT(DISTINCT Games.GameID) FROM EsportsEvents INNER JOIN PlayerEvent ON EsportsEvents.EventID = PlayerEvent.EventID INNER JOIN Players ON PlayerEvent.PlayerID = Players.PlayerID INNER JOIN Games ON GameEvent.GameID = Games.GameID INNER JOIN GameEvent ON EsportsEvents.EventID = GameEvent.EventID INNER JOIN VR_Games ON Games.GameID = VR_Games.GameID WHERE Players.Region = 'Asia' AND Games.Genre = 'FPS';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 452, "num_statements": 1} {"question": "Delete all artifacts related to a specific excavation site", "schema": "CREATE TABLE ExcavationSites (SiteID int, Name varchar(50), Country varchar(50), StartDate date); INSERT INTO ExcavationSites (SiteID, Name, Country, StartDate) VALUES (5, 'Site E', 'Egypt', '2013-11-11'); CREATE TABLE Artifacts (ArtifactID int, SiteID int, Name varchar(50), Description text, DateFound date); INSERT INTO Artifacts (ArtifactID, SiteID, Name, Description, DateFound) VALUES (4, 5, 'Artifact W', 'An Egyptian artifact', '2017-07-07');", "sql": "DELETE FROM Artifacts WHERE SiteID = 5;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_delete", "is_postgresql_specific": false, "sql_length": 39, "num_statements": 1} {"question": "Number of patients who did not show improvement after dialectical behavior therapy (DBT) treatment in the USA.", "schema": "CREATE TABLE patients (patient_id INT, country VARCHAR(50)); INSERT INTO patients (patient_id, country) VALUES (1, 'USA'), (2, 'Canada'), (3, 'USA'); CREATE TABLE treatments (patient_id INT, treatment VARCHAR(10), improvement BOOLEAN); INSERT INTO treatments (patient_id, treatment, improvement) VALUES (1, 'DBT', FALSE), (2, 'DBT', TRUE), (3, 'CBT', TRUE);", "sql": "SELECT COUNT(patients.patient_id) FROM patients INNER JOIN treatments ON patients.patient_id = treatments.patient_id WHERE treatments.treatment = 'DBT' AND patients.country = 'USA' AND treatments.improvement = FALSE;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 216, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which season did the Minnesota Kicks lose 13 games and scored 156 points?", "schema": "CREATE TABLE table_name_34 (season VARCHAR, lost VARCHAR, points VARCHAR)", "sql": "SELECT COUNT(season) FROM table_name_34 WHERE lost = 13 AND points = 156;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 73, "num_statements": 1} {"question": "List all historical sites in Spain with over 1000 virtual tours, ordered by the number of virtual tours in descending order.", "schema": "CREATE TABLE historical_sites(site_id INT, site_name TEXT, country TEXT, num_virtual_tours INT); INSERT INTO historical_sites(site_id, site_name, country, num_virtual_tours) VALUES (1, 'Alhambra', 'Spain', 1500), (2, 'Sagrada Familia', 'Spain', 1200), (3, 'Mosque of Cordoba', 'Spain', 800);", "sql": "SELECT site_id, site_name, num_virtual_tours FROM historical_sites WHERE country = 'Spain' AND num_virtual_tours > 1000 ORDER BY num_virtual_tours DESC;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 152, "num_statements": 1} {"question": "Find the percentage of visitors that engaged with online exhibitions from North America and Europe combined.", "schema": "CREATE TABLE Online_Interaction (id INT, user_id INT, interaction_date DATE, country VARCHAR(50)); INSERT INTO Online_Interaction (id, user_id, interaction_date, country) VALUES (1, 1, '2022-05-01', 'USA'), (2, 3, '2022-05-15', 'Canada'), (3, 5, '2022-04-20', 'France'), (4, 7, '2022-03-25', 'UK');", "sql": "SELECT (COUNT(DISTINCT CASE WHEN country IN ('USA', 'Canada', 'France', 'UK') THEN Online_Interaction.user_id END) * 100.0 / COUNT(DISTINCT Online_Interaction.user_id)) as percentage FROM Online_Interaction;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 207, "num_statements": 1} {"question": "What is the total number of patients that have completed their treatment for each condition?", "schema": "CREATE TABLE TreatmentOutcomes (TreatmentID int, ConditionID int, Completed int); INSERT INTO TreatmentOutcomes (TreatmentID, ConditionID, Completed) VALUES (1, 1, 1), (2, 1, 0), (3, 2, 1);", "sql": "SELECT Conditions.Condition, SUM(TreatmentOutcomes.Completed) FROM TreatmentOutcomes JOIN Conditions ON TreatmentOutcomes.ConditionID = Conditions.ConditionID GROUP BY Conditions.Condition;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 189, "num_statements": 1} {"question": "PL/pgSQL test: Pltcl Queries (example 64).", "schema": null, "sql": "select tcl_spi_exec(false, 'continue');", "explanation": "PL/pgSQL example from PostgreSQL source test for Pltcl Queries.", "validation_query": null, "source": "plpgsql_source", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 39, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'ltree' (example 129).", "schema": null, "sql": "SELECT 'a.b.c.d.e'::ltree ~ 'a.*.e';", "explanation": "Example query from the 'ltree' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 36, "num_statements": 1} {"question": "What is the average distance each marathoner ran in the Olympics?", "schema": "CREATE TABLE olympic_marathon (athlete VARCHAR(50), distance INT); INSERT INTO olympic_marathon (athlete, distance) VALUES ('Eliud Kipchoge', 42195), ('Feyisa Lilesa', 42320), ('Galen Rupp', 42200);", "sql": "SELECT AVG(distance) AS avg_distance FROM olympic_marathon;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 59, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'create_misc' (example 43).", "schema": null, "sql": "INSERT INTO f_star (class, a, c, f)\n VALUES ('f', 21, 'hi marcel'::name, '(11,44),(22,55),(33,66)'::polygon);", "explanation": "DML from PostgreSQL core regression test for Create Misc.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 111, "num_statements": 1} {"question": "What is the average lead time for each fabric supplier in the Asia-Pacific region?", "schema": "CREATE TABLE suppliers (supplier_id INT, supplier_name VARCHAR(50), location VARCHAR(50), lead_time INT);", "sql": "SELECT location, AVG(lead_time) as avg_lead_time FROM suppliers WHERE location LIKE '%Asia-Pacific%' GROUP BY location;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 119, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the sum avg/g with an effic of 858.4?", "schema": "CREATE TABLE table_name_61 (avg_g INTEGER, effic VARCHAR)", "sql": "SELECT SUM(avg_g) FROM table_name_61 WHERE effic = 858.4;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 57, "num_statements": 1} {"question": "PostgreSQL regression test 'subselect': Write the SELECT query (example 214).", "schema": null, "sql": "select * from exists_tbl t1\n where (exists(select 1 from exists_tbl t2 where t1.c1 = t2.c2) or c3 < 0);", "explanation": "Regression test for Subselect in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select * from exists_tbl t1\n where (exists(select 1 from exists_tbl t2 where t1.c1 = t2.c2) or c3 < 0)) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 104, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'numeric_big' (example 70).", "schema": null, "sql": "INSERT INTO num_exp_add VALUES (1,2,'-994877526002806872754342148663997.64812998474240514147207095573950146764154822009863493316394610578375247334825932838513167168342610420582834742950389452212867974756590355021495169819086060202117180229196935525386766373096687306110481009743118940565957556492470398904849289222365256698601073536111216152709126800604695001949246634784573028721762079936564434050796321975774729383704426321489070979168993853338252728216162346796960170352897972568238870481118474064783391570102958474141459619245240874849766946530000977144965');", "explanation": "DML from PostgreSQL core regression test for Numeric Big.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 563, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the score for game for 25", "schema": "CREATE TABLE table_17103729_8 (score VARCHAR, game VARCHAR)", "sql": "SELECT score FROM table_17103729_8 WHERE game = 25;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 51, "num_statements": 1} {"question": "PL/pgSQL test: Plperl (example 61).", "schema": null, "sql": "CREATE OR REPLACE FUNCTION foo_good() RETURNS SETOF footype AS $$\nreturn [\n {x => 1, y => 2},\n {x => 3, y => 4}\n];\n$$ LANGUAGE plperl;", "explanation": "PL/pgSQL example from PostgreSQL source test for Plperl.", "validation_query": null, "source": "plpgsql_source", "difficulty": "advanced", "category": "plpgsql_function", "is_postgresql_specific": true, "sql_length": 140, "num_statements": 2} {"question": "What is the average number of training hours for employees in the 'Marketing' department who have completed diversity and inclusion training?", "schema": "CREATE TABLE Employee_Training (Employee_ID INT, Employee_Name VARCHAR(50), Department VARCHAR(50), Training_Type VARCHAR(50), Hours_Spent DECIMAL(5,2)); INSERT INTO Employee_Training (Employee_ID, Employee_Name, Department, Training_Type, Hours_Spent) VALUES (6, 'Alex Johnson', 'Marketing', 'Diversity and Inclusion', 5.00), (7, 'Taylor Lee', 'Marketing', 'Diversity and Inclusion', 4.00), (8, 'Jasmine Brown', 'Marketing', 'Cybersecurity', 7.00);", "sql": "SELECT AVG(Hours_Spent) FROM Employee_Training WHERE Department = 'Marketing' AND Training_Type = 'Diversity and Inclusion';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 124, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'select_implicit' (example 10).", "schema": null, "sql": "INSERT INTO test_missing_target VALUES (8, 4, 'CCCC', 'I');", "explanation": "DML from PostgreSQL core regression test for Select Implicit.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 59, "num_statements": 1} {"question": "What is the percentage of climate finance that went to climate adaptation projects in South America between 2010 and 2015?", "schema": "CREATE TABLE climate_finance (region VARCHAR(255), year INT, project_type VARCHAR(255), amount FLOAT);", "sql": "SELECT (SUM(CASE WHEN project_type = 'climate adaptation' THEN amount ELSE 0 END) / SUM(amount)) * 100 AS adaptation_percentage FROM climate_finance WHERE year BETWEEN 2010 AND 2015 AND region = 'South America';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 211, "num_statements": 1} {"question": "What is the maximum property price in each neighborhood in San Francisco?", "schema": "CREATE TABLE sf_neighborhoods (id INT, name VARCHAR(50)); INSERT INTO sf_neighborhoods (id, name) VALUES (1, 'Mission'), (2, 'Castro'), (3, 'Haight-Ashbury'); CREATE TABLE properties (id INT, neighborhood_id INT, price INT); INSERT INTO properties (id, neighborhood_id, price) VALUES (1, 1, 500000), (2, 2, 400000), (3, 1, 600000);", "sql": "SELECT properties.neighborhood_id, MAX(properties.price) FROM properties GROUP BY properties.neighborhood_id;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 109, "num_statements": 1} {"question": "What is the minimum installed capacity of wind turbines for all renewable energy projects in Germany?", "schema": "CREATE TABLE renewable_energy (project_name VARCHAR(50), country VARCHAR(50), wind_capacity INT); INSERT INTO renewable_energy (project_name, country, wind_capacity) VALUES ('Project1', 'Germany', 5000), ('Project2', 'Germany', 12000), ('Project3', 'Germany', 8000);", "sql": "SELECT MIN(wind_capacity) FROM renewable_energy WHERE country = 'Germany';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 74, "num_statements": 1} {"question": "PostgreSQL regression test 'jsonb_jsonpath': Write the SELECT query (example 376).", "schema": null, "sql": "select jsonb_path_query('0', '$.boolean()');", "explanation": "Regression test for Jsonb Jsonpath in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select jsonb_path_query('0', '$.boolean()')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "advanced", "category": "query_select", "is_postgresql_specific": false, "sql_length": 44, "num_statements": 1} {"question": "pgTAP test for Pgtap--Unpackaged--0.91.0 (assertion 515).", "schema": null, "sql": "ALTER EXTENSION pgtap ADD FUNCTION function_returns( NAME, NAME[], TEXT );", "explanation": "SQL assertion from pgTAP test suite for Pgtap--Unpackaged--0.91.0.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 74, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who is the opponent in the final on 24 November 2008?", "schema": "CREATE TABLE table_name_43 (opponent_in_the_final VARCHAR, date VARCHAR)", "sql": "SELECT opponent_in_the_final FROM table_name_43 WHERE date = '24 november 2008';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 80, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'numeric' (example 795).", "schema": null, "sql": "INSERT INTO num_input_test(n1) VALUES ('');", "explanation": "DML from PostgreSQL core regression test for Numeric.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 43, "num_statements": 1} {"question": "What is the total number of military personnel involved in cybersecurity operations in Asia?", "schema": "CREATE TABLE MilitaryCyberOps (Id INT, Region VARCHAR(50), Personnel INT, Year INT); INSERT INTO MilitaryCyberOps (Id, Region, Personnel, Year) VALUES (1, 'Asia', 200, 2021); INSERT INTO MilitaryCyberOps (Id, Region, Personnel, Year) VALUES (2, 'Europe', 300, 2021);", "sql": "SELECT SUM(Personnel) FROM MilitaryCyberOps WHERE Region = 'Asia';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "What is the total amount donated and number of donations for each quarter in the 'donations' table?", "schema": "CREATE TABLE donations (donation_id INT, donation_date DATE, donation_amount FLOAT); INSERT INTO donations (donation_id, donation_date, donation_amount) VALUES (1, '2022-01-01', 50.00), (2, '2022-02-01', 100.00), (3, '2022-03-01', 150.00);", "sql": "SELECT DATE_TRUNC('quarter', donation_date) as quarter, SUM(donation_amount) as total_donation, COUNT(donation_id) as num_donations FROM donations GROUP BY quarter ORDER BY quarter;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 181, "num_statements": 1} {"question": "List the names of investors who have invested in companies that have a female founder.", "schema": "CREATE TABLE Companies (id INT, name TEXT, founder_gender TEXT); INSERT INTO Companies (id, name, founder_gender) VALUES (1, 'Daisy Enterprise', 'Female'); INSERT INTO Companies (id, name, founder_gender) VALUES (2, 'Bright Star Corp', 'Male'); CREATE TABLE Investors (id INT, name TEXT); INSERT INTO Investors (id, name) VALUES (1, 'Venture Capital 3'); INSERT INTO Investors (id, name) VALUES (2, 'Angel Investor 3');", "sql": "SELECT Investors.name FROM Companies INNER JOIN Investors ON TRUE WHERE Companies.founder_gender = 'Female';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 108, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'generated_stored' (example 54).", "schema": null, "sql": "INSERT INTO gtest1v VALUES (4, 8); -- error\nINSERT INTO gtest1v VALUES (5, DEFAULT); -- ok\nINSERT INTO gtest1v VALUES (6, 66), (7, 77); -- error\nINSERT INTO gtest1v VALUES (6, DEFAULT), (7, 77); -- error\nINSERT INTO gtest1v VALUES (6, 66), (7, DEFAULT); -- error\nINSERT INTO gtest1v VALUES (6, DEFAULT), (7, DEFAULT); -- ok\n\nALTER VIEW gtest1v ALTER COLUMN b SET DEFAULT 100;", "explanation": "DML from PostgreSQL core regression test for Generated Stored.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 381, "num_statements": 7} {"question": "PostgreSQL regression test 'xml': Write the SELECT query (example 72).", "schema": null, "sql": "SELECT xmlroot(xml '', version no value, standalone no value);", "explanation": "Regression test for Xml in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT xmlroot(xml '', version no value, standalone no value)) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 68, "num_statements": 1} {"question": "pgTAP test for Hastap (assertion 48).", "schema": null, "sql": "SELECT * FROM check_test(\n has_view( '__SDFSDFD__', 'howdy' ),\n false,\n 'has_view(non-existent view, desc)',\n 'howdy',\n ''\n);", "explanation": "SQL assertion from pgTAP test suite for Hastap.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 140, "num_statements": 1} {"question": "What is the total number of virtual tour views for hotels in Miami?", "schema": "CREATE TABLE cultural_heritage (site_id INT, hotel_id INT, attendance INT); INSERT INTO cultural_heritage (site_id, hotel_id, attendance) VALUES (1, 1, 500), (2, 2, 300);", "sql": "SELECT SUM(vt.num_views) FROM virtual_tours vt INNER JOIN hotel_info hi ON vt.hotel_id = hi.hotel_id WHERE hi.city = 'Miami';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 125, "num_statements": 1} {"question": "Which teams have the highest and lowest average ticket prices for regular seats?", "schema": "CREATE TABLE Teams (TeamID INT, TeamName VARCHAR(50), AvgRegularTicketPrice DECIMAL(5,2));", "sql": "SELECT TeamName FROM Teams WHERE AvgRegularTicketPrice = (SELECT MAX(AvgRegularTicketPrice) FROM Teams) OR AvgRegularTicketPrice = (SELECT MIN(AvgRegularTicketPrice) FROM Teams);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 178, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What kind of Entrant has a Maserati Straight-6 Engine, built in the year 1958 or early, and has a Maserati 250f Chassis with points less than 4?", "schema": "CREATE TABLE table_name_95 (entrant VARCHAR, points VARCHAR, chassis VARCHAR, engine VARCHAR, year VARCHAR)", "sql": "SELECT entrant FROM table_name_95 WHERE engine = 'maserati straight-6' AND year < 1958 AND chassis = 'maserati 250f' AND points < 4;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 132, "num_statements": 1} {"question": "Show an example of PostgreSQL ALTER INDEX (example 4).", "schema": null, "sql": "CREATE INDEX coord_idx ON measured (x, y, (z + t)); ALTER INDEX coord_idx ALTER COLUMN 3 SET STATISTICS 1000;", "explanation": "PostgreSQL ALTER INDEX command.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "ddl_index", "is_postgresql_specific": false, "sql_length": 109, "num_statements": 2} {"question": "Show a SQL definition from the timescaledb project (create_table_with, item 25).", "schema": null, "sql": "CREATE TABLE t7(time timestamptz, device text, value float) WITH (timescaledb.hypertable,tsdb.partition_column='time');", "explanation": "SQL definition from the open-source timescaledb PostgreSQL project.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_table", "is_postgresql_specific": false, "sql_length": 119, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: When did they win 7 races?", "schema": "CREATE TABLE table_24937583_1 (seasons VARCHAR, wins VARCHAR)", "sql": "SELECT seasons FROM table_24937583_1 WHERE wins = 7;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 52, "num_statements": 1} {"question": "Show a SQL definition from the citus project (multi_partitioning_utils, item 44).", "schema": null, "sql": "CREATE TABLE partition_child_1_schema.child_1 (id int NOT NULL, time date );", "explanation": "SQL definition from the open-source citus PostgreSQL project.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_table", "is_postgresql_specific": false, "sql_length": 76, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the least frequency Mhz with call sign of k202ag", "schema": "CREATE TABLE table_name_24 (frequency_mhz INTEGER, call_sign VARCHAR)", "sql": "SELECT MIN(frequency_mhz) FROM table_name_24 WHERE call_sign = 'k202ag';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 72, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: I want to know the tie for drawn of 47", "schema": "CREATE TABLE table_name_46 (tied VARCHAR, drawn VARCHAR)", "sql": "SELECT tied FROM table_name_46 WHERE drawn = '47';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 50, "num_statements": 1} {"question": "What is the average rating for each product, and the number of ratings for each product, ordered by the average rating in descending order?", "schema": "CREATE TABLE product (product_id INT, name VARCHAR(50), rating DECIMAL(3,2), num_ratings INT); INSERT INTO product VALUES (1, 'Product A', 4.5, 100), (2, 'Product B', 3.5, 200), (3, 'Product C', 5.0, 50);", "sql": "SELECT name, AVG(rating) as avg_rating, num_ratings FROM product GROUP BY name ORDER BY avg_rating DESC, num_ratings DESC;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 122, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: List the order dates of all the bookings.", "schema": "CREATE TABLE BOOKINGS (Order_Date VARCHAR)", "sql": "SELECT Order_Date FROM BOOKINGS;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 32, "num_statements": 1} {"question": "Update a record with community health statistic for a specific community type", "schema": "CREATE TABLE community_health_statistics_v2 (id INT, community_type VARCHAR(20), statistic_value INT);", "sql": "UPDATE community_health_statistics_v2 SET statistic_value = 110 WHERE community_type = 'Urban';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 95, "num_statements": 1} {"question": "Show a SQL definition from the timescaledb project (compat, item 26).", "schema": null, "sql": "CREATE OR REPLACE FUNCTION _timescaledb_internal.policy_compression_check(config jsonb) RETURNS void LANGUAGE PLPGSQL AS $$\nBEGIN\n IF current_setting('timescaledb.enable_deprecation_warnings', true)::bool THEN\n RAISE WARNING 'function _timescaledb_internal.policy_compression_check(jsonb) is deprecated and has been moved to _timescaledb_functions schema. this compatibility function will be removed in a future version.';\n END IF;\n PERFORM _timescaledb_functions.policy_compression_check($1);\nEND$$\nSET search_path TO pg_catalog,pg_temp;", "explanation": "SQL definition from the open-source timescaledb PostgreSQL project.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "plpgsql_function", "is_postgresql_specific": true, "sql_length": 544, "num_statements": 4} {"question": "Generate PostgreSQL SQL for: What is the rank of the arch with a length in meters of 75/55?", "schema": "CREATE TABLE table_name_29 (rank VARCHAR, length___m__ VARCHAR)", "sql": "SELECT rank FROM table_name_29 WHERE length___m__ = '75/55';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 60, "num_statements": 1} {"question": "pgTAP test for Valueset (assertion 274).", "schema": null, "sql": "-- Make sure that dupes are not ignored.\nSELECT * FROM check_test(\n bag_has( 'anames', 'VALUES (44, ''Anna''), (44, ''Anna'')' ),\n false,\n 'bag_has( prepared, dupes )',\n '',\n ' Missing records:\n (44,Anna)'\n);", "explanation": "SQL assertion from pgTAP test suite for Valueset.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 233, "num_statements": 1} {"question": "List all wells that were drilled in the 'Haynesville' shale play and had production greater than 2000 in any quarter.", "schema": "CREATE TABLE wells (well_id INT, well_name VARCHAR(50), shale_play VARCHAR(50), production_q1 FLOAT, production_q2 FLOAT, production_q3 FLOAT, production_q4 FLOAT); INSERT INTO wells (well_id, well_name, shale_play, production_q1, production_q2, production_q3, production_q4) VALUES (1, 'Well M', 'Haynesville', 2200, 2400, 2600, 2800), (2, 'Well N', 'Barnett', 1900, 2150, 2400, 2650);", "sql": "SELECT well_name FROM wells WHERE shale_play = 'Haynesville' AND (production_q1 > 2000 OR production_q2 > 2000 OR production_q3 > 2000 OR production_q4 > 2000);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 160, "num_statements": 1} {"question": "PostgreSQL regression test 'jsonb': Write the SELECT query (example 73).", "schema": null, "sql": "SELECT '{\"a\":\"b\", \"b\":1, \"c\":null}'::jsonb @> '{\"g\":null}';", "explanation": "Regression test for Jsonb in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT '{\"a\":\"b\", \"b\":1, \"c\":null}'::jsonb @> '{\"g\":null}') AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": true, "sql_length": 59, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Championship larger than 3, and a FA Cup smaller than 3, and a Total smaller than 6 involves what highest league cup?", "schema": "CREATE TABLE table_name_27 (league_cup INTEGER, total VARCHAR, championship VARCHAR, fa_cup VARCHAR)", "sql": "SELECT MAX(league_cup) FROM table_name_27 WHERE championship > 3 AND fa_cup < 3 AND total < 6;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 94, "num_statements": 1} {"question": "Update team names to uppercase in the 'esports_teams' table", "schema": "CREATE TABLE esports_teams (team_id INT, team_name VARCHAR(50));", "sql": "UPDATE esports_teams SET team_name = UPPER(team_name);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 54, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the lowest number of silver medals with a rank of 4 and total medals greater than 1?", "schema": "CREATE TABLE table_name_68 (silver INTEGER, rank VARCHAR, total VARCHAR)", "sql": "SELECT MIN(silver) FROM table_name_68 WHERE rank = 4 AND total > 1;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 67, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the call sign for ERP W of 99?", "schema": "CREATE TABLE table_name_68 (call_sign VARCHAR, erp_w VARCHAR)", "sql": "SELECT call_sign FROM table_name_68 WHERE erp_w = 99;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 53, "num_statements": 1} {"question": "What is the maximum depth reached by a marine species in the Southern Ocean, along with its habitat and species name?", "schema": "CREATE TABLE southern_ocean_depths (id INT, species_name VARCHAR(255), depth FLOAT, habitat VARCHAR(255)); INSERT INTO southern_ocean_depths (id, species_name, depth, habitat) VALUES (1, 'Southern Right Whale', 300, 'Coastal');", "sql": "SELECT species_name, depth, habitat FROM (SELECT species_name, depth, habitat, MAX(depth) OVER (PARTITION BY ocean) AS max_depth FROM southern_ocean_depths WHERE ocean = 'Southern Ocean') t WHERE depth = max_depth;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "query_window_function", "is_postgresql_specific": true, "sql_length": 214, "num_statements": 1} {"question": "What is the maximum ESG score for companies in the energy sector in Q4 2020?", "schema": "CREATE TABLE if not exists companies (company_id INT, sector VARCHAR(50), esg_score DECIMAL(3,2), quarter INT, year INT); INSERT INTO companies (company_id, sector, esg_score, quarter, year) VALUES (1, 'Energy', 8.4, 4, 2020), (2, 'Energy', 9.2, 4, 2020), (3, 'Energy', 8.9, 4, 2020);", "sql": "SELECT MAX(esg_score) FROM companies WHERE sector = 'Energy' AND quarter = 4 AND year = 2020;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 93, "num_statements": 1} {"question": "List all organizations in the 'environment' category along with the total donation amount they received.", "schema": "CREATE TABLE organizations (id INT, name VARCHAR(255), category VARCHAR(255)); INSERT INTO organizations (id, name, category) VALUES (1, 'Greenpeace', 'environment'), (2, 'Climate Action', 'environment'), (3, 'Education Alliance', 'education'); CREATE TABLE donations (id INT, organization_id INT, amount DECIMAL(10, 2)); INSERT INTO donations (id, organization_id, amount) VALUES (1, 1, 1000), (2, 1, 2000), (3, 2, 500), (4, 2, 1500), (5, 3, 3000);", "sql": "SELECT organizations.name, SUM(donations.amount) AS total_donation FROM organizations INNER JOIN donations ON organizations.id = donations.organization_id WHERE organizations.category = 'environment' GROUP BY organizations.name;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 228, "num_statements": 1} {"question": "What is the average age of language speakers in the 'community_engagement' table?", "schema": "CREATE TABLE community_engagement (id INT, name VARCHAR(50), language VARCHAR(50), age INT); INSERT INTO community_engagement (id, name, language, age) VALUES (1, 'John Doe', 'English', 45), (2, 'Jane Smith', 'Spanish', 32);", "sql": "SELECT AVG(age) FROM community_engagement WHERE language IS NOT NULL;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 69, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: display the job title of jobs which minimum salary is greater than 9000.", "schema": "CREATE TABLE jobs (job_title VARCHAR, min_salary INTEGER)", "sql": "SELECT job_title FROM jobs WHERE min_salary > 9000;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 51, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What Competition in Main Article of Bradford Bulls 1997 have Robbie Paul as Captain with less than 5 Lost?", "schema": "CREATE TABLE table_name_54 (competition VARCHAR, main_article VARCHAR, captain VARCHAR, lost VARCHAR)", "sql": "SELECT competition FROM table_name_54 WHERE captain = 'robbie paul' AND lost < 5 AND main_article = 'bradford bulls 1997';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 122, "num_statements": 1} {"question": "Which cruelty-free certified products are not vegan-friendly?", "schema": "CREATE TABLE product (id INT, name VARCHAR(50), is_cruelty_free BOOLEAN, is_vegan BOOLEAN); INSERT INTO product (id, name, is_cruelty_free, is_vegan) VALUES (1, 'Lipstick A', true, false), (2, 'Eye Shadow B', true, true);", "sql": "SELECT name FROM product WHERE is_cruelty_free = true AND is_vegan = false;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 75, "num_statements": 1} {"question": "How many vessels with flag_state 'Panama' were active in the last month?", "schema": "CREATE TABLE vessel_activity (id INT, vessel_name VARCHAR(50), flag_state VARCHAR(50), activity_date DATE); INSERT INTO vessel_activity (id, vessel_name, flag_state, activity_date) VALUES (1, 'Panama Titan', 'Panama', '2022-03-20'), (2, 'Panama Titan', 'Panama', '2022-03-23');", "sql": "SELECT COUNT(DISTINCT vessel_name) FROM vessel_activity WHERE flag_state = 'Panama' AND activity_date BETWEEN DATE_SUB(CURRENT_DATE, INTERVAL 1 MONTH) AND CURRENT_DATE;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 168, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the final score for the game in which Carmelo Anthony (26) was the high points scorer?", "schema": "CREATE TABLE table_name_78 (score VARCHAR, high_points VARCHAR)", "sql": "SELECT score FROM table_name_78 WHERE high_points = 'carmelo anthony (26)';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 75, "num_statements": 1} {"question": "How many times has a specific IP address been associated with phishing attacks in the last month?", "schema": "CREATE TABLE phishing_attacks (id INT, ip_address VARCHAR(255), date DATE);", "sql": "SELECT COUNT(*) FROM phishing_attacks WHERE ip_address = '192.168.1.1' AND date >= DATE_SUB(CURRENT_DATE, INTERVAL 1 MONTH);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 124, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'foreign_key' (example 397).", "schema": null, "sql": "INSERT INTO pktable VALUES (5, 10);", "explanation": "DML from PostgreSQL core regression test for Foreign Key.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 35, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: List the creation year, name and budget of each department.", "schema": "CREATE TABLE department (creation VARCHAR, name VARCHAR, budget_in_billions VARCHAR)", "sql": "SELECT creation, name, budget_in_billions FROM department;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 58, "num_statements": 1} {"question": "pgTAP test for Resultset (assertion 222).", "schema": null, "sql": "SELECT is(\n _temptable( '\"something cool\"', '__spacenames__' ),\n '__spacenames__',\n 'Should create a temp table for a prepared statement with space'\n);", "explanation": "SQL assertion from pgTAP test suite for Resultset.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 160, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'without_overlaps' (example 161).", "schema": null, "sql": "DELETE FROM temporal_mltrng;", "explanation": "DML from PostgreSQL core regression test for Without Overlaps.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_delete", "is_postgresql_specific": false, "sql_length": 28, "num_statements": 1} {"question": "What is the average horsepower of electric vehicles in the vehicle_safety_testing table?", "schema": "CREATE TABLE vehicle_safety_testing (vehicle_id INT, vehicle_name VARCHAR(50), horsepower INT, safety_rating FLOAT);", "sql": "SELECT AVG(horsepower) FROM vehicle_safety_testing WHERE vehicle_type = 'Electric';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 83, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: what is the year when the location is san diego, california and defeated is houston baptist (texas)?", "schema": "CREATE TABLE table_name_96 (year VARCHAR, location VARCHAR, defeated VARCHAR)", "sql": "SELECT COUNT(year) FROM table_name_96 WHERE location = 'san diego, california' AND defeated = 'houston baptist (texas)';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 120, "num_statements": 1} {"question": "pgTAP test for Ownership (assertion 40).", "schema": null, "sql": "SELECT * FROM check_test(\n relation_owner_is('__not__public', 'someseq', current_user, 'mumble'),\n\tfalse,\n 'relation_owner_is(non-sch, seq, user)',\n 'mumble',\n ' Relation __not__public.someseq does not exist'\n);", "explanation": "SQL assertion from pgTAP test suite for Ownership.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 226, "num_statements": 1} {"question": "What is the number of employees working in each mining operation type?", "schema": "CREATE TABLE workforce (id INT, name VARCHAR(50), position VARCHAR(50), department VARCHAR(50), operation_type VARCHAR(50)); INSERT INTO workforce (id, name, position, department, operation_type) VALUES (1, 'John Doe', 'Engineer', 'Mining', 'Coal'), (2, 'Jane Smith', 'Technician', 'Environment', 'Gold'), (3, 'Alice Johnson', 'Manager', 'Operations', 'Gold');", "sql": "SELECT operation_type, COUNT(*) as num_employees FROM workforce GROUP BY operation_type;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 88, "num_statements": 1} {"question": "What is the total revenue of organic products sold by vendors with a high ethical labor score?", "schema": "CREATE TABLE vendors (vendor_id INT, ethical_score INT); INSERT INTO vendors (vendor_id, ethical_score) VALUES (1, 90), (2, 75), (3, 85); CREATE TABLE products (product_id INT, organic BOOLEAN); INSERT INTO products (product_id, organic) VALUES (101, TRUE), (102, FALSE), (103, TRUE); CREATE TABLE sales (sale_id INT, vendor_id INT, product_id INT, revenue INT); INSERT INTO sales (sale_id, vendor_id, product_id, revenue) VALUES (1, 1, 101, 500), (2, 1, 102, 300), (3, 2, 101, 400), (4, 3, 103, 600);", "sql": "SELECT SUM(sales.revenue) FROM sales JOIN vendors ON sales.vendor_id = vendors.vendor_id JOIN products ON sales.product_id = products.product_id WHERE products.organic = TRUE AND vendors.ethical_score >= 80;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 207, "num_statements": 1} {"question": "Find the difference in transaction amount between the first and last transaction for each customer.", "schema": "CREATE TABLE customer_transactions (transaction_date DATE, customer_id INT, transaction_amt DECIMAL(10, 2)); INSERT INTO customer_transactions (transaction_date, customer_id, transaction_amt) VALUES ('2022-01-01', 1, 200.00), ('2022-01-05', 1, 300.00), ('2022-01-01', 2, 100.00);", "sql": "SELECT customer_id, FIRST_VALUE(transaction_amt) OVER (PARTITION BY customer_id ORDER BY transaction_date) AS first_transaction_amt, LAST_VALUE(transaction_amt) OVER (PARTITION BY customer_id ORDER BY transaction_date ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS last_transaction_amt, last_transaction_amt - first_transaction_amt AS transaction_diff FROM customer_transactions;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "query_window_function", "is_postgresql_specific": true, "sql_length": 393, "num_statements": 1} {"question": "What is the number of workers involved in fair trade and living wage practices?", "schema": "CREATE TABLE labor_practices (id INT, supplier VARCHAR(255), practice VARCHAR(255), num_workers INT); INSERT INTO labor_practices (id, supplier, practice, num_workers) VALUES (1, 'Supplier A', 'Fair Trade', 50), (2, 'Supplier B', 'Living Wage', 75), (3, 'Supplier C', 'Fair Trade', 100), (4, 'Supplier D', 'Living Wage', 125), (5, 'Supplier E', 'Fair Trade', 150);", "sql": "SELECT practice, SUM(num_workers) FROM labor_practices WHERE practice IN ('Fair Trade', 'Living Wage') GROUP BY practice;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 121, "num_statements": 1} {"question": "Find the total number of indigenous communities in the 'Arctic_Communities' table that have a population size greater than any community in the 'Antarctic_Communities' table.", "schema": "CREATE TABLE Arctic_Communities (name TEXT, population INTEGER); CREATE TABLE Antarctic_Communities (name TEXT, population INTEGER);", "sql": "SELECT COUNT(*) FROM Arctic_Communities WHERE Arctic_Communities.population > (SELECT MAX(population) FROM Antarctic_Communities);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 130, "num_statements": 1} {"question": "How many hybrid cars and hybrid trucks are in the fleet_inventory table?", "schema": "CREATE TABLE fleet_inventory (id INT, ev_type VARCHAR(20), quantity INT); INSERT INTO fleet_inventory (id, ev_type, quantity) VALUES (1, 'electric_car', 50), (2, 'hybrid_car', 30), (3, 'electric_truck', 10), (4, 'hybrid_truck', 20);", "sql": "SELECT SUM(quantity) FROM fleet_inventory WHERE ev_type LIKE 'hybrid%';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 71, "num_statements": 1} {"question": "What's the total revenue of Marvel movies released in 2019?", "schema": "CREATE TABLE movies (id INT, title VARCHAR(255), release_year INT, production_budget INT, revenue INT); INSERT INTO movies (id, title, release_year, production_budget, revenue) VALUES (1, 'Avengers: Endgame', 2019, 356000000, 27978000000), (2, 'Captain Marvel', 2019, 153000000, 11285000000);", "sql": "SELECT SUM(revenue) FROM movies WHERE release_year = 2019 AND title IN ('Avengers: Endgame', 'Captain Marvel');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 111, "num_statements": 1} {"question": "What is the name and location of all heritage sites with no community engagement events?", "schema": "CREATE TABLE events (id INT, name VARCHAR, site_id INT); INSERT INTO events (id, name, site_id) VALUES (1, 'Event A', 1), (2, 'Event B', 2); CREATE TABLE heritage_sites (id INT, name VARCHAR, location VARCHAR); INSERT INTO heritage_sites (id, name, location) VALUES (1, 'Heritage Site A', 'City A'), (2, 'Heritage Site B', 'City B'), (3, 'Heritage Site C', 'City C');", "sql": "SELECT heritage_sites.name, heritage_sites.location FROM heritage_sites LEFT JOIN events ON heritage_sites.id = events.site_id WHERE events.id IS NULL;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 151, "num_statements": 1} {"question": "Find the maximum altitude reached by 'Boeing' and 'Airbus' aircraft models.", "schema": "CREATE TABLE Flight_Altitude (aircraft_model VARCHAR(255), altitude INT); INSERT INTO Flight_Altitude (aircraft_model, altitude) VALUES ('B737', 40000), ('A320', 41000), ('B747', 43000), ('B787', 44000), ('A350', 45000), ('A380', 46000), ('Boeing', 47000), ('Airbus', 48000);", "sql": "SELECT aircraft_model, altitude FROM Flight_Altitude WHERE aircraft_model IN ('Boeing', 'Airbus') AND altitude = (SELECT MAX(altitude) FROM Flight_Altitude WHERE aircraft_model IN ('Boeing', 'Airbus'));", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 202, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What chassis has the year 1987?", "schema": "CREATE TABLE table_name_87 (chassis VARCHAR, year VARCHAR)", "sql": "SELECT chassis FROM table_name_87 WHERE year = 1987;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 52, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is Points Against, when Drawn is \"2\", and when Points Of is \"32\"?", "schema": "CREATE TABLE table_name_80 (points_against VARCHAR, drawn VARCHAR, points VARCHAR)", "sql": "SELECT points_against FROM table_name_80 WHERE drawn = '2' AND points = '32';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 77, "num_statements": 1} {"question": "What are the unique threat_actors that have been involved in both 'Network Intrusions' and 'Data Exfiltration' incidents?", "schema": "CREATE TABLE incidents (id INT, threat_actor VARCHAR(255), incident_type VARCHAR(255)); INSERT INTO incidents (id, threat_actor, incident_type) VALUES (1, 'APT28', 'Network Intrusion'), (2, 'APT33', 'Data Exfiltration'), (3, 'APT34', 'Network Intrusion'), (4, 'APT29', 'Data Exfiltration'), (5, 'APT35', 'Network Intrusion'), (6, 'APT28', 'Data Exfiltration');", "sql": "SELECT threat_actor FROM incidents WHERE incident_type = 'Network Intrusion' INTERSECT SELECT threat_actor FROM incidents WHERE incident_type = 'Data Exfiltration';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 164, "num_statements": 1} {"question": "How many military equipment maintenance requests were there in Q2 2020?", "schema": "CREATE TABLE maintenance_requests (request_id INT, date DATE, type VARCHAR(255)); INSERT INTO maintenance_requests (request_id, date, type) VALUES (1, '2020-01-01', 'equipment'); INSERT INTO maintenance_requests (request_id, date, type) VALUES (2, '2020-01-15', 'facility');", "sql": "SELECT COUNT(*) FROM maintenance_requests WHERE date BETWEEN '2020-04-01' AND '2020-06-30' AND type = 'equipment';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 114, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the Away with a Time that is 14:00?", "schema": "CREATE TABLE table_name_28 (away VARCHAR, time VARCHAR)", "sql": "SELECT away FROM table_name_28 WHERE time = '14:00';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 52, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'sepgsql' (example 14).", "schema": null, "sql": "INSERT INTO t4 VALUES (1, 'mmm'), (2, 'nnn'), (3, 'ooo');", "explanation": "Example query from the 'sepgsql' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 57, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the location for the game on October 20 with it's corresponding attendance?", "schema": "CREATE TABLE table_28768469_2 (location_attendance VARCHAR, date VARCHAR)", "sql": "SELECT location_attendance FROM table_28768469_2 WHERE date = 'October 20';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 75, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the sum of Runners-Up, when Champions is greater than 5?", "schema": "CREATE TABLE table_name_2 (runners_up INTEGER, champions INTEGER)", "sql": "SELECT SUM(runners_up) FROM table_name_2 WHERE champions > 5;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 61, "num_statements": 1} {"question": "What is the total revenue for the 'Burger' category?", "schema": "CREATE TABLE restaurant_revenue (item_category VARCHAR(20), daily_revenue DECIMAL(10,2)); INSERT INTO restaurant_revenue (item_category, daily_revenue) VALUES ('Burger', 1500.00), ('Pizza', 1200.00), ('Salad', 800.00);", "sql": "SELECT SUM(daily_revenue) FROM restaurant_revenue WHERE item_category = 'Burger';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 81, "num_statements": 1} {"question": "What was the total amount of funds spent by UN agencies in Syria in 2017?", "schema": "CREATE TABLE un_agencies (agency_name VARCHAR(255), country VARCHAR(255), funds_spent DECIMAL(10,2), funds_date DATE); INSERT INTO un_agencies (agency_name, country, funds_spent, funds_date) VALUES ('UND', 'Syria', 90000, '2017-02-25'), ('UNE', 'Syria', 100000, '2017-08-17'), ('UNF', 'Syria', 110000, '2017-11-29');", "sql": "SELECT SUM(funds_spent) FROM un_agencies WHERE country = 'Syria' AND YEAR(funds_date) = 2017;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 93, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who did the fastest lap when pole position was damon hill and the location was magny-cours?", "schema": "CREATE TABLE table_name_33 (fastest_lap VARCHAR, pole_position VARCHAR, location VARCHAR)", "sql": "SELECT fastest_lap FROM table_name_33 WHERE pole_position = 'damon hill' AND location = 'magny-cours';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 102, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many times did outgoing manager Bart de Roover vacated a position?", "schema": "CREATE TABLE table_27374004_4 (date_of_vacancy VARCHAR, outgoing_manager VARCHAR)", "sql": "SELECT COUNT(date_of_vacancy) FROM table_27374004_4 WHERE outgoing_manager = 'Bart De Roover';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 94, "num_statements": 1} {"question": "What is the total data usage for the month of March?", "schema": "CREATE TABLE data_usage (id INT, subscriber_id INT, usage FLOAT, month TEXT); INSERT INTO data_usage (id, subscriber_id, usage, month) VALUES (1, 1, 15.0, 'March'); INSERT INTO data_usage (id, subscriber_id, usage, month) VALUES (2, 2, 20.0, 'March'); INSERT INTO data_usage (id, subscriber_id, usage, month) VALUES (3, 3, 10.0, 'March'); INSERT INTO data_usage (id, subscriber_id, usage, month) VALUES (4, 1, 20.0, 'April');", "sql": "SELECT SUM(usage) FROM data_usage WHERE month = 'March';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 56, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What are the names of reviewers who had rated 3 star and 4 star?", "schema": "CREATE TABLE Reviewer (name VARCHAR, rID VARCHAR); CREATE TABLE Rating (rID VARCHAR, stars VARCHAR)", "sql": "SELECT T2.name FROM Rating AS T1 JOIN Reviewer AS T2 ON T1.rID = T2.rID WHERE T1.stars = 3 INTERSECT SELECT T2.name FROM Rating AS T1 JOIN Reviewer AS T2 ON T1.rID = T2.rID WHERE T1.stars = 4;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 192, "num_statements": 1} {"question": "PostgreSQL regression test 'dbsize': Write the SELECT query (example 20).", "schema": null, "sql": "SELECT pg_size_bytes('-.kb');", "explanation": "Regression test for Dbsize in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT pg_size_bytes('-.kb')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 29, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Find the number of female students (with F sex) living in Smith Hall", "schema": "CREATE TABLE lives_in (stuid VARCHAR, dormid VARCHAR); CREATE TABLE student (stuid VARCHAR, sex VARCHAR); CREATE TABLE dorm (dormid VARCHAR, dorm_name VARCHAR)", "sql": "SELECT COUNT(*) FROM student AS T1 JOIN lives_in AS T2 ON T1.stuid = T2.stuid JOIN dorm AS T3 ON T3.dormid = T2.dormid WHERE T3.dorm_name = 'Smith Hall' AND T1.sex = 'F';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 170, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the Round din Ullevaal?", "schema": "CREATE TABLE table_name_23 (round VARCHAR, venue VARCHAR)", "sql": "SELECT round FROM table_name_23 WHERE venue = 'ullevaal';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 57, "num_statements": 1} {"question": "What is the distribution of genres in our movie database?", "schema": "CREATE TABLE Movies (id INT, title VARCHAR(255), genre VARCHAR(255));", "sql": "SELECT genre, COUNT(*) AS count FROM Movies GROUP BY genre;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 59, "num_statements": 1} {"question": "Find the circular economy initiatives that have the highest recycling rates.", "schema": "CREATE TABLE Initiatives (InitiativeID INT, InitiativeName VARCHAR(50), RecyclingRate FLOAT); INSERT INTO Initiatives VALUES (1, 'Initiative1', 0.7), (2, 'Initiative2', 0.6), (3, 'Initiative3', 0.8), (4, 'Initiative4', 0.5);", "sql": "SELECT InitiativeName, RecyclingRate FROM Initiatives WHERE RecyclingRate = (SELECT MAX(RecyclingRate) FROM Initiatives);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 121, "num_statements": 1} {"question": "Write the SQL definition from PostgreSQL contrib extension 'contrib' (item 46).", "schema": null, "sql": "-- B-tree support\n\nCREATE OPERATOR CLASS ltree_ops\n DEFAULT FOR TYPE ltree USING btree AS\n OPERATOR 1 < ,\n OPERATOR 2 <= ,\n OPERATOR 3 = ,\n OPERATOR 4 >= ,\n OPERATOR 5 > ,\n FUNCTION 1 ltree_cmp(ltree, ltree);", "explanation": "SQL definition from the 'contrib' PostgreSQL contrib extension.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "ddl_advanced", "is_postgresql_specific": true, "sql_length": 331, "num_statements": 1} {"question": "What is the total cost of fair trade coffee products in the last 6 months?", "schema": "CREATE TABLE Dates (date DATE); CREATE TABLE Sales (sale_id INT, date DATE, product_id INT, quantity INT, cost INT); CREATE TABLE Products (product_id INT, product_name VARCHAR(255), is_fair_trade BOOLEAN);", "sql": "SELECT SUM(s.cost) as total_cost FROM Sales s JOIN Dates d ON s.date = d.date JOIN Products p ON s.product_id = p.product_id WHERE d.date >= DATE(NOW()) - INTERVAL 6 MONTH AND p.is_fair_trade = TRUE;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 199, "num_statements": 1} {"question": "What is the average safety score for each AI algorithm, partitioned by algorithm type, ordered by score in descending order?", "schema": "CREATE TABLE ai_algorithms (algorithm_id INT, algorithm_name VARCHAR(50), safety_score DECIMAL(5,2)); INSERT INTO ai_algorithms (algorithm_id, algorithm_name, safety_score) VALUES (1, 'DeepQA', 85.34), (2, 'Random Forest', 91.23), (3, 'Support Vector Machine', 89.11), (4, 'Neural Network', 87.54);", "sql": "SELECT algorithm_name, AVG(safety_score) as avg_safety_score FROM ai_algorithms GROUP BY algorithm_name ORDER BY avg_safety_score DESC;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 135, "num_statements": 1} {"question": "What is the average number of employees for each sector in the ai_companies table?", "schema": "CREATE TABLE ai_companies (id INT, name VARCHAR(20), location VARCHAR(20), sector VARCHAR(20), employees INT, ethical_ai BOOLEAN); INSERT INTO ai_companies (id, name, location, sector, employees, ethical_ai) VALUES (4, 'GHI Tech', 'USA', 'Robotics', 60, false);", "sql": "SELECT sector, AVG(employees) as avg_employees FROM ai_companies GROUP BY sector;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 81, "num_statements": 1} {"question": "What is the average financial wellbeing score for clients in urban areas?", "schema": "CREATE TABLE clients(id INT, name TEXT, location TEXT, financial_wellbeing_score INT);", "sql": "SELECT AVG(c.financial_wellbeing_score) FROM clients c WHERE c.location LIKE '%urban%';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 87, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the record for the Argonauts on September 7?", "schema": "CREATE TABLE table_24136814_3 (record VARCHAR, date VARCHAR)", "sql": "SELECT record FROM table_24136814_3 WHERE date = 'September 7';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 63, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: tries against is 88, played is 22, what is the lost?", "schema": "CREATE TABLE table_name_57 (lost VARCHAR, played VARCHAR, tries_against VARCHAR)", "sql": "SELECT lost FROM table_name_57 WHERE played = '22' AND tries_against = '88';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 76, "num_statements": 1} {"question": "What is the average temperature for all regions growing 'Corn'?", "schema": "CREATE TABLE farm (id INT PRIMARY KEY, name VARCHAR(50), region_id INT, avg_temp DECIMAL(5,2)); CREATE TABLE region (id INT PRIMARY KEY, name VARCHAR(50)); INSERT INTO region (id, name) VALUES (1, 'Midwest'), (2, 'South'); INSERT INTO farm (id, name, region_id, avg_temp) VALUES (1, 'Smith Farm', 1, 15.5), (2, 'Jones Farm', 1, 16.3), (3, 'Brown Farm', 2, 20.2);", "sql": "SELECT AVG(f.avg_temp) FROM farm f INNER JOIN region r ON f.region_id = r.id WHERE r.name IN (SELECT name FROM region WHERE id IN (SELECT region_id FROM farm WHERE name = 'Corn'));", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 180, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the largest number of people in attendance of the game with a W 14-3 result after week 10?", "schema": "CREATE TABLE table_name_10 (attendance INTEGER, result VARCHAR, week VARCHAR)", "sql": "SELECT MAX(attendance) FROM table_name_10 WHERE result = 'w 14-3' AND week > 10;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 80, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the score of the team with the 14-9 record?", "schema": "CREATE TABLE table_name_43 (score VARCHAR, record VARCHAR)", "sql": "SELECT score FROM table_name_43 WHERE record = '14-9';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 54, "num_statements": 1} {"question": "What is the maximum calories burned by a member in a single workout?", "schema": "CREATE TABLE workout_data(member_id INT, calories INT, workout_date DATE); INSERT INTO workout_data(member_id, calories, workout_date) VALUES (1,500,'2021-01-03'),(2,750,'2021-02-15'),(3,600,'2021-03-27'),(4,800,'2021-05-09'),(5,450,'2021-06-21'),(6,900,'2021-07-04'),(7,300,'2021-08-12'),(8,1000,'2021-09-26'),(9,550,'2021-10-08'),(10,700,'2021-11-20'),(11,650,'2021-12-31'),(12,850,'2022-02-14');", "sql": "SELECT MAX(calories) FROM workout_data;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 39, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which Second has a Nation of latvia?", "schema": "CREATE TABLE table_name_16 (second VARCHAR, nation VARCHAR)", "sql": "SELECT second FROM table_name_16 WHERE nation = 'latvia';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 57, "num_statements": 1} {"question": "Calculate the average score of user 6 for all games played", "schema": "CREATE TABLE game_scores (user_id INT, game_name VARCHAR(10), score INT); INSERT INTO game_scores (user_id, game_name, score) VALUES (1, 'A', 50), (2, 'B', 100), (3, 'D', 150), (4, 'C', 200), (4, 'C', 250), (6, 'D', 300), (6, 'A', 350);", "sql": "SELECT AVG(score) FROM game_scores WHERE user_id = 6;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 53, "num_statements": 1} {"question": "List all countries, their total production, and the number of active wells in each country, for countries with at least one active well in 2021.", "schema": "CREATE TABLE countries (country_id INT, country_name TEXT); CREATE TABLE wells (well_id INT, country_id INT, well_name TEXT, production_qty INT, start_date DATE, end_date DATE); INSERT INTO countries (country_id, country_name) VALUES (1, 'Country A'), (2, 'Country B'); INSERT INTO wells (well_id, country_id, well_name, production_qty, start_date, end_date) VALUES (1, 1, 'Well A', 500, '2020-01-01', '2022-02-28'), (2, 1, 'Well B', 700, '2021-01-01', '2023-01-01'), (3, 2, 'Well C', 300, '2022-01-01', '2024-01-01');", "sql": "SELECT c.country_name, SUM(w.production_qty) AS total_production, COUNT(w.well_id) AS active_wells FROM countries c INNER JOIN wells w ON c.country_id = w.country_id WHERE w.start_date <= '2021-01-01' AND w.end_date >= '2021-01-01' GROUP BY c.country_name;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 256, "num_statements": 1} {"question": "What is the minimum energy storage utilization rate for the province of Ontario in 2021?", "schema": "CREATE TABLE energy_storage_utilization (province VARCHAR(20), utilization DECIMAL(4,2), year INT); INSERT INTO energy_storage_utilization (province, utilization, year) VALUES ('Ontario', 80.5, 2021), ('Ontario', 82.3, 2021), ('Ontario', 79.2, 2021);", "sql": "SELECT MIN(utilization) FROM energy_storage_utilization WHERE province = 'Ontario' AND year = 2021;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 99, "num_statements": 1} {"question": "What is the total amount of funds spent on refugee support in Africa?", "schema": "CREATE TABLE funds (id INT, category TEXT, region TEXT, amount DECIMAL(10,2)); INSERT INTO funds (id, category, region, amount) VALUES (1, 'Refugee Support', 'Middle East', 250000.00), (2, 'Disaster Response', 'Asia', 300000.00), (3, 'Community Development', 'Africa', 150000.00), (4, 'Refugee Support', 'Africa', 50000.00), (5, 'Refugee Support', 'Africa', 75000.00);", "sql": "SELECT SUM(amount) FROM funds WHERE category = 'Refugee Support' AND region = 'Africa';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 87, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is Highest Attendance, when Rank is 90, and when Venue Name is Infineon Raceway?", "schema": "CREATE TABLE table_name_69 (highest_attendance VARCHAR, rank VARCHAR, venue_name VARCHAR)", "sql": "SELECT highest_attendance FROM table_name_69 WHERE rank = 90 AND venue_name = 'infineon raceway';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 97, "num_statements": 1} {"question": "Insert a new rural development project 'AGRI-INNOVATE 3.0' in Rwanda for 2023 with a budget of 5000000.", "schema": "CREATE TABLE rural_development (project VARCHAR(255), country VARCHAR(255), year INT, budget FLOAT);", "sql": "INSERT INTO rural_development (project, country, year, budget) VALUES ('AGRI-INNOVATE 3.0', 'Rwanda', 2023, 5000000);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 117, "num_statements": 1} {"question": "List the community members and their ages who are involved in agricultural innovation and are living in 'rural_area_1' from the 'community_development', 'agriculture_innovation', and 'rural_infrastructure' tables", "schema": "CREATE TABLE community_development (member_id INT, member_name VARCHAR(50), age INT, area_id INT); CREATE TABLE agriculture_innovation (farmer_id INT, farmer_name VARCHAR(50), member_id INT); CREATE TABLE rural_infrastructure (project_id INT, project_type VARCHAR(50), budget INT, area_id INT);", "sql": "SELECT c.member_name, c.age FROM community_development c INNER JOIN agriculture_innovation a ON c.member_id = a.member_id INNER JOIN rural_infrastructure r ON c.area_id = r.area_id WHERE c.area_name = 'rural_area_1';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 216, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What type has 5 as the quantity?", "schema": "CREATE TABLE table_name_26 (type VARCHAR, quantity VARCHAR)", "sql": "SELECT type FROM table_name_26 WHERE quantity = 5;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 50, "num_statements": 1} {"question": "What is the total installed capacity of renewable energy projects in the state of Florida that involve wind power?", "schema": "CREATE TABLE renewable_energy (project_id INT, project_name VARCHAR(255), city VARCHAR(255), state VARCHAR(255), technology VARCHAR(255), capacity FLOAT);", "sql": "SELECT SUM(capacity) FROM renewable_energy WHERE state = 'Florida' AND technology = 'Wind';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 91, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'without_overlaps' (example 411).", "schema": null, "sql": "INSERT INTO temporal_fk_rng2rng (id, valid_at, parent_id) VALUES ('[2,3)', daterange('2018-01-02', '2018-04-01'), '[1,2)');", "explanation": "DML from PostgreSQL core regression test for Without Overlaps.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 123, "num_statements": 1} {"question": "Find the number of transactions involving 'organic' produce in the 'Northeast' region.", "schema": "CREATE TABLE transactions (id INT, product TEXT, region TEXT, organic BOOLEAN); INSERT INTO transactions (id, product, region, organic) VALUES (1, 'Product 1', 'Northeast', true), (2, 'Product 2', 'Southeast', false);", "sql": "SELECT COUNT(*) FROM transactions WHERE region = 'Northeast' AND organic = true;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 80, "num_statements": 1} {"question": "Calculate the total billing amount for cases with a 'Precedential' precedent type, won by attorneys from the 'New York' office?", "schema": "CREATE TABLE offices (office_id INT, office_name VARCHAR(20)); INSERT INTO offices (office_id, office_name) VALUES (1, 'Boston'), (2, 'New York'), (3, 'Chicago'); CREATE TABLE cases (case_id INT, attorney_id INT, office_id INT, precedent_type VARCHAR(20), billing_amount FLOAT, case_outcome VARCHAR(10)); INSERT INTO cases (case_id, attorney_id, office_id, precedent_type, billing_amount, case_outcome) VALUES (1, 101, 1, 'Precedential', 5000, 'Won'), (2, 102, 1, 'Non-Precedential', 3000, 'Lost'), (3, 103, 1, 'Precedential', 7000, 'Won'), (4, 104, 2, 'Non-Precedential', 2000, 'Won'), (5, 105, 3, 'Precedential', 8000, 'Lost');", "sql": "SELECT SUM(billing_amount) FROM cases JOIN offices ON cases.office_id = offices.office_id WHERE offices.office_name = 'New York' AND precedent_type = 'Precedential' AND case_outcome = 'Won';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 190, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which away team played against the home team of St Kilda?", "schema": "CREATE TABLE table_name_66 (away_team VARCHAR, home_team VARCHAR)", "sql": "SELECT away_team FROM table_name_66 WHERE home_team = 'st kilda';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 65, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Chicago fire has a total of a total of how many #s?", "schema": "CREATE TABLE table_name_5 (pick__number VARCHAR, mls_team VARCHAR)", "sql": "SELECT COUNT(pick__number) FROM table_name_5 WHERE mls_team = 'chicago fire';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 77, "num_statements": 1} {"question": "PostgreSQL regression test 'pg_dependencies': Write the SELECT query (example 78).", "schema": null, "sql": "SELECT '[{\"attributes\" : [2,3], \"dependency\" : 4, \"dependency\": 4, \"degree\": 1.000}]'::pg_dependencies;", "explanation": "Regression test for Pg Dependencies in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT '[{\"attributes\" : [2,3], \"dependency\" : 4, \"dependency\": 4, \"degree\": 1.000}]'::pg_dependencies) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 103, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is Oklahoma vs. when Current Streak is l 1, and Neutral Site is osu, 7-6?", "schema": "CREATE TABLE table_name_15 (oklahoma_vs VARCHAR, current_streak VARCHAR, at_neutral_site VARCHAR)", "sql": "SELECT oklahoma_vs FROM table_name_15 WHERE current_streak = 'l 1' AND at_neutral_site = 'osu, 7-6';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 100, "num_statements": 1} {"question": "Calculate the average price of ethically made items", "schema": "CREATE TABLE sales (id INT, item_id INT, price INT, purchase_date DATE); CREATE TABLE standards (id INT, item_id INT, standard VARCHAR(255)); INSERT INTO sales (id, item_id, price, purchase_date) VALUES (1, 1, 100, '2022-01-01'), (2, 2, 75, '2022-01-02'), (3, 1, 120, '2022-01-03'); INSERT INTO standards (id, item_id, standard) VALUES (1, 1, 'ethical'), (2, 2, 'sustainable');", "sql": "SELECT AVG(price) FROM sales s JOIN standards st ON s.item_id = st.item_id WHERE st.standard = 'ethical';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 105, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who is the opponent in 1992?", "schema": "CREATE TABLE table_name_17 (opponent VARCHAR, year VARCHAR)", "sql": "SELECT opponent FROM table_name_17 WHERE year = '1992';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 55, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the D48 when the D 50 O is d 31?", "schema": "CREATE TABLE table_name_67 (d_48_√ VARCHAR, d_50_o VARCHAR)", "sql": "SELECT d_48_√ FROM table_name_67 WHERE d_50_o = 'd 31';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 55, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What station was sent on flight up and was launched on 23 July 1980 18:33:03?", "schema": "CREATE TABLE table_245800_2 (flight_up VARCHAR, launch_date VARCHAR)", "sql": "SELECT flight_up FROM table_245800_2 WHERE launch_date = '23 July 1980 18:33:03';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 81, "num_statements": 1} {"question": "PostgreSQL regression test 'rangefuncs': Write the SELECT query (example 66).", "schema": null, "sql": "SELECT * FROM getrngfunc1(1) WITH ORDINALITY AS t1(v,o);", "explanation": "Regression test for Rangefuncs in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT * FROM getrngfunc1(1) WITH ORDINALITY AS t1(v,o)) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 56, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: what project was initially started before september 20, 1968", "schema": "CREATE TABLE table_291768_1 (commissioned VARCHAR, launched VARCHAR)", "sql": "SELECT commissioned FROM table_291768_1 WHERE launched = 'September 20, 1968';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 78, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was Berlin when fk pirmasens was Südwest and westfalia herne was west?", "schema": "CREATE TABLE table_name_60 (berlin VARCHAR, südwest VARCHAR, west VARCHAR)", "sql": "SELECT berlin FROM table_name_60 WHERE südwest = 'fk pirmasens' AND west = 'westfalia herne';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 93, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: List the names of buildings with at least 200 feet of height and with at least 20 floors.", "schema": "CREATE TABLE building (name VARCHAR, height_feet VARCHAR, floors VARCHAR)", "sql": "SELECT name FROM building WHERE height_feet >= 200 AND floors >= 20;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 68, "num_statements": 1} {"question": "List the number of safety incidents for each chemical type, ordered from most to least incidents?", "schema": "CREATE TABLE safety_incidents (chemical_type VARCHAR(255), incident_date DATE); INSERT INTO safety_incidents (chemical_type, incident_date) VALUES ('Type A', '2020-01-05'), ('Type A', '2020-03-12'), ('Type B', '2020-02-18'), ('Type C', '2020-01-02'), ('Type C', '2020-04-20'), ('Type D', '2020-03-03');", "sql": "SELECT chemical_type, COUNT(*) as incident_count FROM safety_incidents GROUP BY chemical_type ORDER BY incident_count DESC;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 123, "num_statements": 1} {"question": "Update the 'workforce' table and set the 'salary' to $60,000 for all workers in the 'mechanical' department", "schema": "CREATE TABLE workforce (id INT, name VARCHAR(255), department VARCHAR(255), salary DECIMAL(8,2));", "sql": "UPDATE workforce SET salary = 60000 WHERE department = 'mechanical';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 68, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: On the date november 24-december 6 what's the winner when the country is united states?", "schema": "CREATE TABLE table_name_35 (winner VARCHAR, country VARCHAR, date VARCHAR)", "sql": "SELECT winner FROM table_name_35 WHERE country = 'united states' AND date = 'november 24-december 6';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 101, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the placement date of the order whose invoice number is 10?", "schema": "CREATE TABLE orders (date_order_placed VARCHAR, order_id VARCHAR); CREATE TABLE shipments (order_id VARCHAR, invoice_number VARCHAR)", "sql": "SELECT T1.date_order_placed FROM orders AS T1 JOIN shipments AS T2 ON T1.order_id = T2.order_id WHERE T2.invoice_number = 10;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 125, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'rules' (example 428).", "schema": null, "sql": "insert into test_2 (name) values ('Test 4');", "explanation": "DML from PostgreSQL core regression test for Rules.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 44, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What are all the winning records when the result is Scotland won on points table and the Runner-Up result is [[|]] 4 points?", "schema": "CREATE TABLE table_28601467_1 (winner VARCHAR, result VARCHAR, runner_up VARCHAR)", "sql": "SELECT winner FROM table_28601467_1 WHERE result = 'Scotland won on points table' AND runner_up = '[[|]] 4 points';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 115, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the fa cup apps for arthur morton", "schema": "CREATE TABLE table_19730892_1 (fa_cup_apps VARCHAR, name VARCHAR)", "sql": "SELECT fa_cup_apps FROM table_19730892_1 WHERE name = 'Arthur Morton';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 70, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'foreign_key' (example 504).", "schema": null, "sql": "create table fktable2 (\n a int,\n b int,\n very_very_long_column_name_to_exceed_63_characters int,\n foreign key (very_very_long_column_name_to_exceed_63_characters) references pktable1,\n foreign key (a, very_very_long_column_name_to_exceed_63_characters) references pktable2,\n foreign key (a, very_very_long_column_name_to_exceed_63_characters) references pktable2\n);", "explanation": "DDL from PostgreSQL core regression test for Foreign Key.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "ddl_table", "is_postgresql_specific": false, "sql_length": 371, "num_statements": 1} {"question": "What percentage of cosmetics sales in Australia are from eco-friendly packaging?", "schema": "CREATE TABLE cosmetics_sales_australia (id INT, product VARCHAR(50), price DECIMAL(10,2), eco_friendly BOOLEAN, country VARCHAR(50)); INSERT INTO cosmetics_sales_australia (id, product, price, eco_friendly, country) VALUES (1, 'Eco-friendly Mascara', 20.00, TRUE, 'Australia');", "sql": "SELECT 100.0 * SUM(CASE WHEN eco_friendly THEN price ELSE 0 END) / SUM(price) AS percentage FROM cosmetics_sales_australia WHERE country = 'Australia';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 151, "num_statements": 1} {"question": "What is the average age of players who play VR games, and how many VR games have been published?", "schema": "CREATE TABLE Players (PlayerID int, Age int, Gender varchar(10), GamePreference varchar(20)); INSERT INTO Players (PlayerID, Age, Gender, GamePreference) VALUES (1, 25, 'Male', 'VR'); INSERT INTO Players (PlayerID, Age, Gender, GamePreference) VALUES (2, 30, 'Female', 'Non-VR'); CREATE TABLE Games (GameID int, GameName varchar(20), Genre varchar(10), VR boolean); INSERT INTO Games (GameID, GameName, Genre, VR) VALUES (1, 'Game1', 'Action', false); INSERT INTO Games (GameID, GameName, Genre, VR) VALUES (2, 'Game2', 'Adventure', true);", "sql": "SELECT AVG(Players.Age) AS AvgAge, COUNT(Games.GameID) AS VRGameCount FROM Players INNER JOIN Games ON Players.GamePreference = 'VR' AND Games.VR = true;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 153, "num_statements": 1} {"question": "What is the name of the latest vulnerability reported in the 'vulnerabilities' table?", "schema": "CREATE TABLE vulnerabilities (id INT, name VARCHAR(255), description TEXT, severity VARCHAR(50), reported_date DATE);", "sql": "SELECT name FROM vulnerabilities ORDER BY reported_date DESC LIMIT 1;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 69, "num_statements": 1} {"question": "What is the total number of fishing vessels in the 'Indian Ocean' that are over 20 years old?'", "schema": "CREATE TABLE fishing_vessels (name VARCHAR(50), region VARCHAR(20), age INTEGER); INSERT INTO fishing_vessels (name, region, age) VALUES ('Vessel A', 'Indian Ocean', 15), ('Vessel B', 'Indian Ocean', 25), ('Vessel C', 'Atlantic', 10);", "sql": "SELECT COUNT(*) FROM fishing_vessels WHERE region = 'Indian Ocean' AND age > 20;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 80, "num_statements": 1} {"question": "PL/pgSQL test: Plperl Trigger (example 19).", "schema": null, "sql": "CREATE VIEW trigger_test_view AS SELECT * FROM trigger_test;", "explanation": "PL/pgSQL example from PostgreSQL source test for Plperl Trigger.", "validation_query": null, "source": "plpgsql_source", "difficulty": "intermediate", "category": "ddl_view", "is_postgresql_specific": false, "sql_length": 60, "num_statements": 1} {"question": "How many art pieces were created by artists from underrepresented communities in the last decade?", "schema": "CREATE TABLE art_pieces_identity (id INT, year INT, artist_name VARCHAR(50), art_type VARCHAR(50), underrepresented_community VARCHAR(50));", "sql": "SELECT COUNT(*) as total_art_pieces FROM art_pieces_identity WHERE year >= 2012 AND underrepresented_community IS NOT NULL;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 123, "num_statements": 1} {"question": "What is the average year when traditional arts were first practiced?", "schema": "CREATE TABLE traditional_arts (id INT, art_name VARCHAR(255), year INT, country VARCHAR(255)); INSERT INTO traditional_arts (id, art_name, year, country) VALUES (1, 'Ukiyo-e', 1600, 'Japan'), (2, 'Taracea', 1700, 'Mexico');", "sql": "SELECT AVG(year) FROM traditional_arts;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 39, "num_statements": 1} {"question": "How many unique subscribers have both mobile and broadband plans?", "schema": "CREATE TABLE mobile_subscribers (subscriber_id INT, plan_name TEXT); CREATE TABLE broadband_subscribers (subscriber_id INT, plan_name TEXT);", "sql": "SELECT COUNT(DISTINCT mobile_subscribers.subscriber_id) FROM mobile_subscribers INNER JOIN broadband_subscribers ON mobile_subscribers.subscriber_id = broadband_subscribers.subscriber_id;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 187, "num_statements": 1} {"question": "Identify the transaction with the largest amount in each quarter for the past two years.", "schema": "CREATE TABLE transactions (id INT, transaction_date DATE, amount DECIMAL(10, 2)); INSERT INTO transactions (id, transaction_date, amount) VALUES (1, '2022-01-01', 500.00), (2, '2022-02-01', 750.00), (3, '2021-01-01', 1000.00);", "sql": "SELECT transaction_date, amount FROM (SELECT transaction_date, amount, ROW_NUMBER() OVER (PARTITION BY DATE_TRUNC('quarter', transaction_date) ORDER BY amount DESC) as rn FROM transactions WHERE transaction_date >= (CURRENT_DATE - INTERVAL '2 year')) t WHERE rn = 1;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "query_window_function", "is_postgresql_specific": true, "sql_length": 266, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the least obesity rank for the state of Utah?", "schema": "CREATE TABLE table_18958648_1 (obesity_rank INTEGER, state_and_district_of_columbia VARCHAR)", "sql": "SELECT MIN(obesity_rank) FROM table_18958648_1 WHERE state_and_district_of_columbia = 'Utah';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 93, "num_statements": 1} {"question": "What is the total price of all properties in inclusive housing schemes in Oakland?", "schema": "CREATE TABLE inclusive_housing (property_id INT, city VARCHAR(50), price INT, inclusive_scheme BOOLEAN); INSERT INTO inclusive_housing (property_id, city, price, inclusive_scheme) VALUES (1, 'Oakland', 600000, TRUE), (2, 'Portland', 500000, FALSE), (3, 'Oakland', 700000, TRUE), (4, 'Seattle', 800000, FALSE);", "sql": "SELECT SUM(price) FROM inclusive_housing WHERE city = 'Oakland' AND inclusive_scheme = TRUE;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 92, "num_statements": 1} {"question": "Which states have more than 5 disability support programs?", "schema": "CREATE TABLE states (state_id INT, state_name VARCHAR(50), num_programs INT); INSERT INTO states (state_id, state_name, num_programs) VALUES (1, 'California', 7), (2, 'Texas', 3), (3, 'New York', 6), (4, 'Florida', 4);", "sql": "SELECT state_name FROM (SELECT state_name, ROW_NUMBER() OVER (ORDER BY num_programs DESC) as rn FROM states) t WHERE rn <= (SELECT COUNT(*) FROM states WHERE num_programs > 5);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_window_function", "is_postgresql_specific": true, "sql_length": 176, "num_statements": 1} {"question": "What is the average price of cruelty-free products?", "schema": "CREATE TABLE products (id INT, company VARCHAR(255), price DECIMAL(5,2), cruelty_free BOOLEAN); INSERT INTO products (id, company, price, cruelty_free) VALUES (1, 'ABC', 50.99, TRUE), (2, 'DEF', 35.49, FALSE), (3, 'GHI', 65.99, TRUE), (4, 'JKL', 25.99, TRUE);", "sql": "SELECT AVG(price) FROM products WHERE cruelty_free = TRUE;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 58, "num_statements": 1} {"question": "What is the average time between appointments for patients with chronic conditions in rural health clinic B?", "schema": "CREATE TABLE clinics (clinic_id INT, clinic_name VARCHAR(50)); CREATE TABLE appointments (appointment_id INT, patient_id INT, appointment_date DATE, clinic_id INT, chronic_condition BOOLEAN);", "sql": "SELECT AVG(DATEDIFF(appointments.appointment_date, LAG(appointments.appointment_date) OVER (PARTITION BY appointments.patient_id ORDER BY appointments.appointment_date))) as avg_time_between_appointments FROM appointments WHERE appointments.clinic_id = (SELECT clinic_id FROM clinics WHERE clinic_name = 'rural health clinic B') AND appointments.chronic_condition = TRUE;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "query_window_function", "is_postgresql_specific": true, "sql_length": 371, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the result of nominee, Patricia McGourty, for the drama desk award?", "schema": "CREATE TABLE table_name_17 (result VARCHAR, award VARCHAR, nominee VARCHAR)", "sql": "SELECT result FROM table_name_17 WHERE award = 'drama desk award' AND nominee = 'patricia mcgourty';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 100, "num_statements": 1} {"question": "What are the total waste generation metrics for each city in the region 'West Coast'?", "schema": "CREATE TABLE cities (city_name VARCHAR(50), region VARCHAR(50)); INSERT INTO cities (city_name, region) VALUES ('San Francisco', 'West Coast'), ('Los Angeles', 'West Coast'), ('Seattle', 'West Coast'); CREATE TABLE waste_generation (city_name VARCHAR(50), waste_metric INT); INSERT INTO waste_generation (city_name, waste_metric) VALUES ('San Francisco', 1200), ('Los Angeles', 1500), ('Seattle', 1800);", "sql": "SELECT wg.city_name, SUM(waste_metric) as total_waste_metric FROM waste_generation wg JOIN cities c ON wg.city_name = c.city_name WHERE c.region = 'West Coast' GROUP BY wg.city_name;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 182, "num_statements": 1} {"question": "What is the average duration of virtual tours for each hotel?", "schema": "CREATE TABLE hotels (hotel_id INT, name VARCHAR(50), category VARCHAR(20), rating DECIMAL(2,1)); INSERT INTO hotels (hotel_id, name, category, rating) VALUES (1, 'The Urban Chic', 'boutique', 4.5), (2, 'The Artistic Boutique', 'boutique', 4.7), (3, 'The Cozy Inn', 'budget', 4.2); CREATE TABLE virtual_tours (tour_id INT, hotel_id INT, title VARCHAR(50), duration INT); INSERT INTO virtual_tours (tour_id, hotel_id, title, duration) VALUES (1, 1, 'Virtual Tour: The Urban Chic Lobby', 15), (2, 1, 'Virtual Tour: The Urban Chic Rooms', 30), (3, 2, 'Virtual Tour: The Artistic Boutique Lobby', 10), (4, 3, 'Virtual Tour: The Cozy Inn Rooms', 20);", "sql": "SELECT hotel_id, AVG(duration) AS avg_duration FROM virtual_tours GROUP BY hotel_id;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 84, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'tablespace' (example 146).", "schema": null, "sql": "CREATE INDEX test_tab_a_idx ON testschema.test_tab (a);", "explanation": "DDL from PostgreSQL core regression test for Tablespace.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "ddl_index", "is_postgresql_specific": false, "sql_length": 55, "num_statements": 1} {"question": "What is the total number of impact investments made by donors from the United States?", "schema": "CREATE TABLE donors (donor_id INT, donor_name TEXT, country TEXT); INSERT INTO donors (donor_id, donor_name, country) VALUES (1, 'John Doe', 'United States'); CREATE TABLE impact_investments (investment_id INT, donor_id INT, investment_amount FLOAT); INSERT INTO impact_investments (investment_id, donor_id, investment_amount) VALUES (1, 1, 50000.0);", "sql": "SELECT SUM(investment_amount) FROM impact_investments JOIN donors ON donors.donor_id = impact_investments.donor_id WHERE donors.country = 'United States';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 154, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many stages did Team Sky lead the teams classification?", "schema": "CREATE TABLE table_26010857_13 (stage INTEGER, teams_classification VARCHAR)", "sql": "SELECT MAX(stage) FROM table_26010857_13 WHERE teams_classification = 'Team Sky';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 81, "num_statements": 1} {"question": "Insert a new space debris record into the \"space_debris_monitoring\" table for an object launched by India in 2008.", "schema": "CREATE TABLE space_debris_monitoring (id INT, object_name VARCHAR(50), launch_country VARCHAR(50), launch_date DATE, latitude FLOAT, longitude FLOAT);", "sql": "INSERT INTO space_debris_monitoring (object_name, launch_country, launch_date, latitude, longitude) VALUES ('Debris_2008_India', 'India', '2008-01-01', 10.123456, 20.123456);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 174, "num_statements": 1} {"question": "PL/pgSQL test: Plperl Plperlu (example 7).", "schema": null, "sql": "-- plperlu first\ncreate or replace function bar(text) returns text language plperlu as 'shift';", "explanation": "PL/pgSQL example from PostgreSQL source test for Plperl Plperlu.", "validation_query": null, "source": "plpgsql_source", "difficulty": "advanced", "category": "plpgsql_function", "is_postgresql_specific": true, "sql_length": 95, "num_statements": 1} {"question": "PostgreSQL regression test 'json_encoding': Write the SELECT query (example 4).", "schema": null, "sql": "select json '{ \"a\": \"dollar \\\\u0024 character\" }' as not_an_escape;", "explanation": "Regression test for Json Encoding in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select json '{ \"a\": \"dollar \\\\u0024 character\" }' as not_an_escape) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": true, "sql_length": 68, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many grand final dual television commentators were there in 1961?", "schema": "CREATE TABLE table_1368649_9 (grand_final_dual_television_commentator VARCHAR, year_s_ VARCHAR)", "sql": "SELECT COUNT(grand_final_dual_television_commentator) FROM table_1368649_9 WHERE year_s_ = 1961;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 96, "num_statements": 1} {"question": "What is the percentage of tenured faculty members in each department?", "schema": "CREATE TABLE faculty (faculty_id INT, name VARCHAR(50), gender VARCHAR(10), department VARCHAR(50), is_tenured BOOLEAN, position VARCHAR(50)); INSERT INTO faculty VALUES (1, 'John Doe', 'Male', 'Mathematics', TRUE, 'Professor'), (2, 'Jane Smith', 'Female', 'Physics', FALSE, 'Assistant Professor'), (3, 'Alice Johnson', 'Female', 'Mathematics', TRUE, 'Associate Professor');", "sql": "SELECT department, COUNT(*) * 100.0 / (SELECT COUNT(*) FROM faculty WHERE is_tenured = TRUE) as tenured_percentage FROM faculty WHERE is_tenured = TRUE GROUP BY department;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 172, "num_statements": 1} {"question": "List all tech companies in 'tech_companies' table located in 'California'.", "schema": "CREATE TABLE tech_companies (company VARCHAR(50), department VARCHAR(50), employee_name VARCHAR(50), salary INTEGER, company_location VARCHAR(50));", "sql": "SELECT company FROM tech_companies WHERE company_location = 'California';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 73, "num_statements": 1} {"question": "What is the total number of security incidents caused by malware in the past week?", "schema": "CREATE TABLE security_incidents (id INT, type TEXT, timestamp TIMESTAMP); INSERT INTO security_incidents (id, type, timestamp) VALUES (1, 'phishing', '2021-02-01 12:00:00'), (2, 'malware', '2021-02-04 14:30:00'), (3, 'phishing', '2021-02-05 10:15:00'), (4, 'malware', '2021-02-06 16:45:00');", "sql": "SELECT COUNT(*) FROM security_incidents WHERE type = 'malware' AND timestamp >= NOW() - INTERVAL '1 week';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 106, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: The community station broadcasting at frequency 0 96.9 is in what band?", "schema": "CREATE TABLE table_name_87 (band VARCHAR, purpose VARCHAR, frequency VARCHAR)", "sql": "SELECT band FROM table_name_87 WHERE purpose = 'community' AND frequency = '0 96.9';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 84, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the most finish for 2006", "schema": "CREATE TABLE table_name_9 (finish INTEGER, year VARCHAR)", "sql": "SELECT MAX(finish) FROM table_name_9 WHERE year = 2006;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 55, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the league goals when the total goals is 179?", "schema": "CREATE TABLE table_29701419_2 (league_goals INTEGER, total_goals VARCHAR)", "sql": "SELECT MAX(league_goals) FROM table_29701419_2 WHERE total_goals = 179;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 71, "num_statements": 1} {"question": "Which natural ingredient is most commonly used in skincare products?", "schema": "CREATE TABLE product_ingredients (product_id INT, product_category VARCHAR(50), ingredient VARCHAR(50)); INSERT INTO product_ingredients (product_id, product_category, ingredient) VALUES (1009, 'skincare', 'aloe vera'), (1010, 'skincare', 'lavender oil'), (1011, 'haircare', 'coconut oil'), (1012, 'skincare', 'jojoba oil'), (1013, 'makeup', 'talc');", "sql": "SELECT ingredient, COUNT(*) AS ingredient_count FROM product_ingredients WHERE product_category = 'skincare' GROUP BY ingredient ORDER BY ingredient_count DESC LIMIT 1;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 168, "num_statements": 1} {"question": "Find the number of grants awarded to each department in the College of Social Sciences.", "schema": "CREATE TABLE departments (id INT, name VARCHAR(50)); CREATE TABLE grants (id INT, department_id INT, amount INT); INSERT INTO departments VALUES (1, 'Psychology'), (2, 'Sociology'), (3, 'Anthropology'); INSERT INTO grants VALUES (1, 1, 5000), (2, 1, 7000), (3, 2, 6000), (4, 3, 4000);", "sql": "SELECT department_id, COUNT(*) AS grant_count FROM grants GROUP BY department_id;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 81, "num_statements": 1} {"question": "Show a SQL definition from the citus project (multi_behavioral_analytics_create_table, item 47).", "schema": null, "sql": "-- Create composite type to use in subquery pushdown\nCREATE TYPE user_composite_type AS\n(\n tenant_id BIGINT,\n user_id BIGINT\n);", "explanation": "SQL definition from the open-source citus PostgreSQL project.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "ddl_advanced", "is_postgresql_specific": true, "sql_length": 133, "num_statements": 1} {"question": "List the number of wind farms in Canada, Brazil, and Argentina, as of 2020.", "schema": "CREATE TABLE wind_farms (country VARCHAR(50), operational BOOLEAN, year INT); INSERT INTO wind_farms (country, operational, year) VALUES ('Canada', true, 2020), ('Brazil', true, 2020), ('Argentina', true, 2020), ('Mexico', false, 2020);", "sql": "SELECT country, COUNT(*) FROM wind_farms WHERE country IN ('Canada', 'Brazil', 'Argentina') AND operational = true GROUP BY country;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 132, "num_statements": 1} {"question": "Total sales of drugs manufactured by ManuE", "schema": "CREATE TABLE sales (sale_id INT, drug_name TEXT, manufacturer TEXT, sales_figure DECIMAL); INSERT INTO sales (sale_id, drug_name, manufacturer, sales_figure) VALUES (1, 'DrugO', 'ManuE', 3000000), (2, 'DrugP', 'ManuF', 4000000);", "sql": "SELECT SUM(sales_figure) FROM sales WHERE manufacturer = 'ManuE';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 65, "num_statements": 1} {"question": "List the total budget for rural infrastructure projects by project type and project status, and calculate the average budget per project in the 'rural_infrastructure_projects' table.", "schema": "CREATE TABLE rural_infrastructure_projects (project_type VARCHAR(255), project_status VARCHAR(255), budget INT); INSERT INTO rural_infrastructure_projects (project_type, project_status, budget) VALUES ('Bridge', 'Under Construction', 2000000), ('Road', 'Completed', 1500000);", "sql": "SELECT project_type, project_status, SUM(budget) AS total_budget, AVG(budget) AS average_budget_per_project FROM rural_infrastructure_projects GROUP BY project_type, project_status;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 181, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What game has 28 points, and tampa bay lightning as the opponent?", "schema": "CREATE TABLE table_name_50 (game VARCHAR, points VARCHAR, opponent VARCHAR)", "sql": "SELECT game FROM table_name_50 WHERE points = 28 AND opponent = 'tampa bay lightning';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 86, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the total number of opponents played against the bears in game 4?", "schema": "CREATE TABLE table_21035326_1 (opponents VARCHAR, game VARCHAR)", "sql": "SELECT COUNT(opponents) FROM table_21035326_1 WHERE game = 4;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 61, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: When the assets are 2,550, what is the Market Value?", "schema": "CREATE TABLE table_1682026_2 (market_value__billion_$_ VARCHAR, assets__billion_$_ VARCHAR)", "sql": "SELECT market_value__billion_$_ FROM table_1682026_2 WHERE assets__billion_$_ = '2,550';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 88, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'arrays' (example 94).", "schema": null, "sql": "update arrtest1 set i[-7:-6] = array[-17,null], t[-7:-6] = array['m17',null];", "explanation": "DML from PostgreSQL core regression test for Arrays.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 77, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'join' (example 13).", "schema": null, "sql": "INSERT INTO J1_TBL VALUES (NULL, 0, 'zero');", "explanation": "DML from PostgreSQL core regression test for Join.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 44, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What away team played against Footscray as the home team?", "schema": "CREATE TABLE table_name_50 (away_team VARCHAR, home_team VARCHAR)", "sql": "SELECT away_team FROM table_name_50 WHERE home_team = 'footscray';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "Find the average gold and silver production quantities for each mine, excluding mines with missing data.", "schema": "CREATE TABLE mine (name VARCHAR(255), location VARCHAR(255)); CREATE TABLE gold_mine_production (mine_name VARCHAR(255), quantity INT); CREATE TABLE silver_mine_production (mine_name VARCHAR(255), quantity INT);", "sql": "SELECT gold_mine_production.mine_name, AVG(gold_mine_production.quantity) AS avg_gold_quantity, AVG(silver_mine_production.quantity) AS avg_silver_quantity FROM gold_mine_production INNER JOIN silver_mine_production ON gold_mine_production.mine_name = silver_mine_production.mine_name GROUP BY gold_mine_production.mine_name HAVING COUNT(*) = 2;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 345, "num_statements": 1} {"question": "What is the average daily revenue for hotels in the 'EMEA' region for the year 2022?", "schema": "CREATE TABLE daily_revenue (id INT, hotel_id INT, region TEXT, calendar DATE, revenue FLOAT);", "sql": "SELECT region, AVG(revenue) FROM daily_revenue WHERE region = 'EMEA' AND YEAR(calendar) = 2022 GROUP BY region;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 111, "num_statements": 1} {"question": "What is the percentage of cases that were resolved through restorative justice practices for each year?", "schema": "CREATE TABLE cases (case_id INT, resolution_type VARCHAR(50), resolution_year INT); INSERT INTO cases (case_id, resolution_type, resolution_year) VALUES (1, 'restorative_justice', 2020); INSERT INTO cases (case_id, resolution_type, resolution_year) VALUES (2, 'prosecution', 2020); INSERT INTO cases (case_id, resolution_type, resolution_year) VALUES (3, 'restorative_justice', 2021);", "sql": "SELECT resolution_year, resolution_type, COUNT(*) * 100.0 / SUM(COUNT(*)) OVER (PARTITION BY resolution_year) as percentage FROM cases WHERE resolution_type = 'restorative_justice' GROUP BY resolution_year, resolution_type;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "query_window_function", "is_postgresql_specific": true, "sql_length": 223, "num_statements": 1} {"question": "How many students have participated in open pedagogy projects, and what are the average and total project costs?", "schema": "CREATE TABLE project (project_id INT, project_name VARCHAR(50), num_students INT, avg_cost DECIMAL(5,2), total_cost DECIMAL(10,2), PRIMARY KEY(project_id)); INSERT INTO project (project_id, project_name, num_students, avg_cost, total_cost) VALUES (1, 'Open Source Software Development', 15, 500, 7500), (2, 'Data Journalism', 10, 300, 3000), (3, 'Digital Citizenship', 20, 250, 5000);", "sql": "SELECT num_students, AVG(avg_cost) as avg_project_cost, SUM(total_cost) as total_project_cost FROM project;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 107, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the location of the b.c. open?", "schema": "CREATE TABLE table_name_81 (location VARCHAR, tournament VARCHAR)", "sql": "SELECT location FROM table_name_81 WHERE tournament = 'b.c. open';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "Update the price of 'Solar-Powered Lamps' to $29.99 in stores located in 'New York' and 'Texas'", "schema": "CREATE TABLE Stores (store_id INT, store_name VARCHAR(50), state VARCHAR(50)); INSERT INTO Stores (store_id, store_name, state) VALUES (1, 'Eco-Market', 'New York'), (2, 'Green Vista', 'Texas'); CREATE TABLE Inventory (product_id INT, product_name VARCHAR(50), store_id INT, price DECIMAL(5, 2)); INSERT INTO Inventory (product_id, product_name, store_id, price) VALUES (1, 'Solar-Powered Lamps', 1, 24.99), (2, 'Bamboo Toothbrushes', 2, 9.99);", "sql": "UPDATE Inventory SET price = 29.99 WHERE product_name = 'Solar-Powered Lamps' AND store_id IN (SELECT store_id FROM Stores WHERE state IN ('New York', 'Texas'));", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 161, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which record has Katsuomi Inagaki as an opponent?", "schema": "CREATE TABLE table_name_51 (record VARCHAR, opponent VARCHAR)", "sql": "SELECT record FROM table_name_51 WHERE opponent = 'katsuomi inagaki';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 69, "num_statements": 1} {"question": "PostgreSQL regression test 'numeric': Write the SELECT query (example 714).", "schema": null, "sql": "SELECT to_char('100'::numeric, 'f\"ool\\\"999');", "explanation": "Regression test for Numeric in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT to_char('100'::numeric, 'f\"ool\\\"999')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 45, "num_statements": 1} {"question": "What is the average energy efficiency rating for appliances in the European Union?", "schema": "CREATE TABLE eu_appliances (id INT, country VARCHAR(255), name VARCHAR(255), energy_efficiency_rating FLOAT); INSERT INTO eu_appliances (id, country, name, energy_efficiency_rating) VALUES (1, 'France', 'Appliance A', 3.5), (2, 'Germany', 'Appliance B', 4.2), (3, 'Italy', 'Appliance C', 3.9);", "sql": "SELECT AVG(energy_efficiency_rating) FROM eu_appliances WHERE country IN (SELECT name FROM countries WHERE region = 'European Union');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 134, "num_statements": 1} {"question": "Delete records in the wildlife table where the animal_type is 'Bird' and region is 'Asia'", "schema": "CREATE TABLE wildlife (id INT PRIMARY KEY, animal_type TEXT, region TEXT, conservation_status TEXT); INSERT INTO wildlife (id, animal_type, region, conservation_status) VALUES (1, 'Bird', 'Asia', 'Vulnerable');", "sql": "DELETE FROM wildlife WHERE animal_type = 'Bird' AND region = 'Asia';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_delete", "is_postgresql_specific": false, "sql_length": 68, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Tell me the year built for withdrawn of 1983", "schema": "CREATE TABLE table_name_19 (year_built__converted VARCHAR, _ VARCHAR, withdrawn VARCHAR)", "sql": "SELECT year_built__converted * _ FROM table_name_19 WHERE withdrawn = 1983;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 75, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: When the engine Ford Cosworth DFV 3.0 v8 has a chassis of m23 and in rounds 14-15, what is its Tyre?", "schema": "CREATE TABLE table_name_51 (tyre VARCHAR, chassis VARCHAR, rounds VARCHAR, engine VARCHAR)", "sql": "SELECT tyre FROM table_name_51 WHERE rounds = '14-15' AND engine = 'ford cosworth dfv 3.0 v8' AND chassis = 'm23';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 114, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which route has 21 stations?", "schema": "CREATE TABLE table_name_31 (route VARCHAR, stations VARCHAR)", "sql": "SELECT route FROM table_name_31 WHERE stations = 21;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 52, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the 2012 population for the state whose capital is Santa Fe?", "schema": "CREATE TABLE table_name_61 (population_est__2012_ VARCHAR, capital VARCHAR)", "sql": "SELECT population_est__2012_ FROM table_name_61 WHERE capital = 'santa fe';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 75, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What are the ids of the movies that are not reviewed by Brittany Harris.", "schema": "CREATE TABLE Reviewer (rID VARCHAR, name VARCHAR); CREATE TABLE Rating (mID VARCHAR); CREATE TABLE Rating (mID VARCHAR, rID VARCHAR)", "sql": "SELECT mID FROM Rating EXCEPT SELECT T1.mID FROM Rating AS T1 JOIN Reviewer AS T2 ON T1.rID = T2.rID WHERE T2.name = 'Brittany Harris';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 135, "num_statements": 1} {"question": "Determine the number of public events held in the city of Los Angeles in each month of the year 2021", "schema": "CREATE TABLE public_events (event_id INT, city VARCHAR(20), year INT, month INT, events_held INT); INSERT INTO public_events (event_id, city, year, month, events_held) VALUES (1, 'Los Angeles', 2021, 1, 10);", "sql": "SELECT month, SUM(events_held) FROM public_events WHERE city = 'Los Angeles' AND year = 2021 GROUP BY month;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 108, "num_statements": 1} {"question": "How many students have enrolled in each course offering, and what is the average mental health rating for each course?", "schema": "CREATE TABLE course_offerings (course_id INT, offering_id INT, enrollment INT); INSERT INTO course_offerings (course_id, offering_id, enrollment) VALUES (1, 1, 30), (1, 2, 25), (2, 1, 20), (2, 2, 22), (3, 1, 40), (3, 2, 38), (4, 1, 35), (4, 2, 33); CREATE TABLE courses (course_id INT, mental_health_rating FLOAT); INSERT INTO courses (course_id, mental_health_rating) VALUES (1, 4.5), (2, 3.8), (3, 4.7), (4, 2.5);", "sql": "SELECT co.offering_id, c.course_id, AVG(c.mental_health_rating) AS avg_rating, SUM(co.enrollment) AS total_enrollment FROM course_offerings co JOIN courses c ON co.course_id = c.course_id GROUP BY co.offering_id, c.course_id;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 225, "num_statements": 1} {"question": "Update the region of donors who have donated more than $5000 in the 'asia' region to 'emerging_market'.", "schema": "CREATE TABLE donors (id INT, name TEXT, region TEXT, donation_amount FLOAT); INSERT INTO donors (id, name, region, donation_amount) VALUES (1, 'John Doe', 'Asia', 5000.00), (2, 'Jane Smith', 'Europe', 3000.00);", "sql": "UPDATE donors SET region = 'Emerging_Market' WHERE region = 'Asia' AND donation_amount > 5000;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 94, "num_statements": 1} {"question": "What is the increase in the number of local vendors participating in sustainable events in Mexico?", "schema": "CREATE TABLE VendorParticipation (ParticipationID INT, Country VARCHAR(50), Vendors INT); INSERT INTO VendorParticipation (ParticipationID, Country, Vendors) VALUES (1, 'Mexico', 50), (2, 'Mexico', 60);", "sql": "SELECT SUM(Vendors) FROM VendorParticipation WHERE Country = 'Mexico';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 70, "num_statements": 1} {"question": "What is the average playtime for players who have played games with a price greater than 50, by gender?", "schema": "CREATE TABLE Players (PlayerID INT, Age INT, Gender VARCHAR(10), Country VARCHAR(50)); INSERT INTO Players (PlayerID, Age, Gender, Country) VALUES (1, 25, 'Male', 'USA'), (2, 30, 'Female', 'Canada'), (3, 22, 'Male', 'Mexico'); CREATE TABLE GamePlay (PlayerID INT, Playtime INT, GamePrice DECIMAL(5, 2)); INSERT INTO GamePlay (PlayerID, Playtime, GamePrice) VALUES (1, 120, 60.00), (2, 90, 45.00), (3, 150, 55.00), (4, 100, 70.00), (5, 80, 75.00);", "sql": "SELECT Gender, AVG(Playtime) FROM Players INNER JOIN GamePlay ON Players.PlayerID = GamePlay.PlayerID WHERE GamePrice > 50 GROUP BY Gender;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 139, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is Scott Gomez's biggest weight?", "schema": "CREATE TABLE table_name_57 (weight__kg_ INTEGER, name VARCHAR)", "sql": "SELECT MAX(weight__kg_) FROM table_name_57 WHERE name = 'scott gomez';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 70, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the team classification for stage of 6", "schema": "CREATE TABLE table_name_47 (team_classification VARCHAR, stage VARCHAR)", "sql": "SELECT team_classification FROM table_name_47 WHERE stage = '6';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 64, "num_statements": 1} {"question": "What is the total number of mining accidents in each province in Canada?", "schema": "CREATE TABLE accidents (id INT, province VARCHAR(50), industry VARCHAR(50), num_accidents INT); INSERT INTO accidents (id, province, industry, num_accidents) VALUES (1, 'Ontario', 'mining', 50); INSERT INTO accidents (id, province, industry, num_accidents) VALUES (2, 'Quebec', 'mining', 30); INSERT INTO accidents (id, province, industry, num_accidents) VALUES (3, 'Alberta', 'oil', 20);", "sql": "SELECT province, SUM(num_accidents) FROM accidents WHERE industry = 'mining' GROUP BY province;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 95, "num_statements": 1} {"question": "What is the percentage of employees from underrepresented racial groups in the mining industry?", "schema": "CREATE TABLE workforce (id INT, name VARCHAR(50), ethnicity VARCHAR(50), position VARCHAR(50), department VARCHAR(50)); INSERT INTO workforce (id, name, ethnicity, position, department) VALUES (1, 'John Doe', 'Caucasian', 'Engineer', 'Mining'), (2, 'Jane Smith', 'African American', 'Technician', 'Environment'), (3, 'Alice Johnson', 'Hispanic', 'Manager', 'Operations');", "sql": "SELECT (COUNT(CASE WHEN ethnicity IN ('African American', 'Hispanic', 'Native American', 'Asian', 'Pacific Islander') THEN 1 ELSE 0 END) * 100.0 / COUNT(*)) as underrepresented_percentage FROM workforce;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 203, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which location has 101,119 as the capacity?", "schema": "CREATE TABLE table_name_72 (location VARCHAR, capacity VARCHAR)", "sql": "SELECT location FROM table_name_72 WHERE capacity = '101,119';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 62, "num_statements": 1} {"question": "PostgreSQL regression test 'collate.utf8': Write the SELECT query (example 38).", "schema": null, "sql": "SELECT 'xAb' !~* '[c-d]' COLLATE PG_UNICODE_FAST;", "explanation": "Regression test for Collate.Utf8 in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT 'xAb' !~* '[c-d]' COLLATE PG_UNICODE_FAST) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 49, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'citext' (example 160).", "schema": null, "sql": "SELECT 'sad'::mood::citext = 'sad' AS t;", "explanation": "Example query from the 'citext' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 40, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the total crowd count for the venue Princes Park?", "schema": "CREATE TABLE table_name_23 (crowd INTEGER, venue VARCHAR)", "sql": "SELECT SUM(crowd) FROM table_name_23 WHERE venue = 'princes park';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "What is the total amount of water saved through conservation initiatives in the year 2020?", "schema": "CREATE TABLE savings (id INT, amount FLOAT, year INT); INSERT INTO savings (id, amount, year) VALUES (1, 1000, 2020), (2, 1500, 2019), (3, 2000, 2018);", "sql": "SELECT SUM(amount) FROM savings WHERE year = 2020;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 50, "num_statements": 1} {"question": "What is the average age of all volunteers who have led community education programs?", "schema": "CREATE TABLE volunteers (volunteer_id INT, age INT, has_led_program BOOLEAN);", "sql": "SELECT AVG(age) FROM volunteers WHERE has_led_program = TRUE;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 61, "num_statements": 1} {"question": "List all oceanographic studies in 'Atlantic Ocean' since 2010.", "schema": "CREATE TABLE if not exists oceanographic_studies (id INT, name TEXT, location TEXT, year INT);", "sql": "SELECT * FROM oceanographic_studies WHERE location = 'Atlantic Ocean' AND year >= 2010;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 87, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is Score, when Game is \"19\"?", "schema": "CREATE TABLE table_name_11 (score VARCHAR, game VARCHAR)", "sql": "SELECT score FROM table_name_11 WHERE game = 19;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 48, "num_statements": 1} {"question": "What is the average ESG score for companies based in the United States?", "schema": "CREATE TABLE companies (company_id INT, country VARCHAR(50), esg_score FLOAT); INSERT INTO companies (company_id, country, esg_score) VALUES (1, 'United States', 75.0), (2, 'Canada', 82.3), (3, 'United States', 68.9);", "sql": "SELECT AVG(esg_score) FROM companies WHERE country = 'United States';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 69, "num_statements": 1} {"question": "What is the average severity rating of all vulnerabilities in the 'network' category?", "schema": "CREATE TABLE vulnerabilities (id INT, category VARCHAR(255), severity INT); INSERT INTO vulnerabilities (id, category, severity) VALUES (1, 'network', 8), (2, 'malware', 5);", "sql": "SELECT AVG(severity) FROM vulnerabilities WHERE category = 'network';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 69, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many starts have a year prior to 2012, and team penske as the team, with a finish greater than 27?", "schema": "CREATE TABLE table_name_35 (start INTEGER, finish VARCHAR, year VARCHAR, team VARCHAR)", "sql": "SELECT SUM(start) FROM table_name_35 WHERE year < 2012 AND team = 'team penske' AND finish > 27;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 96, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the title of every song, and how many weeks was each song at #1 for One Direction?", "schema": "CREATE TABLE table_19542477_9 (song_s__—_weeks VARCHAR, artist_s_ VARCHAR)", "sql": "SELECT song_s__—_weeks FROM table_19542477_9 WHERE artist_s_ = 'One Direction';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 79, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Find the name of the dorm with the largest capacity.", "schema": "CREATE TABLE dorm (dorm_name VARCHAR, student_capacity VARCHAR)", "sql": "SELECT dorm_name FROM dorm ORDER BY student_capacity DESC LIMIT 1;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "Write the SQL definition from PostgreSQL contrib extension 'contrib' (item 157).", "schema": null, "sql": "CREATE FUNCTION gbt_cash_compress(internal)\nRETURNS internal\nAS 'MODULE_PATHNAME'\nLANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;", "explanation": "SQL definition from the 'contrib' PostgreSQL contrib extension.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 124, "num_statements": 1} {"question": "Identify artists in the 'ArtistsDemographics' table who are older than 40 and from the 'American' nationality.", "schema": "CREATE TABLE ArtistsDemographics (ArtistID INT, Age INT, Gender VARCHAR(10), Nationality VARCHAR(50)); INSERT INTO ArtistsDemographics (ArtistID, Age, Gender, Nationality) VALUES (1, 45, 'Male', 'American'), (2, 34, 'Female', 'Canadian'), (3, 50, 'Male', 'British'), (4, 35, 'Female', 'Mexican'), (5, 40, 'Non-binary', 'Australian');", "sql": "SELECT * FROM ArtistsDemographics WHERE Age > 40 AND Nationality = 'American';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 78, "num_statements": 1} {"question": "What is the average manufacturing cost for aircraft components?", "schema": "CREATE TABLE AircraftComponents (id INT, component_name VARCHAR(50), manufacturing_cost FLOAT); CREATE VIEW ComponentManufacturingCosts AS SELECT component_name, AVG(manufacturing_cost) as avg_cost FROM AircraftComponents GROUP BY component_name;", "sql": "SELECT ComponentManufacturingCosts.component_name, ComponentManufacturingCosts.avg_cost FROM ComponentManufacturingCosts;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 122, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: List the brands of lenses that took both a picture of mountains with range 'Toubkal Atlas' and a picture of mountains with range 'Lasta Massif'", "schema": "CREATE TABLE mountain (id VARCHAR, range VARCHAR); CREATE TABLE photos (mountain_id VARCHAR, camera_lens_id VARCHAR); CREATE TABLE camera_lens (brand VARCHAR, id VARCHAR)", "sql": "SELECT T3.brand FROM mountain AS T1 JOIN photos AS T2 ON T1.id = T2.mountain_id JOIN camera_lens AS T3 ON T2.camera_lens_id = T3.id WHERE T1.range = 'Toubkal Atlas' INTERSECT SELECT T3.brand FROM mountain AS T1 JOIN photos AS T2 ON T1.id = T2.mountain_id JOIN camera_lens AS T3 ON T2.camera_lens_id = T3.id WHERE T1.range = 'Lasta Massif';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 339, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many females speak Ukrainian?", "schema": "CREATE TABLE table_name_11 (females VARCHAR, language VARCHAR)", "sql": "SELECT females FROM table_name_11 WHERE language = 'ukrainian';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 63, "num_statements": 1} {"question": "What is the average network investment per day for the past year?", "schema": "CREATE TABLE network_investments (investment_id INT, investment_amount DECIMAL(10,2), investment_date DATE); INSERT INTO network_investments (investment_id, investment_amount, investment_date) VALUES (1, 25000.00, '2021-12-25'), (2, 30000.00, '2022-01-07'), (3, 15000.00, '2022-02-01');", "sql": "SELECT AVG(investment_amount) FROM network_investments WHERE investment_date >= DATE_SUB(CURRENT_DATE, INTERVAL 1 YEAR);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 120, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which Date has an Opposing Pitcher of jack morris, and an Inning of 4th?", "schema": "CREATE TABLE table_name_88 (date VARCHAR, opposing_pitcher VARCHAR, inning VARCHAR)", "sql": "SELECT date FROM table_name_88 WHERE opposing_pitcher = 'jack morris' AND inning = '4th';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 89, "num_statements": 1} {"question": "Which employees have completed the most cultural competency training hours?", "schema": "CREATE TABLE CulturalCompetencyTraining (ID INT PRIMARY KEY, EmployeeID INT, TrainingType VARCHAR(20), Hours INT, Date DATE);", "sql": "SELECT EmployeeID, SUM(Hours) as TotalHours FROM CulturalCompetencyTraining GROUP BY EmployeeID ORDER BY TotalHours DESC LIMIT 5;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 129, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Tell me the category of na director", "schema": "CREATE TABLE table_name_75 (category VARCHAR, director VARCHAR)", "sql": "SELECT category FROM table_name_75 WHERE director = 'na';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 57, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: When was there an attendance of 51,342?", "schema": "CREATE TABLE table_name_6 (date VARCHAR, attendance VARCHAR)", "sql": "SELECT date FROM table_name_6 WHERE attendance = '51,342';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 58, "num_statements": 1} {"question": "What is the average carbon emission for mining operations in Africa?", "schema": "CREATE TABLE MiningOperations (OperationID INT, MineName VARCHAR(50), Location VARCHAR(50), CarbonEmissions INT); INSERT INTO MiningOperations (OperationID, MineName, Location, CarbonEmissions) VALUES (1, 'Platinum Mine', 'South Africa', 100), (2, 'Gold Mine', 'Ghana', 120), (3, 'Diamond Mine', 'Botswana', 150);", "sql": "SELECT AVG(CarbonEmissions) FROM MiningOperations WHERE Location LIKE 'Africa%';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 80, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who directed the film Antonio's Breakfast?", "schema": "CREATE TABLE table_name_44 (director_s_ VARCHAR, film VARCHAR)", "sql": "SELECT director_s_ FROM table_name_44 WHERE film = 'antonio's breakfast';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 73, "num_statements": 1} {"question": "Maximum CO2 emissions from a single industrial process", "schema": "CREATE TABLE industrial_processes (id INT, process_name VARCHAR(255), sector VARCHAR(255), country VARCHAR(255), year INT, co2_emissions FLOAT);", "sql": "SELECT MAX(co2_emissions) FROM industrial_processes;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 52, "num_statements": 1} {"question": "What is the total CO2 emissions for all mines located in Canada?", "schema": "CREATE TABLE EnvironmentalImpact (ImpactID INT, MineSite VARCHAR(50), Country VARCHAR(50), CO2Emissions INT); INSERT INTO EnvironmentalImpact (ImpactID, MineSite, Country, CO2Emissions) VALUES (1, 'Site A', 'Canada', 500);", "sql": "SELECT SUM(CO2Emissions) FROM EnvironmentalImpact WHERE Country = 'Canada';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 75, "num_statements": 1} {"question": "What is the maximum score achieved in a single game session?", "schema": "CREATE TABLE game_sessions (session_id INT, player_id INT, score INT); INSERT INTO game_sessions (session_id, player_id, score) VALUES (1, 1, 300), (2, 2, 400), (3, 3, 250);", "sql": "SELECT MAX(score) FROM game_sessions;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 37, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the Operator with a Peak that is 55?", "schema": "CREATE TABLE table_name_47 (operator VARCHAR, peak VARCHAR)", "sql": "SELECT operator FROM table_name_47 WHERE peak = '55';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 53, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'btree_gist' (example 10).", "schema": null, "sql": "SELECT count(*) FROM varbittmp WHERE a < '1110100111010'::varbit;", "explanation": "Example query from the 'btree_gist' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 67, "num_statements": 1} {"question": "What is the minimum age of players from Japan who have played more than 50 games?", "schema": "CREATE TABLE Players (PlayerID INT, PlayerName VARCHAR(50), Age INT, Country VARCHAR(50), GamesPlayed INT); INSERT INTO Players (PlayerID, PlayerName, Age, Country, GamesPlayed) VALUES (1, 'John Doe', 25, 'USA', 100); INSERT INTO Players (PlayerID, PlayerName, Age, Country, GamesPlayed) VALUES (2, 'Jane Smith', 30, 'Canada', 200); INSERT INTO Players (PlayerID, PlayerName, Age, Country, GamesPlayed) VALUES (3, 'Taro Yamada', 24, 'Japan', 75); INSERT INTO Players (PlayerID, PlayerName, Age, Country, GamesPlayed) VALUES (4, 'Sachiko Tanaka', 28, 'Japan', 150);", "sql": "SELECT MIN(Age) FROM Players WHERE Country = 'Japan' AND GamesPlayed > 50;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 74, "num_statements": 1} {"question": "What is the number of trains that require maintenance in the Berlin S-Bahn network?", "schema": "CREATE TABLE train_maintenance (train_id INT, network VARCHAR(50), needs_maintenance BOOLEAN); INSERT INTO train_maintenance (train_id, network, needs_maintenance) VALUES (1, 'S-Bahn', true), (2, 'S-Bahn', false), (3, 'U-Bahn', false);", "sql": "SELECT COUNT(*) FROM train_maintenance WHERE network = 'S-Bahn' AND needs_maintenance = true;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 93, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the date of birth for the player from Ulster and plays at Centre position?", "schema": "CREATE TABLE table_name_26 (date_of_birth__age_ VARCHAR, club_province VARCHAR, position VARCHAR)", "sql": "SELECT date_of_birth__age_ FROM table_name_26 WHERE club_province = 'ulster' AND position = 'centre';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 101, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which team had 1 race, 1 F/lap and in the series 24 hours of Nurburgring?", "schema": "CREATE TABLE table_name_44 (team VARCHAR, series VARCHAR, races VARCHAR, f_laps VARCHAR)", "sql": "SELECT team FROM table_name_44 WHERE races = '1' AND f_laps = '1' AND series = '24 hours of nurburgring';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 105, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the platform for the latest release 5.0.3?", "schema": "CREATE TABLE table_name_53 (platform VARCHAR, latest_stable_release VARCHAR)", "sql": "SELECT platform FROM table_name_53 WHERE latest_stable_release = '5.0.3';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 73, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: When sylvania, oh is the location what is the team nickname?", "schema": "CREATE TABLE table_28211213_2 (team_nickname VARCHAR, location VARCHAR)", "sql": "SELECT team_nickname FROM table_28211213_2 WHERE location = 'Sylvania, OH';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 75, "num_statements": 1} {"question": "PostgreSQL regression test 'create_view': Write the SELECT query (example 197).", "schema": null, "sql": "select pg_get_viewdef('vv4', true);", "explanation": "Regression test for Create View in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select pg_get_viewdef('vv4', true)) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 35, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: In the match where fitzroy was the away team, where was the venue?", "schema": "CREATE TABLE table_name_47 (venue VARCHAR, away_team VARCHAR)", "sql": "SELECT venue FROM table_name_47 WHERE away_team = 'fitzroy';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 60, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the attendance at the Billericay Town home game?", "schema": "CREATE TABLE table_name_51 (attendance VARCHAR, home_team VARCHAR)", "sql": "SELECT attendance FROM table_name_51 WHERE home_team = 'billericay town';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 73, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What are toranosuke takagi's average laps?", "schema": "CREATE TABLE table_name_14 (laps INTEGER, driver VARCHAR)", "sql": "SELECT AVG(laps) FROM table_name_14 WHERE driver = 'toranosuke takagi';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 71, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What season did he have 7 appearances?", "schema": "CREATE TABLE table_name_91 (season VARCHAR, apps VARCHAR)", "sql": "SELECT season FROM table_name_91 WHERE apps = 7;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 48, "num_statements": 1} {"question": "Insert a new record into the marine_life table for the 'Shark' species with a population of 1000 in the indian_ocean region.", "schema": "CREATE TABLE marine_life (id INT, species VARCHAR(255), population INT, region VARCHAR(255));", "sql": "INSERT INTO marine_life (id, species, population, region) VALUES ((SELECT COALESCE(MAX(id), 0) + 1 FROM marine_life), 'Shark', 1000, 'indian_ocean');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 149, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which African countries have a smaller population than that of any country in Asia?", "schema": "CREATE TABLE country (Name VARCHAR, Continent VARCHAR, population INTEGER)", "sql": "SELECT Name FROM country WHERE Continent = 'Africa' AND population < (SELECT MIN(population) FROM country WHERE Continent = 'Asia');", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 132, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the number of clubs when Dalian Shide won and Sichuan Quanxing won 4th?", "schema": "CREATE TABLE table_name_35 (number_of_clubs INTEGER, winners VARCHAR, fourth_placed VARCHAR)", "sql": "SELECT AVG(number_of_clubs) FROM table_name_35 WHERE winners = 'dalian shide' AND fourth_placed = 'sichuan quanxing';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 117, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many picks does Matt Moran have altogether after round 6?", "schema": "CREATE TABLE table_name_72 (pick VARCHAR, name VARCHAR, round VARCHAR)", "sql": "SELECT COUNT(pick) FROM table_name_72 WHERE name = 'matt moran' AND round > 6;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 78, "num_statements": 1} {"question": "Identify policyholders who have never made a claim in Michigan", "schema": "CREATE TABLE claims (policyholder_id INT, claim_number INT, state VARCHAR(2)); INSERT INTO claims (policyholder_id, claim_number, state) VALUES (1, 1, 'MI'), (2, 1, 'MI'), (3, 1, 'OH');", "sql": "SELECT policyholder_id FROM claims WHERE state = 'MI' GROUP BY policyholder_id HAVING COUNT(claim_number) = 1;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 110, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'rowsecurity' (example 379).", "schema": null, "sql": "DELETE FROM bv1 WHERE a = 6 AND f_leak(b);", "explanation": "DML from PostgreSQL core regression test for Rowsecurity.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_delete", "is_postgresql_specific": false, "sql_length": 42, "num_statements": 1} {"question": "List all smart contracts in the 'Tezos' network that have a name starting with 'Con'.", "schema": "CREATE TABLE tezos_smart_contracts (id INT, name VARCHAR(255), network VARCHAR(255)); INSERT INTO tezos_smart_contracts (id, name, network) VALUES (1, 'Con1', 'tezos'), (2, 'SC2', 'tezos');", "sql": "SELECT * FROM tezos_smart_contracts WHERE network = 'tezos' AND name LIKE 'Con%';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 81, "num_statements": 1} {"question": "Update the caloric information of product X to 350.", "schema": "CREATE TABLE Food (FoodID varchar(10), FoodName varchar(20), Calories int); INSERT INTO Food VALUES ('X', 'Product X', 400);", "sql": "UPDATE Food SET Calories = 350 WHERE FoodID = 'X';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 50, "num_statements": 1} {"question": "What is the average age of fans by country?", "schema": "CREATE TABLE fans (fan_id INT, fan_name VARCHAR(50), country VARCHAR(50), age INT);", "sql": "SELECT f.country, AVG(f.age) as avg_age FROM fans f GROUP BY f.country;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 71, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the total number of losses against 1412, and Byes less than 2?", "schema": "CREATE TABLE table_name_4 (losses VARCHAR, against VARCHAR, byes VARCHAR)", "sql": "SELECT COUNT(losses) FROM table_name_4 WHERE against = 1412 AND byes < 2;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 73, "num_statements": 1} {"question": "Which agricultural innovations were implemented in Mexico between 2017 and 2020?", "schema": "CREATE TABLE agricultural_innovations (innovation_id INT, country TEXT, innovation TEXT, implementation_year INT); INSERT INTO agricultural_innovations (innovation_id, country, innovation, implementation_year) VALUES (1, 'Mexico', 'Precision agriculture', 2017), (2, 'Mexico', 'Drip irrigation', 2018), (3, 'Mexico', 'Vertical farming', 2019), (4, 'Mexico', 'Automated harvesting', 2020);", "sql": "SELECT innovation FROM agricultural_innovations WHERE country = 'Mexico' AND implementation_year BETWEEN 2017 AND 2020;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 119, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the highest to par with a place of t8 and West Germany as the country?", "schema": "CREATE TABLE table_name_37 (to_par INTEGER, place VARCHAR, country VARCHAR)", "sql": "SELECT MAX(to_par) FROM table_name_37 WHERE place = 't8' AND country = 'west germany';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 86, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which Season 7 has a Season 6 of jim treliving?", "schema": "CREATE TABLE table_name_98 (season_7 VARCHAR, season_6 VARCHAR)", "sql": "SELECT season_7 FROM table_name_98 WHERE season_6 = 'jim treliving';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 68, "num_statements": 1} {"question": "Insert records of companies with female founders and their investments.", "schema": "CREATE TABLE Companies (id INT, name TEXT, founder_gender TEXT); INSERT INTO Companies (id, name, founder_gender) VALUES (1, 'Women in Power', 'Female'), (2, 'Green Innovations', 'Male'); CREATE TABLE Investment_Rounds (id INT, company_name TEXT, round_amount INT, round_type TEXT, date DATE);", "sql": "INSERT INTO Companies (id, name, founder_gender) VALUES (3, 'Code for Equality', 'Female'), (4, 'Sustainable Futures', 'Female'); INSERT INTO Investment_Rounds (id, company_name, round_amount, round_type, date) VALUES (1, 'Women in Power', 2000000, 'Series A', '2021-09-01'), (2, 'Green Innovations', 1500000, 'Seed', '2022-01-10'), (3, 'Code for Equality', 3000000, 'Series B', '2022-03-15'), (4, 'Sustainable Futures', 2500000, 'Seed', '2022-02-22');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 452, "num_statements": 2} {"question": "Show a query using PostgreSQL contrib extension 'btree_gist' (example 7).", "schema": null, "sql": "SELECT count(*) FROM timestamptztmp WHERE a > '2018-12-18 10:59:54 GMT+3';", "explanation": "Example query from the 'btree_gist' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 75, "num_statements": 1} {"question": "How many mental health parity cases were reported in each region?", "schema": "CREATE TABLE MentalHealthParity (CaseID INT, Region VARCHAR(25)); INSERT INTO MentalHealthParity (CaseID, Region) VALUES (1, 'Northeast'), (2, 'Midwest'), (3, 'South'), (4, 'Northeast'), (5, 'West');", "sql": "SELECT Region, COUNT(*) as NumCases FROM MentalHealthParity GROUP BY Region;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 76, "num_statements": 1} {"question": "What is the average calorie count for organic products in our inventory?", "schema": "CREATE TABLE Inventory (product_id INT, product_name VARCHAR(100), is_organic BOOLEAN, calorie_count INT); INSERT INTO Inventory (product_id, product_name, is_organic, calorie_count) VALUES (1, 'Apple', true, 95), (2, 'Banana', true, 105), (3, 'Chips', false, 150);", "sql": "SELECT AVG(calorie_count) FROM Inventory WHERE is_organic = true;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 65, "num_statements": 1} {"question": "What is the number of vessels in the fleet of Oceanic Lines that were built before 2010?", "schema": "CREATE TABLE vessels (id INT, name VARCHAR(50), company VARCHAR(50), build_year INT); INSERT INTO vessels (id, name, company, build_year) VALUES (1, 'MV Orion', 'Oceanic Lines', 2005), (2, 'MV Pegasus', 'Oceanic Lines', 2012), (3, 'MV Neptune', 'Oceanic Lines', 2008);", "sql": "SELECT COUNT(*) FROM vessels WHERE company = 'Oceanic Lines' AND build_year < 2010;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 83, "num_statements": 1} {"question": "Show the total amount spent on raw materials for each product in the past quarter.", "schema": "CREATE TABLE raw_materials (material_id INT, product_id INT, amount DECIMAL(10,2)); INSERT INTO raw_materials VALUES (1, 1, 5000), (2, 1, 6000), (3, 2, 7000), (4, 2, 8000), (5, 3, 9000), (6, 3, 10000), (7, 1, 4000), (8, 2, 5000), (9, 3, 6000); CREATE TABLE products (product_id INT, name TEXT); INSERT INTO products VALUES (1, 'Product A'), (2, 'Product B'), (3, 'Product C');", "sql": "SELECT products.name, SUM(raw_materials.amount) AS total_spent FROM raw_materials JOIN products ON raw_materials.product_id = products.product_id WHERE raw_materials.material_id > (SELECT MAX(material_id) - 3 FROM raw_materials) GROUP BY products.name;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 252, "num_statements": 1} {"question": "Write the SQL definition from PostgreSQL contrib extension 'contrib' (item 43).", "schema": null, "sql": "CREATE FUNCTION gin_compare_prefix_timetz(timetz, timetz, int2, internal)\nRETURNS int4\nAS 'MODULE_PATHNAME'\nLANGUAGE C STRICT IMMUTABLE;", "explanation": "SQL definition from the 'contrib' PostgreSQL contrib extension.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 136, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'indexing' (example 472).", "schema": null, "sql": "insert into covidxpart values (3, 1);", "explanation": "DML from PostgreSQL core regression test for Indexing.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 37, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What's the school that has an enrollment more than 441 with chargers as their mascot?", "schema": "CREATE TABLE table_name_35 (school VARCHAR, enrollment VARCHAR, mascot VARCHAR)", "sql": "SELECT school FROM table_name_35 WHERE enrollment > 441 AND mascot = 'chargers';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 80, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: what is the number where the player was jimmy demaret", "schema": "CREATE TABLE table_262383_1 (runner_s__up VARCHAR)", "sql": "SELECT 54 AS _holes FROM table_262383_1 WHERE runner_s__up = 'Jimmy Demaret';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 77, "num_statements": 1} {"question": "What is the total population in Africa with access to clean water?", "schema": "CREATE TABLE WaterAccess (country_name TEXT, continent TEXT, population INTEGER, clean_water_access BOOLEAN); INSERT INTO WaterAccess (country_name, continent, population, clean_water_access) VALUES ('Algeria', 'Africa', 43073003, true), ('Angola', 'Africa', 32898569, false), ('Benin', 'Africa', 12131338, true), ('Botswana', 'Africa', 2359373, true), ('Burkina Faso', 'Africa', 20807289, false), ('Burundi', 'Africa', 11526794, false), ('Cameroon', 'Africa', 25678974, true);", "sql": "SELECT SUM(population) FROM WaterAccess WHERE clean_water_access = true AND continent = 'Africa';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 97, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is High Rebounds, when High Assists is \"Jason Kidd (13)\"?", "schema": "CREATE TABLE table_name_50 (high_rebounds VARCHAR, high_assists VARCHAR)", "sql": "SELECT high_rebounds FROM table_name_50 WHERE high_assists = 'jason kidd (13)';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 79, "num_statements": 1} {"question": "What is the average year that each country formed a military alliance?", "schema": "CREATE TABLE MilitaryAlliances (id INT PRIMARY KEY, name VARCHAR(50), country VARCHAR(50), year_formed INT); INSERT INTO MilitaryAlliances (id, name, country, year_formed) VALUES (1, 'NATO', 'USA', 1949); INSERT INTO MilitaryAlliances (id, name, country, year_formed) VALUES (2, 'Warsaw Pact', 'USSR', 1955);", "sql": "SELECT country, AVG(year_formed) FROM MilitaryAlliances GROUP BY country;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 73, "num_statements": 1} {"question": "What is the total budget for initiatives that are related to \"public_safety\" and were submitted after 2019-01-01 in the \"initiatives\" table?", "schema": "CREATE TABLE initiatives (id INT, title TEXT, budget INT, category TEXT, submit_date DATE);", "sql": "SELECT SUM(budget) FROM initiatives WHERE category = 'public_safety' AND submit_date > '2019-01-01';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 100, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: what is the highest ANSI code with a latitude more than 47.623288 and a geo id more than 3805508060 with land (sqmi) more than 35.66 and a longitude less than -102.054248", "schema": "CREATE TABLE table_name_98 (ansi_code INTEGER, longitude VARCHAR, land___sqmi__ VARCHAR, latitude VARCHAR, geo_id VARCHAR)", "sql": "SELECT MAX(ansi_code) FROM table_name_98 WHERE latitude > 47.623288 AND geo_id > 3805508060 AND land___sqmi__ > 35.66 AND longitude < -102.054248;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 146, "num_statements": 1} {"question": "Who are the volunteers for the 'disaster_relief' program and when did they last volunteer?", "schema": "CREATE TABLE program (id INT, name VARCHAR(255)); CREATE TABLE volunteer (id INT, program_id INT, name VARCHAR(255), last_volunteered DATE); INSERT INTO program (id, name) VALUES (1, 'youth_mentoring'), (2, 'disaster_relief'); INSERT INTO volunteer (id, program_id, name, last_volunteered) VALUES (1, 1, 'Alice', '2022-01-01'), (2, 2, 'Bob', '2022-02-15'), (3, 2, 'Charlie', '2022-03-05'), (4, 2, 'David', '2022-03-20'), (5, 1, 'Eve', '2022-04-01');", "sql": "SELECT * FROM volunteer WHERE program_id = (SELECT id FROM program WHERE name = 'disaster_relief');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 99, "num_statements": 1} {"question": "What is the distribution of articles by topic in 2020?", "schema": "CREATE TABLE articles (id INT, title VARCHAR(255), topic VARCHAR(50), publication_year INT); INSERT INTO articles (id, title, topic, publication_year) VALUES (1, 'Article1', 'Culture', 2020), (2, 'Article2', 'Politics', 2019), (3, 'Article3', 'Culture', 2018), (4, 'Article4', 'Science', 2020), (5, 'Article5', 'Culture', 2020);", "sql": "SELECT topic, COUNT(*) as count FROM articles WHERE publication_year = 2020 GROUP BY topic ORDER BY count DESC;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 111, "num_statements": 1} {"question": "PostgreSQL regression test 'regproc': Write the SELECT query (example 124).", "schema": null, "sql": "SELECT * FROM pg_input_error_info('-', 'regoper');", "explanation": "Regression test for Regproc in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT * FROM pg_input_error_info('-', 'regoper')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 50, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What season was the Formula BMW USA in?", "schema": "CREATE TABLE table_name_87 (season VARCHAR, series VARCHAR)", "sql": "SELECT season FROM table_name_87 WHERE series = 'formula bmw usa';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'hstore' (example 123).", "schema": null, "sql": "select pg_column_size('aa=>1, b=>2'::hstore || ''::hstore)\n = pg_column_size('aa=>1, b=>2'::hstore);", "explanation": "Example query from the 'hstore' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 108, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the Nord in 1952, when tennis borussia berlin was Berlin?", "schema": "CREATE TABLE table_name_3 (nord VARCHAR, berlin VARCHAR, year VARCHAR)", "sql": "SELECT nord FROM table_name_3 WHERE berlin = 'tennis borussia berlin' AND year = 1952;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 86, "num_statements": 1} {"question": "Which organic products were sold in Europe?", "schema": "CREATE TABLE products (product_id INT, product_name VARCHAR(50), is_organic BOOLEAN); INSERT INTO products VALUES (1, 'Lipstick 101', true), (2, 'Eye Shadow 202', false); CREATE TABLE sales (sale_id INT, product_id INT, sale_date DATE, region VARCHAR(50)); INSERT INTO sales VALUES (1, 1, '2022-01-05', 'Europe'), (2, 2, '2022-02-10', 'Asia'), (3, 1, '2022-03-20', 'Europe');", "sql": "SELECT products.product_name FROM products INNER JOIN sales ON products.product_id = sales.product_id WHERE products.is_organic = true AND sales.region = 'Europe';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 163, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: WHAT IS THE BUDGET FOR THE INCREDIBLES?", "schema": "CREATE TABLE table_name_28 (budget VARCHAR, film VARCHAR)", "sql": "SELECT budget FROM table_name_28 WHERE film = 'the incredibles';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 64, "num_statements": 1} {"question": "Who are the female founders that have received funding from Indian venture capital firms in the last 5 years?", "schema": "CREATE TABLE investor (id INT, name VARCHAR(50), type VARCHAR(50), total_investments INT, total_assets FLOAT); CREATE TABLE investment (id INT, investor_id INT, company_id INT, round VARCHAR(50), invested_amount FLOAT, investment_date DATE); CREATE TABLE company (id INT, name VARCHAR(50), founding_year INT, industry VARCHAR(50), ceo_id INT, gender VARCHAR(10));", "sql": "SELECT c.name, i.name FROM company c INNER JOIN investment inv ON c.id = inv.company_id INNER JOIN investor i ON inv.investor_id = i.id WHERE c.gender = 'Female' AND i.type = 'Venture Capital' AND investment_date >= DATEADD(year, -5, GETDATE()) AND inv.investor_id IN (SELECT id FROM investor WHERE location = 'India');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 319, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which the lowest Score-Fina has a Rank-Final of 7 and a Year larger than 2009?", "schema": "CREATE TABLE table_name_26 (score_final INTEGER, rank_final VARCHAR, year VARCHAR)", "sql": "SELECT MIN(score_final) FROM table_name_26 WHERE rank_final = 7 AND year > 2009;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 80, "num_statements": 1} {"question": "PostgreSQL regression test 'int4': Write the SELECT query (example 59).", "schema": null, "sql": "SELECT (-2147483648)::int4 / (-1)::int2;", "explanation": "Regression test for Int4 in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT (-2147483648)::int4 / (-1)::int2) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 40, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which Club or province has a caps larger than 15 and has Chris Cusiter playing for them?", "schema": "CREATE TABLE table_name_88 (club_province VARCHAR, caps VARCHAR, player VARCHAR)", "sql": "SELECT club_province FROM table_name_88 WHERE caps > 15 AND player = 'chris cusiter';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 85, "num_statements": 1} {"question": "pgTAP test for Resultset (assertion 309).", "schema": null, "sql": "SELECT * FROM check_test(\n results_eq( 'cwant'::refcursor, 'chave'::refcursor ),\n true,\n 'results_eq(cursor, cursor)',\n '',\n ''\n);", "explanation": "SQL assertion from pgTAP test suite for Resultset.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 145, "num_statements": 1} {"question": "Provide the average safety test scores for vehicles from each manufacturer", "schema": "CREATE TABLE vehicle_safety_testing (id INT PRIMARY KEY, manufacturer VARCHAR(255), model VARCHAR(255), test_score INT);", "sql": "SELECT manufacturer, AVG(test_score) as avg_score FROM vehicle_safety_testing GROUP BY manufacturer;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 100, "num_statements": 1} {"question": "Which mobile plans have more than 500 subscribers?", "schema": "CREATE TABLE subscriber_data (subscriber_id INT, plan_id INT, age INT); INSERT INTO subscriber_data (subscriber_id, plan_id, age) VALUES (1, 1, 25), (2, 2, 35), (3, 3, 45), (4, 1, 30), (5, 2, 40), (6, 3, 50);", "sql": "SELECT plan_id, COUNT(*) AS subscriber_count FROM subscriber_data GROUP BY plan_id HAVING subscriber_count > 500;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 113, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Find the players' first name and last name who won award both in 1960 and in 1961.", "schema": "CREATE TABLE player (name_first VARCHAR, name_last VARCHAR); CREATE TABLE player_award (year VARCHAR)", "sql": "SELECT T1.name_first, T1.name_last FROM player AS T1 JOIN player_award AS T2 WHERE T2.year = 1960 INTERSECT SELECT T1.name_first, T1.name_last FROM player AS T1 JOIN player_award AS T2 WHERE T2.year = 1961;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 206, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'numeric' (example 88).", "schema": null, "sql": "INSERT INTO num_exp_add VALUES (1,9,'-24926804.045047420');", "explanation": "DML from PostgreSQL core regression test for Numeric.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 59, "num_statements": 1} {"question": "How many graduate students in the Physics department have a GPA of at least 3.5?", "schema": "CREATE SCHEMA if not exists higher_ed;CREATE TABLE if not exists higher_ed.students(id INT, name VARCHAR(255), department VARCHAR(255), gpa DECIMAL(3,2));", "sql": "SELECT COUNT(*) FROM higher_ed.students WHERE department = 'Physics' AND gpa >= 3.5;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 84, "num_statements": 1} {"question": "How many users are from Germany in the Workout table?", "schema": "CREATE TABLE Workout (user_id INT, workout_duration INT, country VARCHAR(50)); INSERT INTO Workout (user_id, workout_duration, country) VALUES (1, 30, 'Germany'), (2, 40, 'USA'), (3, 50, 'Germany');", "sql": "SELECT COUNT(*) FROM Workout WHERE country = 'Germany';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 55, "num_statements": 1} {"question": "pgTAP test for Hastap (assertion 203).", "schema": null, "sql": "SELECT * FROM check_test(\n has_leftop( '+', 'bigint', 'desc' ),\n true,\n 'has_leftop( name, right, desc )',\n 'desc',\n ''\n);", "explanation": "SQL assertion from pgTAP test suite for Hastap.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 127, "num_statements": 1} {"question": "What is the total revenue for each game?", "schema": "CREATE TABLE Game (GameID INT, GameName VARCHAR(100), Genre VARCHAR(50), Price DECIMAL(5, 2)); INSERT INTO Game (GameID, GameName, Genre, Price) VALUES (1, 'GameA', 'Shooter', 50.00), (2, 'GameB', 'Strategy', 60.00), (3, 'GameC', 'RPG', 40.00); CREATE TABLE Sales (SaleID INT, GameID INT, Quantity INT); INSERT INTO Sales (SaleID, GameID, Quantity) VALUES (1, 1, 100), (2, 2, 150), (3, 1, 50), (4, 3, 200);", "sql": "SELECT GameID, SUM(Price * Quantity) FROM Game G JOIN Sales S ON G.GameID = S.GameID GROUP BY GameID;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 101, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the Attendance on September 26, 1971?", "schema": "CREATE TABLE table_name_86 (attendance INTEGER, date VARCHAR)", "sql": "SELECT AVG(attendance) FROM table_name_86 WHERE date = 'september 26, 1971';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 76, "num_statements": 1} {"question": "What is the name of the defense diplomacy events held by India with ASEAN countries in 2018?", "schema": "CREATE TABLE defense_diplomacy (id INT, country VARCHAR(255), year INT, event_name VARCHAR(255)); INSERT INTO defense_diplomacy (id, country, year, event_name) VALUES (1, 'Indonesia', 2018, 'India-Indonesia Defense Cooperation Dialogue');", "sql": "SELECT DISTINCT event_name FROM defense_diplomacy WHERE country LIKE 'ASEAN%' AND year = 2018;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 94, "num_statements": 1} {"question": "What was the total budget allocated for environmental services in 2020, 2021, and 2022?", "schema": "CREATE TABLE EnvBudget (Year INT, Amount INT); INSERT INTO EnvBudget (Year, Amount) VALUES (2020, 1200000), (2021, 1300000), (2022, 1400000);", "sql": "SELECT Year, SUM(Amount) FROM EnvBudget GROUP BY Year;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 54, "num_statements": 1} {"question": "What is the percentage change in the number of public hospitals in each region between 2018 and 2021?", "schema": "CREATE TABLE regions (region_name VARCHAR(50), year INT, num_hospitals INT); INSERT INTO regions VALUES ('Region 1', 2018, 50); INSERT INTO regions VALUES ('Region 1', 2019, 52); INSERT INTO regions VALUES ('Region 1', 2020, 54); INSERT INTO regions VALUES ('Region 1', 2021, 56); INSERT INTO regions VALUES ('Region 2', 2018, 40); INSERT INTO regions VALUES ('Region 2', 2019, 43); INSERT INTO regions VALUES ('Region 2', 2020, 45); INSERT INTO regions VALUES ('Region 2', 2021, 47);", "sql": "SELECT region_name, (LAG(num_hospitals, 3) OVER (PARTITION BY region_name ORDER BY year) - num_hospitals) * 100.0 / NULLIF(LAG(num_hospitals, 3) OVER (PARTITION BY region_name ORDER BY year), 0) as percentage_change FROM regions;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "query_window_function", "is_postgresql_specific": true, "sql_length": 229, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Show the document id with paragraph text 'Brazil' and 'Ireland'.", "schema": "CREATE TABLE Paragraphs (document_id VARCHAR, paragraph_text VARCHAR)", "sql": "SELECT document_id FROM Paragraphs WHERE paragraph_text = 'Brazil' INTERSECT SELECT document_id FROM Paragraphs WHERE paragraph_text = 'Ireland';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 145, "num_statements": 1} {"question": "List the names of all graduate students who have not published an academic paper.", "schema": "CREATE TABLE paper (id INT, student_id INT); INSERT INTO paper (id, student_id) VALUES (1, 1), (2, 2), (3, 3); CREATE TABLE student (id INT, name TEXT); INSERT INTO student (id, name) VALUES (1, 'Alice'), (2, 'Bob'), (3, 'Charlie'), (4, 'Diana');", "sql": "SELECT name FROM student WHERE id NOT IN (SELECT student_id FROM paper);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 72, "num_statements": 1} {"question": "What is the maximum number of points scored in a volleyball game?", "schema": "CREATE TABLE games (id INT, team1 VARCHAR(50), team2 VARCHAR(50), points_team1 INT, points_team2 INT);", "sql": "SELECT MAX(GREATEST(points_team1, points_team2)) FROM games;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 60, "num_statements": 1} {"question": "Which neighborhood in Austin has the most affordable housing?", "schema": "CREATE TABLE housing_units (id INT, neighborhood TEXT, city TEXT, state TEXT, price FLOAT, is_affordable BOOLEAN);", "sql": "SELECT neighborhood, COUNT(*) as total_affordable FROM housing_units WHERE city = 'Austin' AND is_affordable = TRUE GROUP BY 1 HAVING total_affordable > (SELECT AVG(total_affordable) FROM (SELECT COUNT(*) as total_affordable FROM housing_units WHERE city = 'Austin' AND is_affordable = TRUE GROUP BY neighborhood) as subquery);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 327, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the winning driver for round 7", "schema": "CREATE TABLE table_name_24 (winning_driver VARCHAR, round VARCHAR)", "sql": "SELECT winning_driver FROM table_name_24 WHERE round = 7;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 57, "num_statements": 1} {"question": "What is the total number of points scored by the 'Warriors' in the NBA since the year 2015?", "schema": "CREATE TABLE games (game_id INT, team1 VARCHAR(50), team2 VARCHAR(50), league VARCHAR(50), season INT, year INT, points1 INT, points2 INT); INSERT INTO games (game_id, team1, team2, league, season, year, points1, points2) VALUES (1, 'Warriors', 'Cavaliers', 'NBA', 2015, 2015, 104, 91);", "sql": "SELECT SUM(points1) FROM games WHERE team1 = 'Warriors' AND year >= 2015;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 73, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What are the notes for the shirt that said Pavv before 2005?", "schema": "CREATE TABLE table_name_57 (notes VARCHAR, shirt_printing VARCHAR, year VARCHAR)", "sql": "SELECT notes FROM table_name_57 WHERE shirt_printing = 'pavv' AND year < 2005;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 78, "num_statements": 1} {"question": "How many animals of each species are there in 'Nature's Guardians'?", "schema": "CREATE TABLE Nature_s_Guardians (Animal_ID INT, Animal_Name VARCHAR(50), Species VARCHAR(50), Age INT); INSERT INTO Nature_s_Guardians VALUES (1, 'Bambi', 'Deer', 3); INSERT INTO Nature_s_Guardians VALUES (2, 'Fiona', 'Turtle', 10); INSERT INTO Nature_s_Guardians VALUES (3, 'Chirpy', 'Eagle', 5); INSERT INTO Nature_s_Guardians VALUES (4, 'Whiskers', 'Raccoon', 2); INSERT INTO Nature_s_Guardians VALUES (5, 'Bella', 'Deer', 4);", "sql": "SELECT Species, COUNT(*) AS Number_of_Animals FROM Nature_s_Guardians GROUP BY Species;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 87, "num_statements": 1} {"question": "What is the average life expectancy for individuals in urban areas compared to rural areas?", "schema": "CREATE TABLE population_data (id INT, name VARCHAR(50), location VARCHAR(50), life_expectancy FLOAT); INSERT INTO population_data VALUES (1, 'John Doe', 'Urban', 78.5), (2, 'Jane Smith', 'Rural', 75.3), (3, 'Jim Brown', 'Urban', 79.2), (4, 'Jake White', 'Rural', 74.8);", "sql": "SELECT AVG(life_expectancy) AS avg_life_expectancy, location FROM population_data GROUP BY location;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 100, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the highest played when there was less than 74 goals against and points 1 of 36 2?", "schema": "CREATE TABLE table_name_30 (played INTEGER, points_1 VARCHAR, goals_against VARCHAR)", "sql": "SELECT MAX(played) FROM table_name_30 WHERE points_1 = '36 2' AND goals_against < 74;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 85, "num_statements": 1} {"question": "What is the percentage of smart city projects completed in each quarter?", "schema": "CREATE TABLE smart_city_projects (project_name TEXT, completion_date DATE); INSERT INTO smart_city_projects VALUES ('Project1', '2022-01-15'), ('Project2', '2022-02-20'), ('Project3', '2022-03-05'), ('Project4', '2022-04-10');", "sql": "SELECT DATE_TRUNC('quarter', completion_date) AS quarter, COUNT(project_name) AS projects, COUNT(project_name) * 100.0 / (SELECT COUNT(project_name) FROM smart_city_projects) AS percentage FROM smart_city_projects GROUP BY quarter;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 231, "num_statements": 1} {"question": "Find the average speed of vessels per trip", "schema": "VESSEL(vessel_id, voyage_id, max_speed); TRIP(voyage_id, avg_speed)", "sql": "SELECT v.vessel_id, AVG(t.avg_speed) AS avg_speed_per_vessel FROM VESSEL v JOIN TRIP t ON v.voyage_id = t.voyage_id GROUP BY v.vessel_id;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 137, "num_statements": 1} {"question": "How many renewable energy projects are there in total for each country?", "schema": "CREATE TABLE projects (name TEXT, type TEXT, country TEXT); INSERT INTO projects (name, type, country) VALUES ('Project 1', 'Wind', 'USA'), ('Project 2', 'Solar', 'Germany'), ('Project 3', 'Wind', 'France');", "sql": "SELECT country, COUNT(*) FROM projects GROUP BY country;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 56, "num_statements": 1} {"question": "PostgreSQL regression test 'boolean': Write the SELECT query (example 85).", "schema": null, "sql": "SELECT isfalse OR isfalse OR isnul FROM booltbl4;", "explanation": "Regression test for Boolean in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT isfalse OR isfalse OR isnul FROM booltbl4) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 49, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many results do Ellery and Frankie have?", "schema": "CREATE TABLE table_19744915_15 (result VARCHAR, couple VARCHAR)", "sql": "SELECT COUNT(result) FROM table_19744915_15 WHERE couple = 'Ellery and Frankie';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 80, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Where was the game when Police S.A. was the opponent?", "schema": "CREATE TABLE table_name_78 (location VARCHAR, opponent_team VARCHAR)", "sql": "SELECT location FROM table_name_78 WHERE opponent_team = 'police s.a.';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 71, "num_statements": 1} {"question": "What is the number of sustainable seafood certifications in Japan and the United States?", "schema": "CREATE TABLE Certifications (country VARCHAR(20), certification_type VARCHAR(20), is_sustainable BOOLEAN); INSERT INTO Certifications (country, certification_type, is_sustainable) VALUES ('Japan', 'MSC', TRUE); INSERT INTO Certifications (country, certification_type, is_sustainable) VALUES ('Japan', 'ASC', FALSE); INSERT INTO Certifications (country, certification_type, is_sustainable) VALUES ('United States', 'MSC', TRUE); INSERT INTO Certifications (country, certification_type, is_sustainable) VALUES ('United States', 'ASC', TRUE);", "sql": "SELECT COUNT(*) FROM Certifications WHERE country IN ('Japan', 'United States') AND is_sustainable = TRUE;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 106, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What are the names and scores of wines that are made of white color grapes?", "schema": "CREATE TABLE GRAPES (Grape VARCHAR, Color VARCHAR); CREATE TABLE WINE (Name VARCHAR, Score VARCHAR, Grape VARCHAR)", "sql": "SELECT T2.Name, T2.Score FROM GRAPES AS T1 JOIN WINE AS T2 ON T1.Grape = T2.Grape WHERE T1.Color = 'White';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 107, "num_statements": 1} {"question": "What is the maximum and minimum number of attendees at cultural events in each state, grouped by state?", "schema": "CREATE TABLE cultural_events (id INT, name VARCHAR(255), state VARCHAR(255), attendance INT);", "sql": "SELECT state, MAX(attendance) AS max_attendance, MIN(attendance) AS min_attendance FROM cultural_events GROUP BY state;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 119, "num_statements": 1} {"question": "What is the maximum duration (in days) of completed missions?", "schema": "CREATE TABLE mission_duration (mission_name VARCHAR(50), mission_status VARCHAR(50), duration INT);", "sql": "SELECT mission_status, MAX(duration) as max_duration FROM mission_duration WHERE mission_status = 'completed' GROUP BY mission_status;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 134, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the rank for the city of sewri?", "schema": "CREATE TABLE table_name_46 (rank VARCHAR, city VARCHAR)", "sql": "SELECT COUNT(rank) FROM table_name_46 WHERE city = 'sewri';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 59, "num_statements": 1} {"question": "PostgreSQL regression test 'int8': Write the SELECT query (example 15).", "schema": null, "sql": "SELECT * FROM INT8_TBL WHERE q2 < 4567890123456789;", "explanation": "Regression test for Int8 in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT * FROM INT8_TBL WHERE q2 < 4567890123456789) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 51, "num_statements": 1} {"question": "PL/pgSQL test: Plpython Import (example 8).", "schema": null, "sql": "-- test import and simple argument handling\n--\nSELECT import_test_one('sha hash of this string');", "explanation": "PL/pgSQL example from PostgreSQL source test for Plpython Import.", "validation_query": null, "source": "plpgsql_source", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 97, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many people did Mayor Olav Martin Vik preside over?", "schema": "CREATE TABLE table_name_87 (innhabitants INTEGER, mayor VARCHAR)", "sql": "SELECT AVG(innhabitants) FROM table_name_87 WHERE mayor = 'olav martin vik';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 76, "num_statements": 1} {"question": "Insert a new record of a whale sighting in the Atlantic Ocean", "schema": "CREATE TABLE whale_sightings (id INT PRIMARY KEY, species VARCHAR(255), location VARCHAR(255), sighting_date DATE);", "sql": "INSERT INTO whale_sightings (id, species, location, sighting_date) VALUES (1, 'Blue Whale', 'Atlantic Ocean', '2023-03-12');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 124, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the match points score for the match won by Connacht?", "schema": "CREATE TABLE table_28068063_2 (match_points VARCHAR, winners VARCHAR)", "sql": "SELECT match_points FROM table_28068063_2 WHERE winners = 'Connacht';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 69, "num_statements": 1} {"question": "What are the drug approval dates for each drug in the 'DrugApproval' table, grouped by drug name?", "schema": "CREATE TABLE DrugApproval (drug_name VARCHAR(255), approval_date DATE); INSERT INTO DrugApproval (drug_name, approval_date) VALUES ('DrugA', '2020-01-01'), ('DrugB', '2019-05-15'), ('DrugC', '2018-12-31');", "sql": "SELECT drug_name, MIN(approval_date) as first_approval_date FROM DrugApproval GROUP BY drug_name;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 97, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: I want the ends for transfer fee of free and goals being 0 for roy carroll", "schema": "CREATE TABLE table_name_53 (ends VARCHAR, name VARCHAR, transfer_fee VARCHAR, goals VARCHAR)", "sql": "SELECT ends FROM table_name_53 WHERE transfer_fee = 'free' AND goals = 0 AND name = 'roy carroll';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 98, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What are the African countries that have a population less than any country in Asia?", "schema": "CREATE TABLE country (Name VARCHAR, Continent VARCHAR, population INTEGER)", "sql": "SELECT Name FROM country WHERE Continent = 'Africa' AND population < (SELECT MAX(population) FROM country WHERE Continent = 'Asia');", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 132, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: what is the location when the year is after 1974 and the date is september 19?", "schema": "CREATE TABLE table_name_36 (location VARCHAR, year VARCHAR, date VARCHAR)", "sql": "SELECT location FROM table_name_36 WHERE year > 1974 AND date = 'september 19';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 79, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which Position has a Round smaller than 14, and a College of illinois?", "schema": "CREATE TABLE table_name_88 (position VARCHAR, round VARCHAR, college VARCHAR)", "sql": "SELECT position FROM table_name_88 WHERE round < 14 AND college = 'illinois';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 77, "num_statements": 1} {"question": "What is the total number of community health workers by gender in the United States?", "schema": "CREATE TABLE Genders (gender_id INT, gender_name TEXT); CREATE TABLE CommunityHealthWorkers (worker_id INT, worker_gender INT, country_id INT);", "sql": "SELECT COUNT(*) as total_workers, g.gender_name FROM CommunityHealthWorkers chw JOIN Genders g ON chw.worker_gender = g.gender_id WHERE country_id = 1 GROUP BY chw.worker_gender;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 178, "num_statements": 1} {"question": "PostgreSQL regression test 'jsonpath': Write the SELECT query (example 104).", "schema": null, "sql": "select '($)'::jsonpath;", "explanation": "Regression test for Jsonpath in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select '($)'::jsonpath) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 23, "num_statements": 1} {"question": "What is the average water consumption of factories in each country?", "schema": "CREATE TABLE Factories (id INT, name TEXT, country TEXT, water_consumption DECIMAL(5,2)); INSERT INTO Factories (id, name, country, water_consumption) VALUES (1, 'Factory A', 'USA', 12000.00), (2, 'Factory B', 'Mexico', 15000.00), (3, 'Factory C', 'India', 8000.00), (4, 'Factory D', 'Bangladesh', 10000.00), (5, 'Factory E', 'China', 13000.00);", "sql": "SELECT country, AVG(water_consumption) FROM Factories GROUP BY country;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 71, "num_statements": 1} {"question": "How many users have a heart rate above 150 during evening workouts?", "schema": "CREATE TABLE workouts (id INT, user_id INT, heart_rate INT, workout_time TIME); INSERT INTO workouts (id, user_id, heart_rate, workout_time) VALUES (1, 1, 160, '18:00:00');", "sql": "SELECT COUNT(*) FROM workouts WHERE heart_rate > 150 AND workout_time BETWEEN '18:00:00' AND '23:59:59';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 104, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the district for tim lee hall", "schema": "CREATE TABLE table_1341672_14 (district VARCHAR, incumbent VARCHAR)", "sql": "SELECT district FROM table_1341672_14 WHERE incumbent = 'Tim Lee Hall';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 71, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the score when 43,746 attended?", "schema": "CREATE TABLE table_name_50 (score VARCHAR, attendance VARCHAR)", "sql": "SELECT score FROM table_name_50 WHERE attendance = '43,746';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 60, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: When the apparent magnitude is 10.5, what is the right ascension?", "schema": "CREATE TABLE table_name_71 (right_ascension___j2000__ VARCHAR, apparent_magnitude VARCHAR)", "sql": "SELECT right_ascension___j2000__ FROM table_name_71 WHERE apparent_magnitude = 10.5;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 84, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the rufus guest for episode 1x07", "schema": "CREATE TABLE table_19930660_1 (rufus_guest VARCHAR, episode VARCHAR)", "sql": "SELECT rufus_guest FROM table_19930660_1 WHERE episode = '1x07';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 64, "num_statements": 1} {"question": "Insert new soil moisture data for field ID 67890.", "schema": "CREATE TABLE soil_moisture (field_id INT, date DATE, moisture DECIMAL(5,2));", "sql": "INSERT INTO soil_moisture (field_id, date, moisture) VALUES (67890, '2022-02-01', 35.0), (67890, '2022-02-02', 37.0), (67890, '2022-02-03', 33.0);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 146, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many years has 1 run?", "schema": "CREATE TABLE table_name_53 (year VARCHAR, score VARCHAR)", "sql": "SELECT COUNT(year) FROM table_name_53 WHERE score = '1 run';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 60, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'numeric' (example 369).", "schema": null, "sql": "INSERT INTO num_exp_sub VALUES (8,9,'25001685.045047420');", "explanation": "DML from PostgreSQL core regression test for Numeric.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 58, "num_statements": 1} {"question": "pgTAP test for Resultset (assertion 324).", "schema": null, "sql": "-- Try an empty set in the second arg.\nSELECT * FROM check_test(\n set_has( 'anames', 'SELECT id, name FROM annames WHERE false' ),\n true,\n 'set_has( prepared, empty )',\n '',\n ''\n);", "explanation": "SQL assertion from pgTAP test suite for Resultset.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 195, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many schools did Bubba Starling attend?", "schema": "CREATE TABLE table_11677100_17 (school VARCHAR, player VARCHAR)", "sql": "SELECT COUNT(school) FROM table_11677100_17 WHERE player = 'Bubba Starling';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 76, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What district is Joseph Lecompte in office in?", "schema": "CREATE TABLE table_2668243_8 (district VARCHAR, incumbent VARCHAR)", "sql": "SELECT district FROM table_2668243_8 WHERE incumbent = 'Joseph Lecompte';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 73, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the genre for the hot fm station?", "schema": "CREATE TABLE table_1601792_3 (genre VARCHAR, station VARCHAR)", "sql": "SELECT genre FROM table_1601792_3 WHERE station = 'Hot FM';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 59, "num_statements": 1} {"question": "What is the average weight of pottery artifacts?", "schema": "CREATE TABLE ArtifactTypes (id INT, name VARCHAR(255), category VARCHAR(255)); INSERT INTO ArtifactTypes (id, name, category) VALUES (1, 'Pottery', 'Artifact'); CREATE TABLE Artifacts (id INT, artifactTypeId INT, name VARCHAR(255), weight FLOAT); INSERT INTO Artifacts (id, artifactTypeId, weight) VALUES (1, 1, 1.5), (2, 1, 2.2), (3, 1, 1.8);", "sql": "SELECT AVG(weight) FROM Artifacts WHERE artifactTypeId = (SELECT id FROM ArtifactTypes WHERE name = 'Pottery');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 111, "num_statements": 1} {"question": "What is the minimum depth of any ocean floor mapping project site in the 'Ocean' schema?", "schema": "CREATE SCHEMA Ocean; CREATE TABLE Mapping (site_id INT, depth FLOAT); INSERT INTO Mapping (site_id, depth) VALUES (1, 5000.2), (2, 4000.3), (3, 3000.4), (4, 2000.5), (5, 1000.6);", "sql": "SELECT MIN(depth) FROM Ocean.Mapping;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 37, "num_statements": 1} {"question": "What is the number of building permits issued for each type of building in the city of Chicago?", "schema": "CREATE TABLE permit (id INT, city VARCHAR(20), type VARCHAR(20), permit_number INT); INSERT INTO permit (id, city, type, permit_number) VALUES (1, 'Chicago', 'Residential', 100), (2, 'Chicago', 'Commercial', 150), (3, 'LA', 'Residential', 80);", "sql": "SELECT type, COUNT(permit_number) FROM permit WHERE city = 'Chicago' GROUP BY type;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 83, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who was the opponent at the game with a result of W 13–12?", "schema": "CREATE TABLE table_name_83 (opponent VARCHAR, result VARCHAR)", "sql": "SELECT opponent FROM table_name_83 WHERE result = 'w 13–12';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 60, "num_statements": 1} {"question": "Identify the total biomass of fish in farms located in the Mediterranean sea?", "schema": "CREATE TABLE fish_farms (id INT, name TEXT, country TEXT, biomass FLOAT); INSERT INTO fish_farms (id, name, country) VALUES (1, 'Farm C', 'Italy'); INSERT INTO fish_farms (id, name, country) VALUES (2, 'Farm D', 'Spain'); CREATE TABLE biomass_data (farm_id INT, biomass FLOAT); INSERT INTO biomass_data (farm_id, biomass) VALUES (1, 500.3); INSERT INTO biomass_data (farm_id, biomass) VALUES (2, 600.5);", "sql": "SELECT SUM(bd.biomass) FROM fish_farms ff JOIN biomass_data bd ON ff.id = bd.farm_id WHERE ff.country LIKE '%Mediterranean%';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 125, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What position was the player who was drafted by the Washington Capitals?", "schema": "CREATE TABLE table_2850912_10 (position VARCHAR, nhl_team VARCHAR)", "sql": "SELECT position FROM table_2850912_10 WHERE nhl_team = 'Washington Capitals';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 77, "num_statements": 1} {"question": "What is the total waste produced per production line per week?", "schema": "create table ProductionWaste (ProductionLine varchar(255), Waste int, Timestamp datetime); insert into ProductionWaste values ('ProductionLine1', 10, '2022-01-01 00:00:00'), ('ProductionLine2', 15, '2022-01-01 01:00:00'), ('ProductionLine1', 12, '2022-01-08 02:00:00');", "sql": "select ProductionLine, DATE_PART('week', Timestamp) as Week, SUM(Waste) as TotalWaste from ProductionWaste group by ProductionLine, Week;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 137, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the earliest term limit for the senator who resides in Coshocton?", "schema": "CREATE TABLE table_26129220_2 (term_limited INTEGER, residence VARCHAR)", "sql": "SELECT MIN(term_limited) FROM table_26129220_2 WHERE residence = 'Coshocton';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 77, "num_statements": 1} {"question": "Find the regulatory frameworks that were implemented after the regulatory framework with ID 5.", "schema": "CREATE TABLE regulatory_frameworks (id INT, name VARCHAR, implementation_date DATE); INSERT INTO regulatory_frameworks (id, name, implementation_date) VALUES (1, 'RF1', '2021-01-01'), (2, 'RF2', '2021-02-02'), (3, 'RF3', '2021-03-03'), (4, 'RF4', '2021-04-04'), (5, 'RF5', '2021-05-05'), (6, 'RF6', '2021-06-06'), (7, 'RF7', '2021-07-07');", "sql": "SELECT * FROM regulatory_frameworks WHERE implementation_date > (SELECT implementation_date FROM regulatory_frameworks WHERE id = 5);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 133, "num_statements": 1} {"question": "What is the minimum monthly bill for broadband subscribers in the city of San Francisco?", "schema": "CREATE TABLE broadband_subscribers (subscriber_id INT, monthly_bill FLOAT, city VARCHAR(20)); INSERT INTO broadband_subscribers (subscriber_id, monthly_bill, city) VALUES (1, 60.5, 'San Francisco'), (2, 70.3, 'Houston'), (3, 55.7, 'San Francisco');", "sql": "SELECT MIN(monthly_bill) FROM broadband_subscribers WHERE city = 'San Francisco';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 81, "num_statements": 1} {"question": "What is the average number of visitors for natural destinations in each country?", "schema": "CREATE TABLE Destinations (id INT PRIMARY KEY, country_id INT, name VARCHAR(255), type VARCHAR(255)); INSERT INTO Destinations (id, country_id, name, type) VALUES (1, 1, 'Sydney Opera House', 'Cultural'); INSERT INTO Destinations (id, country_id, name, type) VALUES (2, 1, 'Great Barrier Reef', 'Natural'); INSERT INTO Destinations (id, country_id, name, type) VALUES (3, 2, 'Niagara Falls', 'Natural'); INSERT INTO Destinations (id, country_id, name, type) VALUES (4, 2, 'CN Tower', 'Architectural');", "sql": "SELECT country_id, AVG(visitors) FROM Tourists t JOIN Destinations d ON t.country_id = d.country_id WHERE d.type = 'Natural' GROUP BY country_id;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 145, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many womens singles entries are there when womens doubles is li xiaodan wen jia?", "schema": "CREATE TABLE table_28138035_27 (womens_singles VARCHAR, womens_doubles VARCHAR)", "sql": "SELECT COUNT(womens_singles) FROM table_28138035_27 WHERE womens_doubles = 'Li Xiaodan Wen Jia';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 96, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the score for set 2 when the score of set 1 is 19–25?", "schema": "CREATE TABLE table_name_70 (set_2 VARCHAR, set_1 VARCHAR)", "sql": "SELECT set_2 FROM table_name_70 WHERE set_1 = '19–25';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 54, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many dogs have an age below the average?", "schema": "CREATE TABLE Dogs (age INTEGER)", "sql": "SELECT COUNT(*) FROM Dogs WHERE age < (SELECT AVG(age) FROM Dogs);", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "PostgreSQL regression test 'partition_merge': Write the SELECT query (example 165).", "schema": null, "sql": "SELECT * FROM sales_list WHERE salesperson_name = 'Ivanov';", "explanation": "Regression test for Partition Merge in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT * FROM sales_list WHERE salesperson_name = 'Ivanov') AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 59, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: When maker is the model what is the nupowr 167?", "schema": "CREATE TABLE table_3002894_4 (nupowr_167 VARCHAR, model VARCHAR)", "sql": "SELECT nupowr_167 FROM table_3002894_4 WHERE model = 'Maker';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 61, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which Venue has an Away team of south melbourne?", "schema": "CREATE TABLE table_name_89 (venue VARCHAR, away_team VARCHAR)", "sql": "SELECT venue FROM table_name_89 WHERE away_team = 'south melbourne';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 68, "num_statements": 1} {"question": "What is the total number of community education programs in each country?", "schema": "CREATE TABLE education_programs (id INT, name VARCHAR(50), country VARCHAR(50), programs INT); INSERT INTO education_programs (id, name, country, programs) VALUES (1, 'Wildlife Awareness', 'Brazil', 10), (2, 'Habitat Conservation', 'Kenya', 5), (3, 'Biodiversity Education', 'Indonesia', 15);", "sql": "SELECT country, SUM(programs) FROM education_programs GROUP BY country;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 71, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: When the driver richard attwood has a constructor of brm, what is the number of laps?", "schema": "CREATE TABLE table_name_87 (laps VARCHAR, constructor VARCHAR, driver VARCHAR)", "sql": "SELECT laps FROM table_name_87 WHERE constructor = 'brm' AND driver = 'richard attwood';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 88, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: If the vehicle category is multi-axle vehicle, what is the till aligarh?", "schema": "CREATE TABLE table_19787093_1 (till_aligarh INTEGER, vehicle_category VARCHAR)", "sql": "SELECT MIN(till_aligarh) FROM table_19787093_1 WHERE vehicle_category = 'Multi-axle Vehicle';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 93, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the score at 3:27?", "schema": "CREATE TABLE table_name_8 (score VARCHAR, time VARCHAR)", "sql": "SELECT score FROM table_name_8 WHERE time = '3:27';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 51, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which Score has an Outcome of runner-up, and a Tournament of johannesburg?", "schema": "CREATE TABLE table_name_69 (score VARCHAR, outcome VARCHAR, tournament VARCHAR)", "sql": "SELECT score FROM table_name_69 WHERE outcome = 'runner-up' AND tournament = 'johannesburg';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 92, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What are the average and minimum price (in Euro) of all products?", "schema": "CREATE TABLE catalog_contents (price_in_euros INTEGER)", "sql": "SELECT AVG(price_in_euros), MIN(price_in_euros) FROM catalog_contents;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 70, "num_statements": 1} {"question": "What is the average age of fans who have attended at least one home game for the NY Knicks and live in New York?", "schema": "CREATE TABLE fans (fan_id INT, age INT, city VARCHAR(20)); CREATE TABLE tickets (ticket_id INT, fan_id INT, team_id INT); CREATE TABLE teams (team_id INT, team_name VARCHAR(20)); INSERT INTO teams (team_id, team_name) VALUES (1, 'NY Knicks'); INSERT INTO fans (fan_id, age, city) VALUES (1, 30, 'New York'); INSERT INTO tickets (ticket_id, fan_id, team_id) VALUES (1, 1, 1);", "sql": "SELECT AVG(fans.age) FROM fans INNER JOIN tickets ON fans.fan_id = tickets.fan_id INNER JOIN teams ON tickets.team_id = teams.team_id WHERE teams.team_name = 'NY Knicks' AND fans.city = 'New York' GROUP BY fans.age HAVING COUNT(fans.fan_id) >= 1;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 246, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: WHich Record has a Visitor of quebec nordiques with a Score of 7–5?", "schema": "CREATE TABLE table_name_37 (record VARCHAR, visitor VARCHAR, score VARCHAR)", "sql": "SELECT record FROM table_name_37 WHERE visitor = 'quebec nordiques' AND score = '7–5';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 86, "num_statements": 1} {"question": "What is the most popular game genre among female players?", "schema": "CREATE TABLE PlayerDemographics (PlayerID INT, Gender VARCHAR(10)); INSERT INTO PlayerDemographics (PlayerID, Gender) VALUES (1, 'Male'); INSERT INTO PlayerDemographics (PlayerID, Gender) VALUES (2, 'Female');", "sql": "SELECT g.Genre, COUNT(*) AS Popularity FROM Games g JOIN PlayerDemographics pd ON 1 = 1 JOIN GameSales gs ON g.GameID = gs.GameID GROUP BY g.Genre HAVING SUM(CASE WHEN pd.Gender = 'Female' THEN 1 ELSE 0 END) / COUNT(*) > 0.5;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 225, "num_statements": 1} {"question": "What is the distribution of funding amounts by round type?", "schema": "CREATE TABLE funding_rounds (company_id INT, round_amount DECIMAL(10,2), round_type VARCHAR(20), year INT); INSERT INTO funding_rounds (company_id, round_amount, round_type, year) VALUES (1, 2000000, 'Seed', 2016), (2, 5000000, 'Series A', 2017), (3, 3000000, 'Seed', 2018), (4, 6000000, 'Series B', 2019);", "sql": "SELECT f.round_type, AVG(f.round_amount), STDDEV(f.round_amount), MIN(f.round_amount), MAX(f.round_amount) FROM funding_rounds f GROUP BY f.round_type;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 151, "num_statements": 1} {"question": "What is the percentage of faculty members in the Humanities department who identify as underrepresented minorities?", "schema": "CREATE TABLE faculty (id INT, name VARCHAR(50), department VARCHAR(50), minority VARCHAR(10)); INSERT INTO faculty (id, name, department, minority) VALUES (1, 'Judy', 'Humanities', 'Yes'); INSERT INTO faculty (id, name, department, minority) VALUES (2, 'Kevin', 'Engineering', 'No');", "sql": "SELECT f.department, ROUND(100.0 * COUNT(CASE WHEN f.minority = 'Yes' THEN 1 END) / COUNT(*), 2) AS pct_minority_faculty FROM faculty f GROUP BY f.department;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 158, "num_statements": 1} {"question": "What is the total number of violations for each ethical fashion brand?", "schema": "CREATE TABLE Violations (violation_id INT, brand_id INT, violation_count INT); CREATE TABLE Brands (brand_id INT, brand_name VARCHAR(50), ethical BOOLEAN);", "sql": "SELECT B.brand_name, SUM(violation_count) FROM Violations V INNER JOIN Brands B ON V.brand_id = B.brand_id WHERE B.ethical = TRUE GROUP BY B.brand_name;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 152, "num_statements": 1} {"question": "Delete records of artists who have not received any funding.", "schema": "CREATE TABLE Artists (artist_id INT, artist_name VARCHAR(255), community_identifier VARCHAR(255)); CREATE TABLE Funding (funding_id INT, artist_id INT, funding_amount DECIMAL(10,2));", "sql": "DELETE FROM Artists WHERE artist_id NOT IN (SELECT artist_id FROM Funding);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "dml_delete", "is_postgresql_specific": false, "sql_length": 75, "num_statements": 1} {"question": "What is the total cost of materials sourced from local suppliers by each manufacturer?", "schema": "CREATE TABLE manufacturers (id INT, name TEXT); INSERT INTO manufacturers (id, name) VALUES (1, 'MNO'), (2, 'PQR'); CREATE TABLE local_suppliers (id INT, manufacturer_id INT, cost FLOAT); INSERT INTO local_suppliers (id, manufacturer_id, cost) VALUES (1, 1, 5000.00), (2, 1, 7000.00), (3, 2, 8000.00); CREATE TABLE materials (id INT, local_supplier_id INT, quantity INT); INSERT INTO materials (id, local_supplier_id, quantity) VALUES (1, 1, 100), (2, 2, 150), (3, 3, 200);", "sql": "SELECT m.name, SUM(l.cost * m.quantity) FROM manufacturers m JOIN local_suppliers l ON m.id = l.manufacturer_id JOIN materials m ON l.id = m.local_supplier_id GROUP BY m.name;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 175, "num_statements": 1} {"question": "Determine the maximum pressure (in bar) for any chemical storage unit in Mexico during the first week of October.", "schema": "CREATE TABLE StorageUnits (id INT, location VARCHAR(50), pressure FLOAT, day INT); INSERT INTO StorageUnits (id, location, pressure, day) VALUES (1, 'Mexico', 10.5, 3), (2, 'Mexico', 11.2, 5), (3, 'Mexico', 9.8, 7);", "sql": "SELECT MAX(pressure) FROM StorageUnits WHERE location = 'Mexico' AND day BETWEEN 1 AND 7 AND EXTRACT(MONTH FROM DATE '2022-10-01' + INTERVAL (day-1) DAY) = 10;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 159, "num_statements": 1} {"question": "How many wastewater treatment plants are in Canada and the United States combined?", "schema": "CREATE TABLE WastewaterTreatmentPlants (country VARCHAR(20), num_plants INT);", "sql": "SELECT COUNT(*) FROM WastewaterTreatmentPlants WHERE country IN ('Canada', 'United States');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 92, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the class of facility ID 150935?", "schema": "CREATE TABLE table_name_21 (class VARCHAR, facility_id VARCHAR)", "sql": "SELECT class FROM table_name_21 WHERE facility_id = 150935;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 59, "num_statements": 1} {"question": "What is the percentage of hotels in Sydney that have adopted AI technology?", "schema": "CREATE TABLE hotels (hotel_id INT, hotel_name TEXT, city TEXT, ai_adoption INT); INSERT INTO hotels (hotel_id, hotel_name, city, ai_adoption) VALUES (1, 'The Park Hyatt Sydney', 'Sydney', 1), (2, 'The Four Seasons Hotel Sydney', 'Sydney', 1), (3, 'The Shangri-La Hotel Sydney', 'Sydney', 0), (4, 'The InterContinental Sydney', 'Sydney', 1), (5, 'The Langham Sydney', 'Sydney', 0);", "sql": "SELECT city, 100.0 * SUM(ai_adoption) / COUNT(*) as adoption_percentage FROM hotels WHERE city = 'Sydney' GROUP BY city;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 120, "num_statements": 1} {"question": "What is the total amount of Shariah-compliant loans issued by Zakat Bank in 2020, segmented by loan type?", "schema": "CREATE TABLE ZakatBank (id INT, loan_type VARCHAR(20), amount DECIMAL(10,2), issue_date DATE);", "sql": "SELECT loan_type, SUM(amount) FROM ZakatBank WHERE YEAR(issue_date) = 2020 GROUP BY loan_type;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 94, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the highest Markatal, when Municipality is Leirvík, and when Inhabitants Per Km² is greater than 79?", "schema": "CREATE TABLE table_name_69 (markatal INTEGER, municipality VARCHAR, inhabitants_per_km² VARCHAR)", "sql": "SELECT MAX(markatal) FROM table_name_69 WHERE municipality = 'leirvík' AND inhabitants_per_km² > 79;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 100, "num_statements": 1} {"question": "Determine the current landfill capacity for 'Oceania' from the 'landfill_capacity' table", "schema": "CREATE TABLE landfill_capacity (region VARCHAR(50), current_capacity INT);", "sql": "SELECT current_capacity FROM landfill_capacity WHERE region = 'Oceania';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 72, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the total time for one south broad?", "schema": "CREATE TABLE table_name_79 (year INTEGER, name VARCHAR)", "sql": "SELECT SUM(year) FROM table_name_79 WHERE name = 'one south broad';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 67, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Round that greg huntington went in?", "schema": "CREATE TABLE table_name_54 (round VARCHAR, name VARCHAR)", "sql": "SELECT round FROM table_name_54 WHERE name = 'greg huntington';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 63, "num_statements": 1} {"question": "What is the number of buildings in the 'carbon_offsets' schema with energy consumption < 1000?", "schema": "CREATE TABLE carbon_offsets.building_energy_consumption (building VARCHAR(50), consumption FLOAT); INSERT INTO carbon_offsets.building_energy_consumption (building, consumption) VALUES ('Rainbow Building', 900.0), ('Skylight Building', 800.0), ('Sunrise Building', 700.0), ('Morning Building', 600.0), ('Evening Building', 500.0);", "sql": "SELECT COUNT(*) FROM carbon_offsets.building_energy_consumption WHERE consumption < 1000;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 89, "num_statements": 1} {"question": "Calculate the average sales price for each drug in 2021.", "schema": "CREATE TABLE drug_sales_price (drug_name TEXT, year INTEGER, sale_price NUMERIC(10, 2)); INSERT INTO drug_sales_price (drug_name, year, sale_price) VALUES ('DrugA', 2021, 120.50), ('DrugA', 2021, 125.00), ('DrugB', 2021, 150.75), ('DrugB', 2021, 155.00), ('DrugC', 2021, 100.00), ('DrugC', 2021, 105.00);", "sql": "SELECT drug_name, AVG(sale_price) as avg_sale_price FROM drug_sales_price WHERE year = 2021 GROUP BY drug_name;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 111, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which of the biggest points numbers had a year more recent than 1953?", "schema": "CREATE TABLE table_name_62 (points INTEGER, year INTEGER)", "sql": "SELECT MAX(points) FROM table_name_62 WHERE year > 1953;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 56, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'indexing' (example 82).", "schema": null, "sql": "create index on idxpart1 using hash (a);", "explanation": "DDL from PostgreSQL core regression test for Indexing.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "ddl_index", "is_postgresql_specific": false, "sql_length": 40, "num_statements": 1} {"question": "Write the PL/pgSQL object from PostgreSQL regression test 'stats_ext' (example 361).", "schema": null, "sql": "-- IN\nSELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) IN (2, 102) AND upper(b) = ''1''');", "explanation": "PL/pgSQL object from PostgreSQL core test for Stats Ext.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 129, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which IATA's ICAO is rjss?", "schema": "CREATE TABLE table_name_17 (iata VARCHAR, icao VARCHAR)", "sql": "SELECT iata FROM table_name_17 WHERE icao = 'rjss';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 51, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: On which date did Ger Loughnane from Team Clare have a match?", "schema": "CREATE TABLE table_name_88 (date VARCHAR, team VARCHAR, player VARCHAR)", "sql": "SELECT date FROM table_name_88 WHERE team = 'clare' AND player = 'ger loughnane';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 81, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'identity' (example 176).", "schema": null, "sql": "INSERT INTO pitest2_p2 (f1, f2, f3) VALUES ('2016-08-6', 'from pitest2_p2', 300);", "explanation": "DML from PostgreSQL core regression test for Identity.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 81, "num_statements": 1} {"question": "Insert a new community development initiative in 'Amazonas' region with ID 3, name 'Cultural Center', and status 'planning' into the 'community_development' table.", "schema": "CREATE TABLE community_development(id INT, region TEXT, initiative_name TEXT, status TEXT);", "sql": "INSERT INTO community_development (id, region, initiative_name, status) VALUES (3, 'Amazonas', 'Cultural Center', 'planning');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 126, "num_statements": 1} {"question": "Find the number of unique chemicals produced by each factory.", "schema": "CREATE TABLE factories (id INT, name TEXT); INSERT INTO factories (id, name) VALUES (1, 'Factory A'), (2, 'Factory B'); CREATE TABLE chemical_produced (factory_id INT, chemical_name TEXT); INSERT INTO chemical_produced (factory_id, chemical_name) VALUES (1, 'Chemical X'), (1, 'Chemical X'), (2, 'Chemical Y'), (2, 'Chemical Z');", "sql": "SELECT factory_id, COUNT(DISTINCT chemical_name) AS unique_chemicals_produced FROM chemical_produced GROUP BY factory_id;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 121, "num_statements": 1} {"question": "What is the average daily engagement for virtual tours in the 'Americas' region for the year 2022?", "schema": "CREATE TABLE virtual_tour_engagement (id INT, hotel_id INT, region TEXT, calendar DATE, engagement INT);", "sql": "SELECT region, AVG(engagement) FROM virtual_tour_engagement WHERE region = 'Americas' AND YEAR(calendar) = 2022 GROUP BY region;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 128, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is Brown Motors best point total using the Offenhauser L4 engine since 1950?", "schema": "CREATE TABLE table_name_72 (points INTEGER, entrant VARCHAR, year VARCHAR, engine VARCHAR)", "sql": "SELECT MAX(points) FROM table_name_72 WHERE year > 1950 AND engine = 'offenhauser l4' AND entrant = 'brown motors';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 115, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which Pick has a Nationality of canada, and a Round of 5?", "schema": "CREATE TABLE table_name_98 (pick VARCHAR, nationality VARCHAR, round VARCHAR)", "sql": "SELECT pick FROM table_name_98 WHERE nationality = 'canada' AND round = 5;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 74, "num_statements": 1} {"question": "How many community policing events were held in the Los Angeles County in the year 2020?", "schema": "CREATE TABLE community_policing (id INT, county VARCHAR(20), year INT, events INT);", "sql": "SELECT COUNT(*) FROM community_policing WHERE county = 'Los Angeles' AND year = 2020;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 85, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Show names for all aircrafts with distances more than the average.", "schema": "CREATE TABLE Aircraft (name VARCHAR, distance INTEGER)", "sql": "SELECT name FROM Aircraft WHERE distance > (SELECT AVG(distance) FROM Aircraft);", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 80, "num_statements": 1} {"question": "How many unique donors have there been for each program?", "schema": "CREATE TABLE donations (id INT, donor_id INT, donation_amount DECIMAL, donation_date DATE, donation_program VARCHAR); INSERT INTO donations (id, donor_id, donation_amount, donation_date, donation_program) VALUES (1, 101, '500', '2021-01-01', 'Education'), (2, 102, '300', '2021-02-01', 'Environment'), (3, 101, '800', '2021-03-01', 'Education'); CREATE TABLE programs (id INT, name VARCHAR); INSERT INTO programs (id, name) VALUES (1, 'Education'), (2, 'Environment');", "sql": "SELECT p.name as program_name, COUNT(DISTINCT d.donor_id) as num_unique_donors FROM donations d JOIN programs p ON d.donation_program = p.name GROUP BY p.name;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 159, "num_statements": 1} {"question": "How many athletes have participated in a specific event?", "schema": "CREATE TABLE events (event_id INT, event_name VARCHAR(50)); CREATE TABLE athlete_events (athlete_id INT, event_id INT);", "sql": "SELECT e.event_name, COUNT(*) as athlete_count FROM events e JOIN athlete_events ae ON e.event_id = ae.event_id GROUP BY e.event_name;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 134, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the date of the week 13 game?", "schema": "CREATE TABLE table_name_96 (date VARCHAR, week VARCHAR)", "sql": "SELECT date FROM table_name_96 WHERE week = 13;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 47, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the earliest week the team played the chicago cardinals in front of less than 25,312?", "schema": "CREATE TABLE table_name_12 (week INTEGER, opponent VARCHAR, attendance VARCHAR)", "sql": "SELECT MIN(week) FROM table_name_12 WHERE opponent = 'chicago cardinals' AND attendance < 25 OFFSET 312;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 104, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'indirect_toast' (example 3).", "schema": null, "sql": "CREATE TABLE indtoasttest(descr text, cnt int DEFAULT 0, f1 text, f2 text);", "explanation": "DDL from PostgreSQL core regression test for Indirect Toast.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_table", "is_postgresql_specific": false, "sql_length": 75, "num_statements": 1} {"question": "What is the total revenue of restaurants located in California?", "schema": "CREATE TABLE restaurants (id INT, name TEXT, state TEXT); INSERT INTO restaurants (id, name, state) VALUES (1, 'Restaurant A', 'California'), (2, 'Restaurant B', 'New York'); CREATE TABLE revenue (restaurant_id INT, amount INT); INSERT INTO revenue (restaurant_id, amount) VALUES (1, 10000), (1, 12000), (2, 8000);", "sql": "SELECT SUM(revenue.amount) FROM revenue JOIN restaurants ON revenue.restaurant_id = restaurants.id WHERE restaurants.state = 'California';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 138, "num_statements": 1} {"question": "List the titles and authors of all publications in the field of 'Machine Learning' that were published after the year 2010.", "schema": "CREATE TABLE publications (id INT, title TEXT, author VARCHAR(50), field VARCHAR(50), year INT); INSERT INTO publications (id, title, author, field, year) VALUES (1, 'Data Science for Dummies', 'Alice', 'Data Science', 2015); INSERT INTO publications (id, title, author, field, year) VALUES (2, 'Calculus: A Complete Course', 'John Doe', 'Calculus', 2005); INSERT INTO publications (id, title, author, field, year) VALUES (3, 'Machine Learning: A Comprehensive Introduction', 'Eve Researcher', 'Machine Learning', 2012);", "sql": "SELECT title, author FROM publications WHERE field = 'Machine Learning' AND year > 2010;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 88, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: For the episode(s) aired in the U.S. on 4 april 2008, what were the names?", "schema": "CREATE TABLE table_10935205_1 (title VARCHAR, us_airdate VARCHAR)", "sql": "SELECT title FROM table_10935205_1 WHERE us_airdate = '4 April 2008';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 69, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: what's the win with points for being 376", "schema": "CREATE TABLE table_12828723_3 (won VARCHAR, points_for VARCHAR)", "sql": "SELECT won FROM table_12828723_3 WHERE points_for = '376';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 58, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What are the Asian countries which have a population larger than that of any country in Africa?", "schema": "CREATE TABLE country (Name VARCHAR, Continent VARCHAR, population INTEGER)", "sql": "SELECT Name FROM country WHERE Continent = 'Asia' AND population > (SELECT MIN(population) FROM country WHERE Continent = 'Africa');", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 132, "num_statements": 1} {"question": "What is the number of workers employed in factories that use renewable energy sources in Germany and France?", "schema": "CREATE TABLE factories (factory_id INT, country VARCHAR(50), energy_source VARCHAR(50)); CREATE TABLE workers (worker_id INT, factory_id INT, position VARCHAR(50)); INSERT INTO factories (factory_id, country, energy_source) VALUES (1, 'Germany', 'renewable'), (2, 'France', 'fossil fuel'), (3, 'Germany', 'renewable'); INSERT INTO workers (worker_id, factory_id, position) VALUES (1, 1, 'manager'), (2, 1, 'engineer'), (3, 2, 'worker'), (4, 3, 'manager');", "sql": "SELECT COUNT(workers.worker_id) FROM workers INNER JOIN factories ON workers.factory_id = factories.factory_id WHERE factories.country IN ('Germany', 'France') AND factories.energy_source = 'renewable';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 202, "num_statements": 1} {"question": "pgTAP test for Hastap (assertion 127).", "schema": null, "sql": "SELECT * FROM check_test(\n hasnt_domain( 'public'::name, 'us_postal_code'::name ),\n false,\n 'hasnt_domain(scheam, domain)',\n 'Domain public.us_postal_code should not exist',\n ''\n);", "explanation": "SQL assertion from pgTAP test suite for Hastap.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 195, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the population of сурдук in 2011?", "schema": "CREATE TABLE table_2562572_53 (population__2011_ VARCHAR, cyrillic_name VARCHAR)", "sql": "SELECT COUNT(population__2011_) FROM table_2562572_53 WHERE cyrillic_name = 'Сурдук';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 85, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the earliest year that Târlea took 7th place?", "schema": "CREATE TABLE table_name_6 (year INTEGER, position VARCHAR)", "sql": "SELECT MIN(year) FROM table_name_6 WHERE position = '7th';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 58, "num_statements": 1} {"question": "pgTAP test for Cmpok (assertion 6).", "schema": null, "sql": "SELECT * FROM check_test(\n cmp_ok( ARRAY['192.168.1.2'::inet], '=', ARRAY['192.168.1.2'::inet] ),\n true,\n 'cmp_ok( inet[], =, inet[] )',\n '',\n ''\n);", "explanation": "SQL assertion from pgTAP test suite for Cmpok.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 163, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'without_overlaps' (example 466).", "schema": null, "sql": "DELETE FROM temporal_rng WHERE id = '[5,6)' AND valid_at = daterange('2018-01-01', '2018-02-01');", "explanation": "DML from PostgreSQL core regression test for Without Overlaps.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_delete", "is_postgresql_specific": false, "sql_length": 97, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many different sizes (in acres) are noted for Rathcoola East?", "schema": "CREATE TABLE table_30120566_1 (area__acres__ VARCHAR, townland VARCHAR)", "sql": "SELECT COUNT(area__acres__) FROM table_30120566_1 WHERE townland = 'Rathcoola East';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 84, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the Ravens' record on December 5, 1999?", "schema": "CREATE TABLE table_name_33 (record VARCHAR, date VARCHAR)", "sql": "SELECT record FROM table_name_33 WHERE date = 'december 5, 1999';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 65, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What score has 33 as the points?", "schema": "CREATE TABLE table_name_93 (score VARCHAR, points VARCHAR)", "sql": "SELECT score FROM table_name_93 WHERE points = 33;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 50, "num_statements": 1} {"question": "Show an example of PostgreSQL UPDATE (example 7).", "schema": null, "sql": "UPDATE accounts SET (contact_first_name, contact_last_name) = (SELECT first_name, last_name FROM employees WHERE employees.id = accounts.sales_person);", "explanation": "PostgreSQL UPDATE command.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 151, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'btree_gist' (example 13).", "schema": null, "sql": "SELECT count(*) FROM timetmp WHERE a = '10:57:11'::time;", "explanation": "Example query from the 'btree_gist' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 57, "num_statements": 1} {"question": "Find the mining operations in the 'Andes' mountain range with the highest CO2 emissions", "schema": "CREATE TABLE MiningOperations (id INT, name VARCHAR(255), type VARCHAR(255), co2_emissions INT, location VARCHAR(255)); INSERT INTO MiningOperations (id, name, type, co2_emissions, location) VALUES (1, 'Open Pit Mining', 'Surface', 500, 'Andes'), (2, 'Underground Mining', 'Underground', 300, 'Andes'), (3, 'Mountaintop Removal Mining', 'Surface', 700, 'Appalachians'), (4, 'Placer Mining', 'Surface', 200, 'Himalayas'), (5, 'Hard Rock Mining', 'Underground', 800, 'Urals');", "sql": "SELECT name, type, co2_emissions FROM MiningOperations WHERE location = 'Andes' ORDER BY co2_emissions DESC LIMIT 1;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 116, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the Decision when the Visitor was Carolina?", "schema": "CREATE TABLE table_name_53 (decision VARCHAR, visitor VARCHAR)", "sql": "SELECT decision FROM table_name_53 WHERE visitor = 'carolina';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 62, "num_statements": 1} {"question": "PostgreSQL regression test 'strings': Write the SELECT query (example 513).", "schema": null, "sql": "SELECT bit_count('\\x1234567890'::bytea);", "explanation": "Regression test for Strings in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT bit_count('\\x1234567890'::bytea)) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 40, "num_statements": 1} {"question": "Delete the record of the investment round with id 5 in the investment_rounds table", "schema": "CREATE TABLE investment_rounds (id INT, company_id INT, funding_amount INT);", "sql": "DELETE FROM investment_rounds WHERE id = 5;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_delete", "is_postgresql_specific": false, "sql_length": 43, "num_statements": 1} {"question": "What is the total amount spent on raw materials for each product line?", "schema": "CREATE TABLE raw_materials (id INT, product_line VARCHAR(50), amount INT); INSERT INTO raw_materials (id, product_line, amount) VALUES (1, 'product1', 10000); INSERT INTO raw_materials (id, product_line, amount) VALUES (2, 'product2', 15000);", "sql": "SELECT product_line, SUM(amount) FROM raw_materials GROUP BY product_line;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 74, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What type is the misurina to bassano del grappa course?", "schema": "CREATE TABLE table_name_24 (type VARCHAR, course VARCHAR)", "sql": "SELECT type FROM table_name_24 WHERE course = 'misurina to bassano del grappa';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 79, "num_statements": 1} {"question": "Identify the mental health facilities that have the highest and lowest capacity for each type.", "schema": "CREATE TABLE mental_health_facilities (facility_id INT, name VARCHAR(255), location VARCHAR(255), type VARCHAR(255), capacity INT); INSERT INTO mental_health_facilities (facility_id, name, location, type, capacity) VALUES (1, 'Serenity House', 'New York, NY', 'Inpatient', 50), (2, 'Harmony House', 'New York, NY', 'Inpatient', 80), (3, 'Tranquility House', 'New York, NY', 'Outpatient', 30), (4, 'Zenith House', 'New York, NY', 'Outpatient', 60);", "sql": "SELECT facility_id, name, type, capacity, MIN(capacity) OVER(PARTITION BY type) as lowest_capacity, MAX(capacity) OVER(PARTITION BY type) as highest_capacity FROM mental_health_facilities;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "query_window_function", "is_postgresql_specific": true, "sql_length": 188, "num_statements": 1} {"question": "Identify the vessels that have transported more than 120 containers to India in the month of May 2020.", "schema": "CREATE TABLE Vessel_Movements (id INT, vessel VARCHAR(255), quantity INT, port VARCHAR(255), time DATETIME); INSERT INTO Vessel_Movements (id, vessel, quantity, port, time) VALUES (1, 'Indian Ocean Giant', 150, 'Mumbai', '2020-05-01 10:00:00'), (2, 'Maritime Marvel', 120, 'Chennai', '2020-05-15 15:30:00');", "sql": "SELECT vessel FROM Vessel_Movements VM JOIN (SELECT vessel, SUM(quantity) AS total_quantity FROM Vessel_Movements WHERE port = 'India' AND MONTH(time) = 5 AND YEAR(time) = 2020 GROUP BY vessel) V ON VM.vessel = V.vessel WHERE V.total_quantity > 120;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 249, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What state is Tanya Plibersek from?", "schema": "CREATE TABLE table_name_15 (state VARCHAR, member VARCHAR)", "sql": "SELECT state FROM table_name_15 WHERE member = 'tanya plibersek';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 65, "num_statements": 1} {"question": "List all product safety records with a rating below 70 for Indian-manufactured cosmetics.", "schema": "CREATE TABLE product_safety (product_name TEXT, manufactured_country TEXT, safety_rating INT); INSERT INTO product_safety VALUES ('Cleanser 1', 'India', 65), ('Toner 1', 'India', 72), ('Moisturizer 1', 'India', 80), ('Cleanser 2', 'USA', 78), ('Toner 2', 'France', 85);", "sql": "SELECT product_name, manufactured_country, safety_rating FROM product_safety WHERE manufactured_country = 'India' AND safety_rating < 70;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 137, "num_statements": 1} {"question": "What is the total number of smart city devices in cities with a population greater than 500,000?", "schema": "CREATE TABLE City (id INT, name VARCHAR(255), population INT, smart_city_devices INT); INSERT INTO City (id, name, population, smart_city_devices) VALUES (1, 'Tokyo', 9000000, 500); INSERT INTO City (id, name, population, smart_city_devices) VALUES (2, 'Berlin', 3500000, 300);", "sql": "SELECT COUNT(smart_city_devices) FROM City WHERE population > 500000;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 69, "num_statements": 1} {"question": "Which climate adaptation projects have the same budget as any project in the climate finance domain?", "schema": "CREATE TABLE climate_adaptation(project_name TEXT, budget FLOAT); INSERT INTO climate_adaptation(project_name, budget) VALUES ('Project I', 200000.00), ('Project J', 250000.00), ('Project K', 300000.00), ('Project L', 350000.00);", "sql": "SELECT project_name FROM climate_adaptation WHERE budget IN (SELECT budget FROM climate_finance);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 97, "num_statements": 1} {"question": "Insert a new sale for the state of Washington in Q3 2022 with a revenue of 20000 and a strain of \"Purple Haze\"", "schema": "CREATE TABLE sales (id INT, state VARCHAR(50), quarter VARCHAR(10), strain VARCHAR(50), revenue INT);", "sql": "INSERT INTO sales (state, quarter, strain, revenue) VALUES ('Washington', 'Q3', 'Purple Haze', 20000);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 102, "num_statements": 1} {"question": "How many healthcare projects in the 'healthcare_projects' table have a budget over 2 million dollars?", "schema": "CREATE TABLE healthcare_projects (project VARCHAR(50), budget INT); INSERT INTO healthcare_projects (project, budget) VALUES ('Hospital Building', 4000000); INSERT INTO healthcare_projects (project, budget) VALUES ('Medical Equipment', 1000000); INSERT INTO healthcare_projects (project, budget) VALUES ('Clinic Expansion', 2500000);", "sql": "SELECT COUNT(*) FROM healthcare_projects WHERE budget > 2000000;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 64, "num_statements": 1} {"question": "PL/pgSQL test: Plpython Trigger (example 102).", "schema": null, "sql": "CREATE FUNCTION composite_trigger_noop_f() RETURNS trigger AS $$\n return 'MODIFY'\n$$ LANGUAGE plpython3u;", "explanation": "PL/pgSQL example from PostgreSQL source test for Plpython Trigger.", "validation_query": null, "source": "plpgsql_source", "difficulty": "basic", "category": "plpgsql_trigger", "is_postgresql_specific": false, "sql_length": 108, "num_statements": 1} {"question": "Delete records of fashion trends that were discontinued in the last year, based on the UpdateDate in the FashionTrends table.", "schema": "CREATE TABLE FashionTrends (TrendID INT, TrendName VARCHAR(50), Category VARCHAR(50), UpdateDate DATE);", "sql": "DELETE FROM FashionTrends WHERE UpdateDate < DATE_SUB(CURRENT_DATE, INTERVAL 1 YEAR);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_delete", "is_postgresql_specific": false, "sql_length": 85, "num_statements": 1} {"question": "For the \"defense_diplomacy\" table, insert new records for each of the following: Malaysia, 2021, Bilateral; Thailand, 2022, Multilateral", "schema": "CREATE TABLE defense_diplomacy (id INT PRIMARY KEY, country VARCHAR(50), year INT, type VARCHAR(20));", "sql": "INSERT INTO defense_diplomacy (id, country, year, type) VALUES (1, 'Malaysia', 2021, 'Bilateral'), (2, 'Thailand', 2022, 'Multilateral');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 137, "num_statements": 1} {"question": "What is the total revenue generated by sustainable tourism in Germany and France last year?", "schema": "CREATE TABLE tourism_revenue (year INT, country VARCHAR(255), revenue INT); INSERT INTO tourism_revenue (year, country, revenue) VALUES (2021, 'Germany', 9000000), (2021, 'France', 10000000);", "sql": "SELECT SUM(revenue) FROM tourism_revenue WHERE country IN ('Germany', 'France') AND year = 2021;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 96, "num_statements": 1} {"question": "What was the average delay for defense projects in the Asia-Pacific region in 2020?", "schema": "CREATE SCHEMA if not exists defense_projects;CREATE TABLE if not exists defense_project_delays(project_name text, delay_region text, delay_year integer, delay_duration integer);INSERT INTO defense_project_delays(project_name, delay_region, delay_year, delay_duration) VALUES('F-35', 'Asia-Pacific', 2020, 2), ('Joint Light Tactical Vehicle', 'Asia-Pacific', 2020, 3), ('Global Hawk', 'Asia-Pacific', 2020, 1);", "sql": "SELECT AVG(delay_duration) FROM defense_project_delays WHERE delay_region = 'Asia-Pacific' AND delay_year = 2020;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 113, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What Venue on October 27, 2005 had a Score of 5-0?", "schema": "CREATE TABLE table_name_65 (venue VARCHAR, score VARCHAR, date VARCHAR)", "sql": "SELECT venue FROM table_name_65 WHERE score = '5-0' AND date = 'october 27, 2005';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 82, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: When BVA was 3%, what was the Ipsos?", "schema": "CREATE TABLE table_name_16 (ipsos_5_30_09 VARCHAR, bva_6_1_09 VARCHAR)", "sql": "SELECT ipsos_5_30_09 FROM table_name_16 WHERE bva_6_1_09 = '3%';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 64, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'stats_ext' (example 638).", "schema": null, "sql": "INSERT INTO mcv_lists_multi (a, b, c, d)\n SELECT\n mod(i,5),\n mod(i,5),\n mod(i,7),\n mod(i,7)\n FROM generate_series(1,5000) s(i);", "explanation": "DML from PostgreSQL core regression test for Stats Ext.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 165, "num_statements": 1} {"question": "Write the PL/pgSQL object from PostgreSQL regression test 'rangetypes' (example 356).", "schema": null, "sql": "-- infer anyelement from anyrange\ncreate function inoutparam_succeed(out i anyelement, inout r anyrange)\n as $$ select upper($1), $1 $$ language sql;\n\nselect * from inoutparam_succeed(int4range(1,2));\n\ncreate function table_succeed(r anyrange)\n returns table(l anyelement, u anyelement)\n as $$ select lower($1), upper($1) $$\n language sql;", "explanation": "PL/pgSQL object from PostgreSQL core test for Rangetypes.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 343, "num_statements": 3} {"question": "Generate PostgreSQL SQL for: Who is every young rider classification when Jelle Vanendert is the winner?", "schema": "CREATE TABLE table_25999087_2 (young_rider_classification VARCHAR, winner VARCHAR)", "sql": "SELECT young_rider_classification FROM table_25999087_2 WHERE winner = 'Jelle Vanendert';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 89, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the sum of UCI points for Kim Kirchen?", "schema": "CREATE TABLE table_name_42 (uci_points INTEGER, cyclist VARCHAR)", "sql": "SELECT SUM(uci_points) FROM table_name_42 WHERE cyclist = 'kim kirchen';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 72, "num_statements": 1} {"question": "List all marine species that are threatened or endangered and have been recorded at a depth greater than 2000 meters.", "schema": "CREATE TABLE marine_species (species_id INT, species_name VARCHAR(100), conservation_status VARCHAR(50), max_depth FLOAT, order_name VARCHAR(50));", "sql": "SELECT species_name FROM marine_species WHERE conservation_status IN ('Threatened', 'Endangered') AND max_depth > 2000;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 119, "num_statements": 1} {"question": "How many solar power plants were built in '2020' and '2021' that have an efficiency rating above 20%?", "schema": "CREATE TABLE solar_plants (id INT, name VARCHAR(50), built_year INT, location VARCHAR(50), efficiency FLOAT); INSERT INTO solar_plants (id, name, built_year, location, efficiency) VALUES (1, 'SolarPlant1', 2020, 'LocationA', 0.22), (2, 'SolarPlant2', 2021, 'LocationB', 0.18);", "sql": "SELECT COUNT(*) FROM solar_plants WHERE built_year IN (2020, 2021) AND efficiency > 0.20;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 89, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who is the opponent in the final when frankfurt is championship and the year is less than 1993.0?", "schema": "CREATE TABLE table_22834834_2 (opponent_in_the_final VARCHAR, championship VARCHAR, year VARCHAR)", "sql": "SELECT opponent_in_the_final FROM table_22834834_2 WHERE championship = 'Frankfurt' AND year < 1993.0;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 102, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Ashley Grimes had what to club?", "schema": "CREATE TABLE table_name_26 (to_club VARCHAR, player VARCHAR)", "sql": "SELECT to_club FROM table_name_26 WHERE player = 'ashley grimes';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 65, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the Round of 16 value for the nation with a value of 5 for Ranking Round Rank?", "schema": "CREATE TABLE table_name_83 (round_of_16 VARCHAR, ranking_round_rank VARCHAR)", "sql": "SELECT round_of_16 FROM table_name_83 WHERE ranking_round_rank = 5;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 67, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the average grid for jack brabham going over 32 laps?", "schema": "CREATE TABLE table_name_66 (grid INTEGER, driver VARCHAR, laps VARCHAR)", "sql": "SELECT AVG(grid) FROM table_name_66 WHERE driver = 'jack brabham' AND laps > 32;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 80, "num_statements": 1} {"question": "What is the total number of employees working in mining sites located in California?", "schema": "CREATE TABLE sites (site_id INT, site_name VARCHAR(100), state VARCHAR(50)); INSERT INTO sites (site_id, site_name, state) VALUES (1, 'Golden Mining Site', 'California'); INSERT INTO sites (site_id, site_name, state) VALUES (2, 'Silver Peak Mine', 'Nevada'); CREATE TABLE employees (employee_id INT, employee_name VARCHAR(100), site_id INT); INSERT INTO employees (employee_id, employee_name, site_id) VALUES (1, 'John Doe', 1); INSERT INTO employees (employee_id, employee_name, site_id) VALUES (2, 'Jane Smith', 1); INSERT INTO employees (employee_id, employee_name, site_id) VALUES (3, 'Robert Johnson', 2);", "sql": "SELECT COUNT(*) FROM employees WHERE site_id IN (SELECT site_id FROM sites WHERE state = 'California');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 103, "num_statements": 1} {"question": "What is the total production cost of bamboo viscose in China?", "schema": "CREATE TABLE ProductionCosts (product VARCHAR(255), material VARCHAR(255), cost DECIMAL(10,2)); INSERT INTO ProductionCosts (product, material, cost) VALUES ('Bamboo Viscose', 'China', 8.5);", "sql": "SELECT SUM(cost) FROM ProductionCosts WHERE product = 'Bamboo Viscose' AND material = 'China';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 94, "num_statements": 1} {"question": "What is the average warehouse management cost per item for each warehouse in Q3 2021?", "schema": "CREATE TABLE warehouse_management (item_id INT, warehouse_id INT, cost FLOAT, order_date DATE);", "sql": "SELECT warehouse_id, AVG(cost/COUNT(*)) as avg_cost_per_item FROM warehouse_management WHERE EXTRACT(MONTH FROM order_date) BETWEEN 7 AND 9 GROUP BY warehouse_id;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 162, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the amount of viewers if the series number is 14?", "schema": "CREATE TABLE table_24639086_3 (viewers__in_millions_ VARCHAR, series__number VARCHAR)", "sql": "SELECT viewers__in_millions_ FROM table_24639086_3 WHERE series__number = 14;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 77, "num_statements": 1} {"question": "How many volunteers engaged in each program in Q1 2021, grouped by city?", "schema": "CREATE TABLE Volunteers (VolunteerID INT, Name TEXT, City TEXT); CREATE TABLE Programs (ProgramID INT, Name TEXT, VolunteerID INT, StartDate DATE); INSERT INTO Volunteers (VolunteerID, Name, City) VALUES (1, 'James Johnson', 'New York'), (2, 'Natalie Brown', 'Los Angeles'), (3, 'Michael Davis', 'Chicago'); INSERT INTO Programs (ProgramID, Name, VolunteerID, StartDate) VALUES (1, 'Tutoring Kids', 1, '2021-01-05'), (2, 'Cleaning Beaches', 2, '2021-03-20'), (3, 'Planting Trees', 3, '2021-02-01');", "sql": "SELECT City, COUNT(DISTINCT Volunteers.VolunteerID) as 'Number of Volunteers' FROM Programs INNER JOIN Volunteers ON Programs.VolunteerID = Volunteers.VolunteerID WHERE Programs.StartDate BETWEEN '2021-01-01' AND '2021-03-31' GROUP BY City;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 240, "num_statements": 1} {"question": "What is the percentage of users in the entertainment industry, in the United States, who have opted out of targeted advertising?", "schema": "CREATE TABLE users (id INT, country VARCHAR(255), industry VARCHAR(255), opted_out BOOLEAN);", "sql": "SELECT 100.0 * SUM(CASE WHEN opted_out THEN 1 ELSE 0 END) / COUNT(*) FROM users WHERE country = 'United States' AND industry = 'entertainment';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 143, "num_statements": 1} {"question": "What is the name, age, and country of donors who have donated more than $5000?", "schema": "CREATE TABLE Donors (DonorID int, Name varchar(50), Age int, Country varchar(50), Donations int); INSERT INTO Donors (DonorID, Name, Age, Country, Donations) VALUES (1, 'John Doe', 30, 'USA', 5000), (2, 'Jane Smith', 45, 'Canada', 7000), (3, 'Pedro Martinez', 25, 'Mexico', 6000);", "sql": "SELECT d.Name, d.Age, d.Country FROM Donors d WHERE d.Donations > 5000;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 71, "num_statements": 1} {"question": "List the countries and their military branches that use drone technology.", "schema": "CREATE TABLE DroneMilitary (Country VARCHAR(255), Branch VARCHAR(255)); INSERT INTO DroneMilitary (Country, Branch) VALUES ('USA', 'Air Force'), ('USA', 'Navy'), ('Israel', 'Air Force'), ('Turkey', 'Air Force');", "sql": "SELECT Country FROM DroneMilitary WHERE Branch IN ('Air Force', 'Navy');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 72, "num_statements": 1} {"question": "What is the average number of views for news stories published in the \"news\" table for each month in 2020?", "schema": "CREATE TABLE news (id INT, title VARCHAR(100), views INT, date DATE);", "sql": "SELECT MONTH(date) AS month, AVG(views) AS avg_views FROM news WHERE YEAR(date) = 2020 GROUP BY month;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 102, "num_statements": 1} {"question": "How many social equity dispensary licenses were issued in Oakland before 2020?", "schema": "CREATE TABLE licenses (id INT, type TEXT, applicant TEXT, city TEXT, issue_date DATE); INSERT INTO licenses (id, type, applicant, city, issue_date) VALUES (1, 'dispensary', 'social equity', 'Oakland', '2019-01-01'), (2, 'manufacturing', 'general', 'Oakland', '2018-01-01');", "sql": "SELECT COUNT(*) FROM licenses WHERE type = 'dispensary' AND applicant = 'social equity' AND city = 'Oakland' AND issue_date < '2020-01-01';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 139, "num_statements": 1} {"question": "pgTAP test for Inheritance (assertion 119).", "schema": null, "sql": "SELECT * FROM check_test(\n isnt_descendent_of( 'hide', 'h_child2', 'hide', 'h_parent', 2, 'Lookie' ),\n false,\n 'isnt_descendent_of(csch, ctab, psch, ptab, 2, desc)',\n 'Lookie',\n ''\n);", "explanation": "SQL assertion from pgTAP test suite for Inheritance.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 198, "num_statements": 1} {"question": "What is the total waste generation (in kg) for each city and material type?", "schema": "CREATE TABLE waste_generation_material (city VARCHAR(255), material VARCHAR(255), year INT, amount FLOAT); INSERT INTO waste_generation_material (city, material, year, amount) VALUES ('CityA', 'Plastic', 2019, 300.0), ('CityA', 'Paper', 2019, 400.0), ('CityA', 'Glass', 2019, 500.0), ('CityB', 'Plastic', 2019, 200.0), ('CityB', 'Paper', 2019, 350.0), ('CityB', 'Glass', 2019, 450.0);", "sql": "SELECT city, material, SUM(amount) FROM waste_generation_material GROUP BY city, material;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 90, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the type where the name is bojan?", "schema": "CREATE TABLE table_11891841_2 (type VARCHAR, name VARCHAR)", "sql": "SELECT type FROM table_11891841_2 WHERE name = 'Bojan';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 55, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the Play-by-play when the color commentator was Eric Wynalda, earlier than 2005?", "schema": "CREATE TABLE table_name_11 (play_by_play VARCHAR, color_commentator_s_ VARCHAR, year VARCHAR)", "sql": "SELECT play_by_play FROM table_name_11 WHERE color_commentator_s_ = 'eric wynalda' AND year < 2005;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 99, "num_statements": 1} {"question": "How many players are there in the \"EpicRacers\" table, grouped by their preferred gaming platform (PC, Console, Mobile)?", "schema": "CREATE TABLE EpicRacers (PlayerID INT, Name VARCHAR(50), Platform VARCHAR(10)); INSERT INTO EpicRacers (PlayerID, Name, Platform) VALUES (1, 'John', 'PC'), (2, 'Amy', 'Console'), (3, 'Mike', 'Mobile'), (4, 'Linda', 'PC'), (5, 'Sam', 'Console');", "sql": "SELECT Platform, COUNT(PlayerID) FROM EpicRacers GROUP BY Platform;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 67, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the womens singles of marcus ellis gabrielle white?", "schema": "CREATE TABLE table_12104319_1 (womens_singles VARCHAR, mixed_doubles VARCHAR)", "sql": "SELECT womens_singles FROM table_12104319_1 WHERE mixed_doubles = 'Marcus Ellis Gabrielle White';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 97, "num_statements": 1} {"question": "Which industries have the most diverse founding teams in terms of gender?", "schema": "CREATE TABLE company (id INT, name VARCHAR(50), industry VARCHAR(50), founder_gender VARCHAR(10)); INSERT INTO company (id, name, industry, founder_gender) VALUES (1, 'Acme Inc', 'Tech', 'Female'), (2, 'Beta Corp', 'Finance', 'Male'), (3, 'Gamma Startup', 'Tech', 'Female'), (4, 'Delta Company', 'Finance', 'Non-binary');", "sql": "SELECT industry, COUNT(DISTINCT founder_gender) AS diversity_score FROM company GROUP BY industry ORDER BY diversity_score DESC;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 128, "num_statements": 1} {"question": "Get the number of renewable energy projects in the RenewableEnergy schema", "schema": "CREATE SCHEMA RenewableEnergy; USE RenewableEnergy; CREATE TABLE RenewableEnergyProjects (id INT, project_name VARCHAR(100), type VARCHAR(50)); INSERT INTO RenewableEnergyProjects (id, project_name, type) VALUES (1, 'Hydroelectric Plant', 'Hydro'), (2, 'Wind Farm', 'Wind'), (3, 'Solar Farm', 'Solar');", "sql": "SELECT COUNT(*) FROM RenewableEnergy.RenewableEnergyProjects WHERE type IN ('Hydro', 'Wind', 'Solar');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 102, "num_statements": 1} {"question": "What is the total number of creative AI applications developed in Africa?", "schema": "CREATE TABLE AIApplications (ApplicationId INT, Name TEXT, Type TEXT, CreativityScore FLOAT, Country TEXT); INSERT INTO AIApplications (ApplicationId, Name, Type, CreativityScore, Country) VALUES (1, 'ApplicationX', 'Art Generation', 0.85, 'Nigeria'), (2, 'ApplicationY', 'Music Generation', 0.95, 'South Africa'), (3, 'ApplicationZ', 'Writing Generation', 0.75, 'Egypt');", "sql": "SELECT SUM(CreativityScore) FROM AIApplications WHERE Type = 'Creative';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 72, "num_statements": 1} {"question": "PostgreSQL regression test 'temp': Write the SELECT query (example 163).", "schema": null, "sql": "SELECT pg_relation_size('test_temp') / current_setting('block_size')::int8 > 200;", "explanation": "Regression test for Temp in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT pg_relation_size('test_temp') / current_setting('block_size')::int8 > 200) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 81, "num_statements": 1} {"question": "List all the distinct rare earth elements in the market trends data.", "schema": "CREATE TABLE market_trends (element VARCHAR(255), price DECIMAL(10,2), quantity INT); INSERT INTO market_trends (element, price, quantity) VALUES ('Neodymium', 92.50, 5000), ('Praseodymium', 85.20, 3000), ('Dysprosium', 120.00, 2000);", "sql": "SELECT DISTINCT element FROM market_trends;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 43, "num_statements": 1} {"question": "What is the percentage of security incidents caused by insiders in the energy sector?", "schema": "CREATE TABLE energy_sector (sector VARCHAR(255), incident_type VARCHAR(255), incidents INT); INSERT INTO energy_sector (sector, incident_type, incidents) VALUES ('Energy', 'Insider Threat', 50), ('Energy', 'Phishing', 100), ('Energy', 'Malware', 75), ('Energy', 'Ransomware', 100), ('Energy', 'DDoS', 100);", "sql": "SELECT (SUM(CASE WHEN incident_type = 'Insider Threat' THEN incidents ELSE 0 END) / SUM(incidents)) * 100 FROM energy_sector WHERE sector = 'Energy';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 149, "num_statements": 1} {"question": "Which destinations issued travel advisories in the first half of 2020?", "schema": "CREATE TABLE advisories (destination VARCHAR(50), advisory_issue_date DATE); INSERT INTO advisories (destination, advisory_issue_date) VALUES ('Brazil', '2020-01-20'), ('Argentina', '2020-03-15'), ('Colombia', '2020-06-01');", "sql": "SELECT destination FROM advisories WHERE MONTH(advisory_issue_date) <= 6 AND YEAR(advisory_issue_date) = 2020;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 110, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the sum of Silver with a Bronze that is larger than 0 with a Gold smaller than 0?", "schema": "CREATE TABLE table_name_26 (silver INTEGER, bronze VARCHAR, gold VARCHAR)", "sql": "SELECT SUM(silver) FROM table_name_26 WHERE bronze > 0 AND gold < 0;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 68, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was tyre that was made by cooper - bristol that was driven by bob gerard?", "schema": "CREATE TABLE table_name_77 (tyre VARCHAR, constructor VARCHAR, driver VARCHAR)", "sql": "SELECT tyre FROM table_name_77 WHERE constructor = 'cooper - bristol' AND driver = 'bob gerard';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 96, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: In what League is the Reed School?", "schema": "CREATE TABLE table_name_87 (league VARCHAR, school VARCHAR)", "sql": "SELECT league FROM table_name_87 WHERE school = 'reed';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 55, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'numeric' (example 296).", "schema": null, "sql": "INSERT INTO num_exp_add VALUES (7,1,'-83028485');", "explanation": "DML from PostgreSQL core regression test for Numeric.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 49, "num_statements": 1} {"question": "What is the average CO2 emissions per garment for the Fall 2021 collection?", "schema": "CREATE TABLE emissions (collection VARCHAR(10), co2_emissions FLOAT, units INT); INSERT INTO emissions (collection, co2_emissions, units) VALUES ('Fall_2021', 12.5, 1000), ('Fall_2021', 11.7, 1500), ('Fall_2021', 13.2, 1200);", "sql": "SELECT AVG(co2_emissions) FROM emissions WHERE collection = 'Fall_2021';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 72, "num_statements": 1} {"question": "What is the total number of marine species observed in the Arctic Ocean, and how many of these species are endemic to the Arctic?", "schema": "CREATE TABLE marine_species (species_name TEXT, ocean TEXT, endemic BOOLEAN); INSERT INTO marine_species (species_name, ocean, endemic) VALUES ('Species C', 'Arctic Ocean', TRUE); INSERT INTO marine_species (species_name, ocean, endemic) VALUES ('Species D', 'Arctic Ocean', FALSE);", "sql": "SELECT COUNT(*) AS total_species, SUM(endemic) AS endemic_species FROM marine_species WHERE ocean = 'Arctic Ocean';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 115, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who holds third place in the tournament with a score of 2–6, 7–6(3), [10–5]?", "schema": "CREATE TABLE table_name_16 (third_place VARCHAR, score VARCHAR)", "sql": "SELECT third_place FROM table_name_16 WHERE score = '2–6, 7–6(3), [10–5]';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 74, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What's the venue for the asian games tournament?", "schema": "CREATE TABLE table_name_55 (venue VARCHAR, tournament VARCHAR)", "sql": "SELECT venue FROM table_name_55 WHERE tournament = 'asian games';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 65, "num_statements": 1} {"question": "pgTAP test for Functap (assertion 221).", "schema": null, "sql": "SELECT * FROM check_test(\n isnt_aggregate( 'public', 'nope', ARRAY[etype()], 'whatever' ),\n false,\n 'isnt_aggregate(schema, noagg, arg, desc)',\n 'whatever',\n ' Function public.nope(' || etype() || ') does not exist'\n);", "explanation": "SQL assertion from pgTAP test suite for Functap.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": true, "sql_length": 236, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'prepared_xacts' (example 53).", "schema": null, "sql": "INSERT INTO pxtest2 VALUES (3);", "explanation": "DML from PostgreSQL core regression test for Prepared Xacts.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 31, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who was Home on December 5, 2007?", "schema": "CREATE TABLE table_name_11 (home VARCHAR, date VARCHAR)", "sql": "SELECT home FROM table_name_11 WHERE date = 'december 5, 2007';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 63, "num_statements": 1} {"question": "What is the number of flu shots administered in rural California this year?", "schema": "CREATE TABLE flu_shots (shot_id INT, patient_name TEXT, shot_date DATE, location TEXT); INSERT INTO flu_shots (shot_id, patient_name, shot_date, location) VALUES (1, 'Jane Doe', '2022-02-15', 'California');", "sql": "SELECT COUNT(*) FROM flu_shots WHERE EXTRACT(YEAR FROM shot_date) = EXTRACT(YEAR FROM CURRENT_DATE) AND location = 'California';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 128, "num_statements": 1} {"question": "What is the percentage of tickets sold for each event type, for events in Los Angeles?", "schema": "CREATE TABLE Events (id INT, event_name VARCHAR(100), event_type VARCHAR(50), location VARCHAR(100), start_time TIMESTAMP); CREATE TABLE Tickets (id INT, ticket_number INT, event_id INT, purchaser_name VARCHAR(100), purchase_date DATE);", "sql": "SELECT event_type, 100.0 * COUNT(ticket_number) / SUM(COUNT(ticket_number)) OVER (PARTITION BY NULL) as percentage FROM Events JOIN Tickets ON Events.id = Tickets.event_id WHERE location LIKE '%Los Angeles%' GROUP BY event_type;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "query_window_function", "is_postgresql_specific": true, "sql_length": 228, "num_statements": 1} {"question": "PostgreSQL regression test 'regproc': Write the SELECT query (example 52).", "schema": null, "sql": "SELECT to_regprocedure('-')::oid;", "explanation": "Regression test for Regproc in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT to_regprocedure('-')::oid) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 33, "num_statements": 1} {"question": "What is the average water requirement for crops grown in each province in India?", "schema": "CREATE TABLE irrigation (id INT, farm_id INT, irrigation_amount INT); INSERT INTO irrigation (id, farm_id, irrigation_amount) VALUES (1, 1, 1000), (2, 2, 1500);", "sql": "SELECT provinces.name, AVG(crops.water_requirement) FROM crops JOIN (SELECT farm_id FROM farms WHERE provinces.name = provinces.name) as subquery ON crops.id = subquery.farm_id JOIN irrigation ON crops.id = irrigation.farm_id JOIN provinces ON farms.id = provinces.id GROUP BY provinces.name;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 292, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the time of the driver with a finish position of 19?", "schema": "CREATE TABLE table_name_13 (time_retired VARCHAR, fin_pos VARCHAR)", "sql": "SELECT time_retired FROM table_name_13 WHERE fin_pos = '19';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 60, "num_statements": 1} {"question": "How many volunteers signed up in Q1 2022, by their state?", "schema": "CREATE TABLE volunteers (id INT, name VARCHAR(255), signup_date DATE, state VARCHAR(255)); INSERT INTO volunteers (id, name, signup_date, state) VALUES (1, 'John Doe', '2022-01-02', 'New York'), (2, 'Jane Smith', '2022-04-15', 'California'), (3, 'Mike Johnson', '2022-03-28', 'Illinois');", "sql": "SELECT state, COUNT(*) FROM volunteers WHERE signup_date >= '2022-01-01' AND signup_date < '2022-04-01' GROUP BY state;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 119, "num_statements": 1} {"question": "PostgreSQL Ddl: show example 17.", "schema": null, "sql": "CREATE TABLE products ( product_no integer, name text, price numeric, CHECK (price > 0), discounted_price numeric, CHECK (discounted_price > 0), CONSTRAINT valid_discount CHECK (price > discounted_price) );", "explanation": "Example from PostgreSQL documentation on Ddl.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_table", "is_postgresql_specific": false, "sql_length": 206, "num_statements": 1} {"question": "What's the average number of artworks viewed per visitor in the 'Europe' region?", "schema": "CREATE TABLE Artworks (ArtworkID INT, ExhibitionID INT, VisitorID INT);", "sql": "SELECT AVG(a.ArtworksViewed) FROM (SELECT v.VisitorID, COUNT(a.ArtworkID) ArtworksViewed FROM Artworks a JOIN Exhibitions e ON a.ExhibitionID = e.ExhibitionID JOIN Visitors v ON a.VisitorID = v.VisitorID JOIN Regions r ON v.Country = r.CountryName WHERE r.Region = 'Europe' GROUP BY v.VisitorID) a;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 298, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What district first elected a Democratic incumbent in 1998?", "schema": "CREATE TABLE table_1805191_50 (district VARCHAR, party VARCHAR, first_elected VARCHAR)", "sql": "SELECT district FROM table_1805191_50 WHERE party = 'Democratic' AND first_elected = 1998;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 90, "num_statements": 1} {"question": "List decentralized applications and their respective regulatory frameworks.", "schema": "CREATE TABLE DApps (DAppId INT, DAppName VARCHAR(50), RegulatorId INT); CREATE TABLE Regulators (RegulatorId INT, RegulatorName VARCHAR(50), Region VARCHAR(50)); INSERT INTO DApps (DAppId, DAppName, RegulatorId) VALUES (1, 'App1', 1); INSERT INTO DApps (DAppId, DAppName, RegulatorId) VALUES (2, 'App2', 2); INSERT INTO DApps (DAppId, DAppName, RegulatorId) VALUES (3, 'App3', 3); INSERT INTO Regulators (RegulatorId, RegulatorName, Region) VALUES (1, 'Regulator1', 'US'); INSERT INTO Regulators (RegulatorId, RegulatorName, Region) VALUES (2, 'Regulator2', 'EU'); INSERT INTO Regulators (RegulatorId, RegulatorName, Region) VALUES (3, 'Regulator3', 'UK');", "sql": "SELECT da.DAppName, r.RegulatorName FROM DApps da INNER JOIN Regulators r ON da.RegulatorId = r.RegulatorId;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 108, "num_statements": 1} {"question": "How many cases were opened for each attorney in the 'Southeast' district in 2021?", "schema": "CREATE TABLE attorney_cases(attorney_id INT, case_id INT, district VARCHAR(20), opened_date DATE); INSERT INTO attorney_cases(attorney_id, case_id, district, opened_date) VALUES (1, 101, 'Southeast', '2021-03-01'), (2, 102, 'Northeast', '2021-08-15'), (3, 103, 'Southeast', '2021-11-29'), (1, 104, 'Southeast', '2021-05-12'), (4, 105, 'Southeast', '2021-09-22');", "sql": "SELECT attorney_id, COUNT(*) FROM attorney_cases WHERE district = 'Southeast' AND YEAR(opened_date) = 2021 GROUP BY attorney_id;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 128, "num_statements": 1} {"question": "What is the average claim amount for policies in the 'Home' coverage type?", "schema": "CREATE TABLE Policy (PolicyNumber INT, CoverageType VARCHAR(50)); CREATE TABLE Claim (ClaimNumber INT, PolicyNumber INT, ClaimAmount INT); INSERT INTO Policy (PolicyNumber, CoverageType) VALUES (1, 'Home'); INSERT INTO Policy (PolicyNumber, CoverageType) VALUES (2, 'Auto'); INSERT INTO Claim (ClaimNumber, PolicyNumber, ClaimAmount) VALUES (1, 1, 5000); INSERT INTO Claim (ClaimNumber, PolicyNumber, ClaimAmount) VALUES (2, 1, 7000); INSERT INTO Claim (ClaimNumber, PolicyNumber, ClaimAmount) VALUES (3, 2, 3000);", "sql": "SELECT AVG(ClaimAmount) FROM Claim JOIN Policy ON Claim.PolicyNumber = Policy.PolicyNumber WHERE Policy.CoverageType = 'Home';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 126, "num_statements": 1} {"question": "Find the number of units of women's garments produced using recycled materials.", "schema": "CREATE TABLE garment_units_recycled_materials (id INT, garment_type VARCHAR(50), units INT, recycled_material BOOLEAN); INSERT INTO garment_units_recycled_materials (id, garment_type, units, recycled_material) VALUES (1, 'Dress', 300, true); INSERT INTO garment_units_recycled_materials (id, garment_type, units, recycled_material) VALUES (2, 'Skirt', 200, true); INSERT INTO garment_units_recycled_materials (id, garment_type, units, recycled_material) VALUES (3, 'Shirt', 400, false);", "sql": "SELECT SUM(units) FROM garment_units_recycled_materials WHERE garment_type LIKE '%Women%' AND recycled_material = true;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 119, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the title of the episode with the production code 2-19?", "schema": "CREATE TABLE table_234886_3 (title VARCHAR, prod_code VARCHAR)", "sql": "SELECT title FROM table_234886_3 WHERE prod_code = '2-19';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 58, "num_statements": 1} {"question": "What is the average property price in the \"GreenCommunity\" and \"SolarVillage\" neighborhoods, grouped by property type?", "schema": "CREATE TABLE Property (id INT, neighborhood VARCHAR(20), price FLOAT, property_type VARCHAR(20)); INSERT INTO Property (id, neighborhood, price, property_type) VALUES (1, 'GreenCommunity', 500000, 'Apartment'), (2, 'SolarVillage', 700000, 'House');", "sql": "SELECT Property.property_type, AVG(Property.price) FROM Property WHERE Property.neighborhood IN ('GreenCommunity', 'SolarVillage') GROUP BY Property.property_type;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 163, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was David Backes' Offer Team?", "schema": "CREATE TABLE table_name_59 (offer_team VARCHAR, player VARCHAR)", "sql": "SELECT offer_team FROM table_name_59 WHERE player = 'david backes';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 67, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which Net profit/loss (SEK) has a Basic eps (SEK) of -6.58, and Employees (Average/Year) larger than 31,035?", "schema": "CREATE TABLE table_name_87 (net_profit_loss__sek_ INTEGER, basic_eps__sek_ VARCHAR, employees__average_year_ VARCHAR)", "sql": "SELECT SUM(net_profit_loss__sek_) FROM table_name_87 WHERE basic_eps__sek_ = -6.58 AND employees__average_year_ > 31 OFFSET 035;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 128, "num_statements": 1} {"question": "PostgreSQL regression test 'jsonb_jsonpath': Write the SELECT query (example 354).", "schema": null, "sql": "select jsonb_path_query('123', '$.bigint() * 2');", "explanation": "Regression test for Jsonb Jsonpath in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select jsonb_path_query('123', '$.bigint() * 2')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "advanced", "category": "query_select", "is_postgresql_specific": false, "sql_length": 49, "num_statements": 1} {"question": "What is the total revenue for each music genre in Q1 2022?", "schema": "CREATE TABLE MusicStreaming(Genre VARCHAR(20), Revenue DECIMAL(10,2), Date DATE); INSERT INTO MusicStreaming(Genre, Revenue, Date) VALUES ('Pop', 5000, '2022-01-01'), ('Rock', 6000, '2022-01-01'), ('Jazz', 3000, '2022-01-01'), ('Pop', 5500, '2022-02-01'), ('Rock', 6500, '2022-02-01'), ('Jazz', 3200, '2022-02-01'), ('Pop', 6000, '2022-03-01'), ('Rock', 7000, '2022-03-01'), ('Jazz', 3500, '2022-03-01');", "sql": "SELECT Genre, SUM(Revenue) as Total_Revenue FROM MusicStreaming WHERE Date BETWEEN '2022-01-01' AND '2022-03-31' GROUP BY Genre;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 128, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the highest Long that has an Avg/G smaller than 8.4, and an GP-GS of 4–0?", "schema": "CREATE TABLE table_name_88 (long INTEGER, avg_g VARCHAR, gp_gs VARCHAR)", "sql": "SELECT MAX(long) FROM table_name_88 WHERE avg_g < 8.4 AND gp_gs = '4–0';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 72, "num_statements": 1} {"question": "What are the research grant titles that do not have a corresponding publication?", "schema": "CREATE TABLE grant (id INT, title VARCHAR(100)); CREATE TABLE publication (id INT, title VARCHAR(100), grant_id INT);", "sql": "SELECT g.title FROM grant g LEFT JOIN publication p ON g.title = p.title WHERE p.id IS NULL;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "dcl_security", "is_postgresql_specific": false, "sql_length": 92, "num_statements": 1} {"question": "What is the earliest year in which a country launched a satellite in the SpaceRadar table?", "schema": "CREATE TABLE SpaceRadar (id INT, country VARCHAR(50), year INT, satellites INT); INSERT INTO SpaceRadar (id, country, year, satellites) VALUES (1, 'USA', 2000, 10), (2, 'China', 2005, 8), (3, 'Russia', 1995, 12);", "sql": "SELECT country, MIN(year) AS earliest_year FROM SpaceRadar GROUP BY country;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 76, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'triggers' (example 522).", "schema": null, "sql": "insert into merge_source_table\n values (1, 'initial1'), (2, 'initial2'),\n\t\t (3, 'initial3'), (4, 'initial4');", "explanation": "DML from PostgreSQL core regression test for Triggers.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 110, "num_statements": 1} {"question": "What is the percentage of financially capable individuals in the rural areas of the country?", "schema": "CREATE TABLE financial_capability_rural (location TEXT, capable BOOLEAN); INSERT INTO financial_capability_rural (location, capable) VALUES ('Village A', TRUE), ('Village B', FALSE), ('Village C', TRUE), ('Hamlet D', FALSE);", "sql": "SELECT (COUNT(*) FILTER (WHERE capable = TRUE)) * 100.0 / COUNT(*) FROM financial_capability_rural WHERE location LIKE '%rural%';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": true, "sql_length": 129, "num_statements": 1} {"question": "Write the PL/pgSQL object from PostgreSQL regression test 'alter_generic' (example 142).", "schema": null, "sql": "-- Should fail. duplicate operator number / function number in ALTER OPERATOR FAMILY ... ADD FUNCTION\nCREATE OPERATOR FAMILY alt_opf17 USING btree;", "explanation": "PL/pgSQL object from PostgreSQL core test for Alter Generic.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "ddl_advanced", "is_postgresql_specific": true, "sql_length": 147, "num_statements": 1} {"question": "Insert a new record into the 'equipment' table for a centrifuge with an ID of 101", "schema": "CREATE TABLE equipment (equipment_id INT PRIMARY KEY, equipment_name VARCHAR(50), equipment_type VARCHAR(50));", "sql": "INSERT INTO equipment (equipment_id, equipment_name, equipment_type) VALUES (101, 'Centrifuge', 'Equipment');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 109, "num_statements": 1} {"question": "How many security incidents were recorded in each city in the last 6 months?", "schema": "CREATE TABLE security_incidents (id INT, city VARCHAR(255), timestamp TIMESTAMP);", "sql": "SELECT city, COUNT(*) FROM security_incidents WHERE timestamp >= NOW() - INTERVAL 6 MONTH GROUP BY city;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 104, "num_statements": 1} {"question": "How many public libraries exist in urban and rural areas, respectively?", "schema": "CREATE TABLE Libraries (Location TEXT, Count INT); INSERT INTO Libraries (Location, Count) VALUES ('Urban', 150), ('Rural', 50);", "sql": "SELECT Location, Count FROM Libraries;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 38, "num_statements": 1} {"question": "Calculate the moving average of water consumption by mining operations in the past 6 months.", "schema": "CREATE TABLE WaterConsumption (MineID INT, Date DATE, Consumption INT); INSERT INTO WaterConsumption (MineID, Date, Consumption) VALUES (1, '2021-07-01', 1000), (1, '2021-08-01', 1200), (1, '2021-09-01', 1100), (1, '2021-10-01', 1300), (1, '2021-11-01', 1400), (1, '2021-12-01', 1500), (2, '2021-07-01', 1600), (2, '2021-08-01', 1800), (2, '2021-09-01', 1700), (2, '2021-10-01', 1900), (2, '2021-11-01', 2000), (2, '2021-12-01', 2100);", "sql": "SELECT MineID, AVG(Consumption) OVER (PARTITION BY MineID ORDER BY Date ROWS BETWEEN 5 PRECEDING AND CURRENT ROW) as MovingAvg FROM WaterConsumption;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "query_window_function", "is_postgresql_specific": true, "sql_length": 149, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: When was the cataglogue EPA 4054 released with a 1/12/57 recorded?", "schema": "CREATE TABLE table_name_31 (release_date VARCHAR, catalogue VARCHAR, recorded VARCHAR)", "sql": "SELECT release_date FROM table_name_31 WHERE catalogue = 'epa 4054' AND recorded = '1/12/57';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 93, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the year of the disc with a catalogue number mash02?", "schema": "CREATE TABLE table_name_22 (year VARCHAR, catalogue_number VARCHAR)", "sql": "SELECT year FROM table_name_22 WHERE catalogue_number = 'mash02';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 65, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the Trofeo Fast Team when stage is 19?", "schema": "CREATE TABLE table_name_34 (trofeo_fast_team VARCHAR, stage VARCHAR)", "sql": "SELECT trofeo_fast_team FROM table_name_34 WHERE stage = '19';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 62, "num_statements": 1} {"question": "What is the total value of defense contracts signed by company 'MNO Corp' in Q1 2020?", "schema": "CREATE TABLE defense_contracts (contract_id INT, company VARCHAR(255), value FLOAT, date DATE); INSERT INTO defense_contracts (contract_id, company, value, date) VALUES (3, 'MNO Corp', 300000, '2020-01-01'); INSERT INTO defense_contracts (contract_id, company, value, date) VALUES (4, 'DEF Inc', 450000, '2020-01-05');", "sql": "SELECT SUM(value) FROM defense_contracts WHERE company = 'MNO Corp' AND date BETWEEN '2020-01-01' AND '2020-03-31';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 115, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the name of the department that has the largest number of students enrolled?", "schema": "CREATE TABLE enroll (class_code VARCHAR); CREATE TABLE CLASS (class_code VARCHAR, crs_code VARCHAR); CREATE TABLE course (dept_code VARCHAR, crs_code VARCHAR); CREATE TABLE department (dept_name VARCHAR, dept_code VARCHAR)", "sql": "SELECT T4.dept_name FROM CLASS AS T1 JOIN enroll AS T2 ON T1.class_code = T2.class_code JOIN course AS T3 ON T1.crs_code = T3.crs_code JOIN department AS T4 ON T3.dept_code = T4.dept_code GROUP BY T3.dept_code ORDER BY COUNT(*) DESC LIMIT 1;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 241, "num_statements": 1} {"question": "How many rural health clinics are there in Australia and New Zealand that have a patient satisfaction score greater than 85?", "schema": "CREATE TABLE clinics (country VARCHAR(20), clinic_name VARCHAR(50), patient_satisfaction_score INT); INSERT INTO clinics (country, clinic_name, patient_satisfaction_score) VALUES ('Australia', 'Clinic E', 90), ('Australia', 'Clinic F', 80), ('New Zealand', 'Clinic G', 88), ('New Zealand', 'Clinic H', 92);", "sql": "SELECT country, COUNT(*) FROM clinics WHERE patient_satisfaction_score > 85 GROUP BY country;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 93, "num_statements": 1} {"question": "Delete consumer awareness data for a specific region.", "schema": "CREATE TABLE consumer_awareness (region_id INT PRIMARY KEY, awareness_score INT, year INT);", "sql": "DELETE FROM consumer_awareness WHERE region_id = 456 AND year = 2021;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_delete", "is_postgresql_specific": false, "sql_length": 69, "num_statements": 1} {"question": "What is the difference in popularity between the most and least popular songs per platform?", "schema": "CREATE TABLE songs (song_id INT, title TEXT, popularity INT, platform TEXT);", "sql": "SELECT platform, MAX(popularity) - MIN(popularity) FROM songs GROUP BY platform;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 80, "num_statements": 1} {"question": "PostgreSQL regression test 'int2': Write the SELECT query (example 11).", "schema": null, "sql": "SELECT pg_input_is_valid('asdf', 'int2');", "explanation": "Regression test for Int2 in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT pg_input_is_valid('asdf', 'int2')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 41, "num_statements": 1} {"question": "What is the total number of failed satellite launches by Israeli and Indian space programs?", "schema": "CREATE TABLE Satellite_Launches (launch_date DATE, country VARCHAR(255), success BOOLEAN); INSERT INTO Satellite_Launches (launch_date, country, success) VALUES ('2020-01-01', 'Israel', FALSE), ('2020-02-01', 'India', TRUE), ('2020-03-01', 'Israel', FALSE), ('2020-04-01', 'India', TRUE), ('2020-05-01', 'Israel', FALSE);", "sql": "SELECT SUM(success) AS total_failed_launches FROM (SELECT success FROM Satellite_Launches WHERE country IN ('Israel', 'India')) AS subquery WHERE success = FALSE;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 162, "num_statements": 1} {"question": "How many pallets are currently stored in each warehouse, broken down by warehouse type?", "schema": "CREATE TABLE warehouses (id INT, type VARCHAR(50), pallets INT); INSERT INTO warehouses (id, type, pallets) VALUES (1, 'Cool', 200), (2, 'Dry', 300), (3, 'Frozen', 150);", "sql": "SELECT type, SUM(pallets) as total_pallets FROM warehouses GROUP BY type;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 73, "num_statements": 1} {"question": "Show the total CO2 emissions for each store in the state of California.", "schema": "CREATE TABLE stores (store_id INT, store_name VARCHAR(255), store_state VARCHAR(255), co2_emissions INT);", "sql": "SELECT store_state, SUM(co2_emissions) as total_emissions FROM stores WHERE store_state = 'California' GROUP BY store_state;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 124, "num_statements": 1} {"question": "What is the average expenditure per day for tourists visiting New Zealand from Europe?", "schema": "CREATE TABLE expenditures (destination_country VARCHAR(50), visitor_country VARCHAR(50), avg_daily_expenditure FLOAT); INSERT INTO expenditures (destination_country, visitor_country, avg_daily_expenditure) VALUES ('New Zealand', 'Europe', 150.0);", "sql": "SELECT avg_daily_expenditure FROM expenditures WHERE destination_country = 'New Zealand' AND visitor_country = 'Europe';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 120, "num_statements": 1} {"question": "Find ingredients sourced from a specific country", "schema": "Ingredients (ingredient_id, name, source, last_updated)", "sql": "SELECT * FROM Ingredients WHERE source LIKE '%country%';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 56, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the team mascot for the soccer team in Charlottetown?", "schema": "CREATE TABLE table_27369069_1 (varsity_name VARCHAR, city VARCHAR)", "sql": "SELECT varsity_name FROM table_27369069_1 WHERE city = 'Charlottetown';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 71, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Faith of –, and a Name of greenway has what type?", "schema": "CREATE TABLE table_name_79 (type VARCHAR, faith VARCHAR, name VARCHAR)", "sql": "SELECT type FROM table_name_79 WHERE faith = '–' AND name = 'greenway';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 71, "num_statements": 1} {"question": "What is the total number of investments made by each investor?", "schema": "CREATE TABLE Investors (InvestorID INT, InvestorName VARCHAR(50)); CREATE TABLE Investments (InvestmentID INT, InvestorID INT, CompanyID INT, InvestmentAmount DECIMAL(10, 2));", "sql": "SELECT I.InvestorName, COUNT(I.InvestmentID) AS TotalInvestments FROM Investments I JOIN Investors ON I.InvestorID = Investors.InvestorID GROUP BY I.InvestorName;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 162, "num_statements": 1} {"question": "What is the total number of streams for Latin songs in Brazil in 2020?", "schema": "CREATE TABLE Streaming (country VARCHAR(50), year INT, genre VARCHAR(50), streams INT); INSERT INTO Streaming (country, year, genre, streams) VALUES ('Brazil', 2020, 'Latin', 900000); INSERT INTO Streaming (country, year, genre, streams) VALUES ('Brazil', 2020, 'Latin', 950000);", "sql": "SELECT SUM(streams) FROM Streaming WHERE country = 'Brazil' AND year = 2020 AND genre = 'Latin';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 96, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'float8' (example 13).", "schema": null, "sql": "INSERT INTO FLOAT8_TBL(f1) VALUES (' ');", "explanation": "DML from PostgreSQL core regression test for Float8.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 44, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'dblink' (example 6).", "schema": null, "sql": "INSERT INTO foo VALUES (0,'a','{\"a0\",\"b0\",\"c0\"}');", "explanation": "Example query from the 'dblink' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 50, "num_statements": 1} {"question": "What is the number of animals adopted by each community in 2018?", "schema": "CREATE TABLE CommunityAdoptions(Year INT, Community VARCHAR(20), Animals INT); INSERT INTO CommunityAdoptions VALUES (2017, 'CommunityA', 35), (2018, 'CommunityA', 40), (2017, 'CommunityB', 28), (2018, 'CommunityB', 32);", "sql": "SELECT Community, SUM(Animals) FROM CommunityAdoptions WHERE Year = 2018 GROUP BY Community;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 92, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the score for april 29?", "schema": "CREATE TABLE table_11964263_13 (score VARCHAR, date VARCHAR)", "sql": "SELECT score FROM table_11964263_13 WHERE date = 'April 29';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 60, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which event in the 2008 Beijing Games had a bronze medal?", "schema": "CREATE TABLE table_name_34 (event VARCHAR, games VARCHAR, medal VARCHAR)", "sql": "SELECT event FROM table_name_34 WHERE games = '2008 beijing' AND medal = 'bronze';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 82, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who was ashley sampi's opponent?", "schema": "CREATE TABLE table_name_71 (opponent VARCHAR, player VARCHAR)", "sql": "SELECT opponent FROM table_name_71 WHERE player = 'ashley sampi';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 65, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Show the date valid from and the date valid to for the card with card number '4560596484842'.", "schema": "CREATE TABLE Customers_cards (date_valid_from VARCHAR, date_valid_to VARCHAR, card_number VARCHAR)", "sql": "SELECT date_valid_from, date_valid_to FROM Customers_cards WHERE card_number = '4560596484842';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 95, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many Rebounds did Novica Veličković get in less than 22 Games?", "schema": "CREATE TABLE table_name_6 (rebounds INTEGER, name VARCHAR, games VARCHAR)", "sql": "SELECT SUM(rebounds) FROM table_name_6 WHERE name = 'novica veličković' AND games < 22;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 87, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: On what weekday was the match that had Perth Glory as the away team?", "schema": "CREATE TABLE table_name_86 (weekday VARCHAR, away VARCHAR)", "sql": "SELECT weekday FROM table_name_86 WHERE away = 'perth glory';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 61, "num_statements": 1} {"question": "What is the minimum stay required for volunteer tourism in Nepal?", "schema": "CREATE TABLE volunteer_programs (id INT, location VARCHAR(20), min_stay INT); INSERT INTO volunteer_programs (id, location, min_stay) VALUES (1, 'Nepal', 14), (2, 'Nepal', 21), (3, 'Nepal', 10);", "sql": "SELECT MIN(min_stay) FROM volunteer_programs WHERE location = 'Nepal';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 70, "num_statements": 1} {"question": "What is the total number of funding records for companies with female founders in the Healthcare industry?", "schema": "CREATE TABLE Companies (id INT, name VARCHAR(50), industry VARCHAR(50), country VARCHAR(50), founding_year INT, founder_gender VARCHAR(10)); CREATE TABLE Funding (id INT, company_name VARCHAR(50), funding_amount INT); INSERT INTO Companies (id, name, industry, country, founding_year, founder_gender) VALUES (1, 'HealthHer', 'Healthcare', 'USA', 2016, 'Female'); INSERT INTO Funding (id, company_name, funding_amount) VALUES (1, 'HealthHer', 2000000);", "sql": "SELECT COUNT(*) as funding_records_count FROM Funding INNER JOIN Companies ON Funding.company_name = Companies.name WHERE Companies.industry = 'Healthcare' AND Companies.founder_gender = 'Female';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 196, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'date' (example 5).", "schema": null, "sql": "INSERT INTO DATE_TBL VALUES ('1996-02-29');", "explanation": "DML from PostgreSQL core regression test for Date.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 43, "num_statements": 1} {"question": "What is the percentage of international tourists visiting museums in South America compared to North America?", "schema": "CREATE TABLE south_america_tourists (id INT, country TEXT, museum_visits INT); INSERT INTO south_america_tourists VALUES (1, 'Brazil', 2000), (2, 'Argentina', 3000); CREATE TABLE north_america_tourists (id INT, country TEXT, museum_visits INT); INSERT INTO north_america_tourists VALUES (1, 'USA', 5000), (2, 'Canada', 4000);", "sql": "SELECT 100.0 * SUM(museum_visits) / (SELECT SUM(museum_visits) FROM north_america_tourists) FROM south_america_tourists;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 120, "num_statements": 1} {"question": "What is the number of students who have taken more than one lifelong learning course?", "schema": "CREATE TABLE students (student_id INT, student_name VARCHAR(255)); INSERT INTO students VALUES (1, 'Student A'), (2, 'Student B'), (3, 'Student C'), (4, 'Student D'); CREATE TABLE student_courses (student_id INT, course_id INT); INSERT INTO student_courses VALUES (1, 1), (1, 2), (2, 1), (3, 2), (3, 3), (4, 1), (4, 2), (4, 3); CREATE TABLE lifelong_learning_courses (course_id INT, course_name VARCHAR(255), course_type VARCHAR(255)); INSERT INTO lifelong_learning_courses VALUES (1, 'Lifelong Learning Course 1', 'Lifelong Learning'), (2, 'Lifelong Learning Course 2', 'Lifelong Learning'), (3, 'Professional Development Course 1', 'Professional Development');", "sql": "SELECT COUNT(sc.student_id) as students_with_more_than_one_lifelong_learning_course FROM student_courses sc JOIN lifelong_learning_courses llc ON sc.course_id = llc.course_id GROUP BY llc.course_type HAVING COUNT(sc.student_id) > 1;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 232, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'amcheck' (example 39).", "schema": null, "sql": "SELECT bt_index_parent_check('bttest_btree_partitioned_idx');", "explanation": "Example query from the 'amcheck' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 61, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: what's the saturday saturnus ( saturn) with wednesday mercurius (mercury) being mercuridi", "schema": "CREATE TABLE table_1277350_1 (saturday_saturnus___saturn_ VARCHAR, wednesday_mercurius__mercury_ VARCHAR)", "sql": "SELECT saturday_saturnus___saturn_ FROM table_1277350_1 WHERE wednesday_mercurius__mercury_ = 'Mercuridi';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 106, "num_statements": 1} {"question": "What is the total production for offshore wells, partitioned by the well's region and status?", "schema": "CREATE TABLE offshore_wells (well_id INT, well_name VARCHAR(255), location VARCHAR(255), production FLOAT, well_status VARCHAR(50), region VARCHAR(50)); INSERT INTO offshore_wells (well_id, well_name, location, production, well_status, region) VALUES (11, 'Well E', 'North Sea', 800.0, 'Active', 'Europe'), (12, 'Well F', 'North Sea', 700.0, 'Inactive', 'Europe');", "sql": "SELECT region, well_status, SUM(production) OVER (PARTITION BY region, well_status) as total_production FROM offshore_wells WHERE location = 'offshore';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "query_window_function", "is_postgresql_specific": true, "sql_length": 152, "num_statements": 1} {"question": "What is the total area of cropland (in hectares) for each crop type?", "schema": "CREATE TABLE cropland_types (type VARCHAR(50), area INT); INSERT INTO cropland_types (type, area) VALUES ('Maize', 150000), ('Rice', 200000), ('Wheat', 180000);", "sql": "SELECT type, area FROM cropland_types;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 38, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'pg_buffercache' (example 15).", "schema": null, "sql": "SELECT count(*) > 0 FROM pg_buffercache_os_pages;", "explanation": "Example query from the 'pg_buffercache' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 49, "num_statements": 1} {"question": "Find the total number of marine research projects funded by the European Union and the United States in the last 3 years.", "schema": "CREATE TABLE research_projects (id INT, country VARCHAR(30), funder VARCHAR(30), project_name VARCHAR(50), date DATE); INSERT INTO research_projects (id, country, funder, project_name, date) VALUES (1, 'France', 'European Union', 'Marine Life Research', '2021-04-15'); INSERT INTO research_projects (id, country, funder, project_name, date) VALUES (2, 'Spain', 'United States', 'Ocean Mapping', '2020-07-22');", "sql": "SELECT SUM(total) FROM (SELECT COUNT(*) AS total FROM research_projects WHERE country = 'European Union' AND date >= DATE_SUB(CURRENT_DATE, INTERVAL 3 YEAR) UNION ALL SELECT COUNT(*) AS total FROM research_projects WHERE country = 'United States' AND date >= DATE_SUB(CURRENT_DATE, INTERVAL 3 YEAR)) AS combined_funders;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 320, "num_statements": 1} {"question": "What is the difference in budget allocation between infrastructure and social services in the state of California?", "schema": "CREATE TABLE states (state VARCHAR(255), service VARCHAR(255), budget INT); INSERT INTO states (state, service, budget) VALUES ('California', 'infrastructure', 1200000), ('California', 'social services', 800000);", "sql": "SELECT infrastructure_budget - social_services_budget AS budget_difference FROM (SELECT SUM(budget) AS infrastructure_budget FROM states WHERE state = 'California' AND service = 'infrastructure') AS infrastructure_budget, (SELECT SUM(budget) AS social_services_budget FROM states WHERE state = 'California' AND service = 'social services') AS social_services_budget;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 366, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which province has the contestant elixandra tobias carasco?", "schema": "CREATE TABLE table_name_81 (province VARCHAR, _community VARCHAR, contestant VARCHAR)", "sql": "SELECT province, _community FROM table_name_81 WHERE contestant = 'elixandra tobias carasco';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 93, "num_statements": 1} {"question": "How many wells in the 'WELL_COUNT' view are in the 'NORTH_SEA' region?", "schema": "CREATE VIEW WELL_COUNT AS SELECT COUNT(*), REGION FROM OIL_WELLS GROUP BY REGION;", "sql": "SELECT COUNT(*) FROM WELL_COUNT WHERE REGION = 'NORTH_SEA';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 59, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the to par of the player with a t4 place and a score of 75-69-74-72=290?", "schema": "CREATE TABLE table_name_46 (to_par VARCHAR, place VARCHAR, score VARCHAR)", "sql": "SELECT to_par FROM table_name_46 WHERE place = 't4' AND score = 75 - 69 - 74 - 72 = 290;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 88, "num_statements": 1} {"question": "PostgreSQL regression test 'jsonb_jsonpath': Write the SELECT query (example 610).", "schema": null, "sql": "select jsonb_path_query('\"bogus\"', '$.timestamp_tz()');", "explanation": "Regression test for Jsonb Jsonpath in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select jsonb_path_query('\"bogus\"', '$.timestamp_tz()')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "advanced", "category": "query_select", "is_postgresql_specific": false, "sql_length": 55, "num_statements": 1} {"question": "Count the number of properties with inclusive housing policies in each city.", "schema": "CREATE TABLE inclusive_housing (id INT, property_id INT, city VARCHAR(20)); INSERT INTO inclusive_housing (id, property_id, city) VALUES (1, 1001, 'New York'), (2, 1002, 'Los Angeles'), (3, 1003, 'New York');", "sql": "SELECT city, COUNT(*) FROM inclusive_housing GROUP BY city;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 59, "num_statements": 1} {"question": "Add a new record to the \"warehouses\" table with the following data: warehouse_name = \"Mumbai Warehouse\", country = \"India\", capacity = 5000, and current_inventory = 3000", "schema": "CREATE TABLE warehouses (id INT, warehouse_name VARCHAR(50), country VARCHAR(50), capacity INT, current_inventory INT);", "sql": "INSERT INTO warehouses (id, warehouse_name, country, capacity, current_inventory) VALUES (1, 'Mumbai Warehouse', 'India', 5000, 3000);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 134, "num_statements": 1} {"question": "Calculate the total amount of resources depleted in the mining industry, broken down by resource type.", "schema": "CREATE TABLE resource_depletion (id INT, mining_operation_id INT, resource_type VARCHAR(50), amount_depleted FLOAT);", "sql": "SELECT resource_type, SUM(amount_depleted) FROM resource_depletion GROUP BY resource_type;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 90, "num_statements": 1} {"question": "Which community programs have the most participants in Suburb I?", "schema": "CREATE TABLE CommunityPrograms (id INT, program_name VARCHAR(50), location VARCHAR(50), participants INT); INSERT INTO CommunityPrograms (id, program_name, location, participants) VALUES (1, 'Youth Mentoring', 'Suburb I', 150);", "sql": "SELECT program_name, MAX(participants) FROM CommunityPrograms WHERE location = 'Suburb I';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 90, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'numeric' (example 83).", "schema": null, "sql": "INSERT INTO num_exp_div VALUES (1,7,'0');", "explanation": "DML from PostgreSQL core regression test for Numeric.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 41, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'numeric_big' (example 336).", "schema": null, "sql": "INSERT INTO num_exp_mul VALUES (7,8,'-6958475505053954666339703437.48985528725312694198056665033448258303533387675711770743843194274181580881296671866212320171337132096489224277825857521033238709600');", "explanation": "DML from PostgreSQL core regression test for Numeric Big.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 201, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the final score agains Dynamo Kyiv, when the group position was 1st?", "schema": "CREATE TABLE table_name_39 (result_f___a VARCHAR, group_position VARCHAR, opponents VARCHAR)", "sql": "SELECT result_f___a FROM table_name_39 WHERE group_position = '1st' AND opponents = 'dynamo kyiv';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 98, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'btree_index' (example 78).", "schema": null, "sql": "insert into btree_bpchar values ('foo'), ('fool'), ('bar'), ('quux');", "explanation": "DML from PostgreSQL core regression test for Btree Index.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 69, "num_statements": 1} {"question": "What is the average amount of research grants received by faculty members in the Education department in the year 2018?", "schema": "CREATE TABLE Faculty (FacultyID INT, Name VARCHAR(50), Department VARCHAR(50), Gender VARCHAR(10), GrantAmt FLOAT, GrantYear INT);", "sql": "SELECT AVG(GrantAmt) FROM Faculty WHERE Department = 'Education' AND GrantYear = 2018;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 86, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: The firs park stadium had the lowest average attendence of what?", "schema": "CREATE TABLE table_11206916_1 (average INTEGER, stadium VARCHAR)", "sql": "SELECT MIN(average) FROM table_11206916_1 WHERE stadium = 'Firs Park';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 70, "num_statements": 1} {"question": "What are the total waste generation quantities for each waste type, including recycling rates, for each region in 2019 and 2020?", "schema": "CREATE TABLE WasteGeneration (Region VARCHAR(50), WasteType VARCHAR(50), Quantity INT, Year INT); CREATE TABLE RecyclingRates (WasteType VARCHAR(50), RecyclingRate DECIMAL(5,2)); INSERT INTO WasteGeneration (Region, WasteType, Quantity, Year) VALUES ('RegionA', 'Plastic', 1200, 2019), ('RegionA', 'Glass', 1500, 2019), ('RegionB', 'Plastic', 1800, 2019), ('RegionB', 'Glass', 2000, 2019), ('RegionA', 'Plastic', 1300, 2020), ('RegionA', 'Glass', 1400, 2020), ('RegionB', 'Plastic', 1500, 2020), ('RegionB', 'Glass', 1800, 2020); INSERT INTO RecyclingRates (WasteType, RecyclingRate) VALUES ('Plastic', 0.2), ('Glass', 0.3);", "sql": "SELECT WasteGeneration.Region, WasteGeneration.WasteType, SUM(WasteGeneration.Quantity) AS TotalWasteQuantity, RecyclingRates.RecyclingRate FROM WasteGeneration INNER JOIN RecyclingRates ON WasteGeneration.WasteType = RecyclingRates.WasteType WHERE WasteGeneration.Year BETWEEN 2019 AND 2020 GROUP BY WasteGeneration.Region, WasteGeneration.WasteType, RecyclingRates.RecyclingRate;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 381, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the sum of rounds that has a pick of 9 and is named jim pyburn?", "schema": "CREATE TABLE table_name_62 (round INTEGER, pick VARCHAR, name VARCHAR)", "sql": "SELECT SUM(round) FROM table_name_62 WHERE pick = 9 AND name = 'jim pyburn';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 76, "num_statements": 1} {"question": "What is the total word count of articles written by John Doe?", "schema": "CREATE TABLE Articles (id INT, title VARCHAR(255), word_count INT, author VARCHAR(255)); INSERT INTO Articles (id, title, word_count, author) VALUES (1, 'Article 1', 500, 'John Doe'), (2, 'Article 2', 600, 'Jane Smith'), (3, 'Article 3', 700, 'John Doe'), (4, 'Article 4', 800, 'Jane Smith'), (5, 'Article 5', 900, 'John Doe'), (6, 'Article 6', 1000, 'John Doe'), (7, 'Article 7', 1100, 'Jane Smith'), (8, 'Article 8', 1200, 'John Doe'), (9, 'Article 9', 1300, 'Jane Smith'), (10, 'Article 10', 1400, 'John Doe'), (11, 'Article 11', 1500, 'Jane Smith'), (12, 'Article 12', 1600, 'John Doe');", "sql": "SELECT SUM(word_count) FROM Articles WHERE author = 'John Doe';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 63, "num_statements": 1} {"question": "What is the average height of female basketball players in the players table?", "schema": "CREATE TABLE players (player_id INT, name VARCHAR(50), position VARCHAR(50), height FLOAT, weight INT, team_id INT, league VARCHAR(50)); INSERT INTO players (player_id, name, position, height, weight, team_id, league) VALUES (1, 'Alice', 'Guard', 1.75, 70, 101, 'NBA');", "sql": "SELECT AVG(height) FROM players WHERE position = 'Guard' AND league = 'NBA' AND gender = 'Female';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 98, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was Matthew Warchus' Award?", "schema": "CREATE TABLE table_name_29 (award VARCHAR, nominee VARCHAR)", "sql": "SELECT award FROM table_name_29 WHERE nominee = 'matthew warchus';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the software with version 1.2.2.0?", "schema": "CREATE TABLE table_15038373_1 (software VARCHAR, version VARCHAR)", "sql": "SELECT software FROM table_15038373_1 WHERE version = '1.2.2.0';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 64, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What college did the defensive back attend?", "schema": "CREATE TABLE table_14650162_1 (college VARCHAR, position VARCHAR)", "sql": "SELECT college FROM table_14650162_1 WHERE position = 'Defensive Back';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 71, "num_statements": 1} {"question": "How many traditional artists are engaged in each cultural preservation program?", "schema": "CREATE TABLE traditional_artists (id INT, name VARCHAR(50), program VARCHAR(50), location VARCHAR(50)); INSERT INTO traditional_artists (id, name, program, location) VALUES (1, 'John Doe', 'Weaving', 'Peru'), (2, 'Jane Smith', 'Pottery', 'Bolivia');", "sql": "SELECT program, COUNT(*) FROM traditional_artists GROUP BY program;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 67, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the result after 1994 for team MB2 and 31 starts?", "schema": "CREATE TABLE table_name_64 (finish VARCHAR, start VARCHAR, year VARCHAR, team VARCHAR)", "sql": "SELECT finish FROM table_name_64 WHERE year > 1994 AND team = 'mb2' AND start = '31';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 85, "num_statements": 1} {"question": "Delete all records in the 'oil_spills' table where 'spill_year' is before 1990", "schema": "CREATE TABLE oil_spills (spill_id INT PRIMARY KEY, spill_name VARCHAR(50), spill_year INT, spilled_volume FLOAT);", "sql": "DELETE FROM oil_spills WHERE spill_year < 1990;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_delete", "is_postgresql_specific": false, "sql_length": 47, "num_statements": 1} {"question": "PostgreSQL regression test 'enum': Write the SELECT query (example 140).", "schema": null, "sql": "SELECT 'bad'::bogon;", "explanation": "Regression test for Enum in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT 'bad'::bogon) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 20, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the Latin for \"you sang\"?", "schema": "CREATE TABLE table_26614365_1 (latin VARCHAR, english VARCHAR)", "sql": "SELECT latin FROM table_26614365_1 WHERE english = 'you sang';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 62, "num_statements": 1} {"question": "Which cruelty-free makeup brands have the highest average rating?", "schema": "CREATE TABLE cruelty_free_makeup (brand VARCHAR(255), product VARCHAR(255), rating DECIMAL(2,1), cruelty_free BOOLEAN); INSERT INTO cruelty_free_makeup (brand, product, rating, cruelty_free) VALUES ('Pacifica', 'Foundation', 4.3, true), ('NYX', 'Mascara', 4.5, true), ('e.l.f.', 'Eyeshadow', 4.2, true), ('Milani', 'Lipstick', 4.4, true);", "sql": "SELECT brand, AVG(rating) as avg_rating FROM cruelty_free_makeup WHERE cruelty_free = true GROUP BY brand ORDER BY avg_rating DESC;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 131, "num_statements": 1} {"question": "List the broadband subscribers with compliance issues and the corresponding compliance issue description, along with the subscribers' geographic areas.", "schema": "CREATE TABLE geographic_area (geographic_area VARCHAR(20)); INSERT INTO geographic_area (geographic_area) VALUES ('urban'), ('rural'); CREATE TABLE broadband_subscribers (subscriber_id INT, name VARCHAR(50), geographic_area VARCHAR(20), has_compliance_issue INT); CREATE TABLE compliance_issues (issue_id INT, description VARCHAR(100)); INSERT INTO broadband_subscribers (subscriber_id, name, geographic_area, has_compliance_issue) VALUES (1, 'Jane Doe', 'urban', 1); INSERT INTO compliance_issues (issue_id, description) VALUES (1, 'Non-payment of annual fee');", "sql": "SELECT broadband_subscribers.name, geographic_area.geographic_area, compliance_issues.description FROM broadband_subscribers JOIN geographic_area ON broadband_subscribers.geographic_area = geographic_area.geographic_area JOIN compliance_issues ON broadband_subscribers.has_compliance_issue = compliance_issues.issue_id;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 319, "num_statements": 1} {"question": "What is the most common type of emergency incident in the Central district?", "schema": "CREATE TABLE emergency_incidents (id INT, district VARCHAR(20), type VARCHAR(20), date DATE); INSERT INTO emergency_incidents (id, district, type, date) VALUES (1, 'Downtown', 'Fire', '2022-01-01'); INSERT INTO emergency_incidents (id, district, type, date) VALUES (2, 'Uptown', 'Medical', '2022-01-01'); INSERT INTO emergency_incidents (id, district, type, date) VALUES (3, 'Central', 'Fire', '2022-01-01'); INSERT INTO emergency_incidents (id, district, type, date) VALUES (4, 'Central', 'Traffic', '2022-01-02');", "sql": "SELECT type, COUNT(*) AS count FROM emergency_incidents WHERE district = 'Central' GROUP BY type ORDER BY count DESC LIMIT 1;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 125, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who is the opponent of the match with a win result and a time of 3:02?", "schema": "CREATE TABLE table_name_13 (opponent VARCHAR, res VARCHAR, time VARCHAR)", "sql": "SELECT opponent FROM table_name_13 WHERE res = 'win' AND time = '3:02';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 71, "num_statements": 1} {"question": "What is the minimum number of ethical AI courses offered in South America in 2020?", "schema": "CREATE TABLE ethical_ai_courses_south_america (country VARCHAR(20), year INT, courses INT); INSERT INTO ethical_ai_courses_south_america (country, year, courses) VALUES ('Brazil', 2020, 10), ('Argentina', 2020, 15), ('Colombia', 2020, 12);", "sql": "SELECT MIN(courses) FROM ethical_ai_courses_south_america WHERE country = 'South America' AND year = 2020;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 106, "num_statements": 1} {"question": "What is the distribution of ticket prices for each team, split into price ranges (low, medium, high)?", "schema": "CREATE TABLE tickets (ticket_id INT, team_id INT, ticket_price DECIMAL(5,2), price_range ENUM('low', 'medium', 'high')); INSERT INTO tickets (ticket_id, team_id, ticket_price, price_range) VALUES (1, 1, 50, 'low'); INSERT INTO tickets (ticket_id, team_id, ticket_price, price_range) VALUES (2, 2, 100, 'medium');", "sql": "SELECT teams.team_name, price_range, COUNT(*) as ticket_count FROM tickets JOIN teams ON tickets.team_id = teams.team_id GROUP BY teams.team_name, price_range;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 159, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the 123 south broad street address name before 1973?", "schema": "CREATE TABLE table_name_33 (name VARCHAR, year VARCHAR, address VARCHAR)", "sql": "SELECT name FROM table_name_33 WHERE year < 1973 AND address = '123 south broad street';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 88, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'btree_gist' (example 7).", "schema": null, "sql": "SELECT count(*) FROM bittmp WHERE a > '011011000100010111011000110000100';", "explanation": "Example query from the 'btree_gist' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 76, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the Game # that scored 87-92?", "schema": "CREATE TABLE table_name_46 (game INTEGER, score VARCHAR)", "sql": "SELECT SUM(game) FROM table_name_46 WHERE score = '87-92';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 58, "num_statements": 1} {"question": "What is the average carbon offset of Green building projects in the Middle East?", "schema": "CREATE TABLE green_building_projects (project_id INT, project_name VARCHAR(50), region VARCHAR(20), carbon_offsets INT); INSERT INTO green_building_projects (project_id, project_name, region, carbon_offsets) VALUES (1, 'Green Office', 'Middle East', 600), (2, 'Sustainable Apartments', 'Middle East', 800), (3, 'Eco-friendly Mall', 'Africa', 1000);", "sql": "SELECT AVG(carbon_offsets) FROM green_building_projects WHERE region = 'Middle East';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 85, "num_statements": 1} {"question": "What is the total number of cybersecurity incidents reported by the military in 2020?", "schema": "CREATE TABLE CybersecurityIncidents (id INT, branch VARCHAR(255), year INT, incidents INT); INSERT INTO CybersecurityIncidents (id, branch, year, incidents) VALUES (1, 'Air Force', 2019, 20), (2, 'Navy', 2018, 30), (3, 'Army', 2020, 40), (4, 'Air Force', 2020, 50), (5, 'Navy', 2020, 60), (6, 'Army', 2019, 70);", "sql": "SELECT SUM(incidents) FROM CybersecurityIncidents WHERE year = 2020;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 68, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who was born on 10 May 1788 that ceased to be queen?", "schema": "CREATE TABLE table_name_3 (ceased_to_be_queen VARCHAR, birth VARCHAR)", "sql": "SELECT ceased_to_be_queen FROM table_name_3 WHERE birth = '10 may 1788';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 72, "num_statements": 1} {"question": "What is the average size of artworks displayed in the 'ContemporaryArt' gallery?", "schema": "CREATE TABLE Artworks (ArtworkID INT, Title VARCHAR(50), Gallery VARCHAR(50), Size INT); INSERT INTO Artworks (ArtworkID, Title, Gallery, Size) VALUES (1, 'Untitled', 'ContemporaryArt', 100); INSERT INTO Artworks (ArtworkID, Title, Gallery, Size) VALUES (2, 'Untitled2', 'ContemporaryArt', 200);", "sql": "SELECT AVG(Size) FROM Artworks WHERE Gallery = 'ContemporaryArt';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 65, "num_statements": 1} {"question": "What is the average fare for public buses in New York City?", "schema": "CREATE TABLE public_transportation (city VARCHAR(20), transport_type VARCHAR(20), fare DECIMAL(5,2)); INSERT INTO public_transportation VALUES ('New York City', 'Bus', 2.75);", "sql": "SELECT AVG(fare) FROM public_transportation WHERE city = 'New York City' AND transport_type = 'Bus';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 100, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the main location for public and masters university", "schema": "CREATE TABLE table_2076533_1 (main_location VARCHAR, control VARCHAR, type VARCHAR)", "sql": "SELECT main_location FROM table_2076533_1 WHERE control = 'Public' AND type = 'Masters university';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 99, "num_statements": 1} {"question": "List all mines in China that mined nickel in 2018", "schema": "CREATE TABLE mining_operations (id INT, mine_name TEXT, location TEXT, material TEXT, quantity INT, date DATE); INSERT INTO mining_operations (id, mine_name, location, material, quantity, date) VALUES (8, 'Nickel Nexus', 'China', 'nickel', 6000, '2018-01-01');", "sql": "SELECT DISTINCT mine_name FROM mining_operations WHERE material = 'nickel' AND location = 'China' AND date = '2018-01-01';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 122, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: When did the VFL pay at Victoria Park?", "schema": "CREATE TABLE table_name_17 (date VARCHAR, venue VARCHAR)", "sql": "SELECT date FROM table_name_17 WHERE venue = 'victoria park';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 61, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the sample size of the poll taken on Dec 13-15, 2007 that had a margin of error of more than 4 and resulted with Republican Mike Huckabee?", "schema": "CREATE TABLE table_name_5 (sample_size INTEGER, date VARCHAR, republican VARCHAR, margin_of_error VARCHAR)", "sql": "SELECT AVG(sample_size) FROM table_name_5 WHERE republican = 'mike huckabee' AND margin_of_error > 4 AND date = 'dec 13-15, 2007';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 130, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is Result, when Date is \"2000-05-23\"?", "schema": "CREATE TABLE table_name_57 (result VARCHAR, date VARCHAR)", "sql": "SELECT result FROM table_name_57 WHERE date = '2000-05-23';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 59, "num_statements": 1} {"question": "What is the total number of satellites deployed per year and the percentage change in satellite deployment from the previous year, by country?", "schema": "CREATE SCHEMA Satellite;CREATE TABLE Satellite.SatelliteDeployment (country VARCHAR(50), year INT, num_satellites INT);INSERT INTO Satellite.SatelliteDeployment (country, year, num_satellites) VALUES ('USA', 2010, 100), ('China', 2010, 50), ('Russia', 2010, 40), ('India', 2010, 30), ('USA', 2011, 120), ('China', 2011, 60), ('Russia', 2011, 45), ('India', 2011, 35);", "sql": "SELECT s1.country, s1.year, s1.num_satellites, (s1.num_satellites - COALESCE(s2.num_satellites, 0)) * 100.0 / COALESCE(s2.num_satellites, 1) AS percentage_change FROM Satellite.SatelliteDeployment s1 LEFT JOIN Satellite.SatelliteDeployment s2 ON s1.country = s2.country AND s1.year = s2.year + 1;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 296, "num_statements": 1} {"question": "What is the total number of concerts for the Hip Hop genre in Asia in the last 2 years?", "schema": "CREATE TABLE concerts (concert_id INT, artist_id INT, genre VARCHAR(50), country VARCHAR(50), timestamp TIMESTAMP); INSERT INTO concerts (concert_id, artist_id, genre, country, timestamp) VALUES (1, 2001, 'Hip Hop', 'Japan', '2022-01-01 00:00:00'), (2, 2002, 'Hip Hop', 'China', '2022-01-02 12:30:00'); CREATE TABLE artists (artist_id INT, name VARCHAR(100)); INSERT INTO artists (artist_id, name) VALUES (2001, 'Kendrick Lamar'), (2002, 'Drake');", "sql": "SELECT COUNT(*) FROM concerts JOIN artists ON concerts.artist_id = artists.artist_id WHERE artists.genre = 'Hip Hop' AND concerts.country IN ('Japan', 'China', 'India', 'South Korea', 'Indonesia') AND concerts.timestamp >= '2020-01-01' AND concerts.timestamp < '2022-01-01';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 274, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the winner for genoa to livorno", "schema": "CREATE TABLE table_name_39 (winner VARCHAR, course VARCHAR)", "sql": "SELECT winner FROM table_name_39 WHERE course = 'genoa to livorno';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 67, "num_statements": 1} {"question": "Which courses have more than 50 students enrolled?", "schema": "CREATE TABLE courses (course_id INT, course_name VARCHAR(255), num_students INT); INSERT INTO courses (course_id, course_name, num_students) VALUES (101, 'Intro to Psychology', 60), (102, 'Data Science', 75), (103, 'Calculus', 45);", "sql": "SELECT course_name FROM courses WHERE num_students > 50;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 56, "num_statements": 1} {"question": "Which rural health clinics were established after 2010, and what are their names?", "schema": "CREATE TABLE clinics (name VARCHAR(255), establishment_date DATE); INSERT INTO clinics (name, establishment_date) VALUES ('Clinic C', '2011-01-01'), ('Clinic D', '2015-05-15');", "sql": "SELECT name FROM clinics WHERE establishment_date > '2010-12-31';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 65, "num_statements": 1} {"question": "pgTAP test for Functap (assertion 258).", "schema": null, "sql": "SELECT * FROM check_test(\n is_aggregate( 'someproc' ),\n false,\n 'is_aggregate(proc)',\n 'Function someproc() should be an aggregate function',\n ''\n);", "explanation": "SQL assertion from pgTAP test suite for Functap.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 163, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the location of the perpetrator with the largest kills.", "schema": "CREATE TABLE perpetrator (LOCATION VARCHAR, Killed VARCHAR)", "sql": "SELECT LOCATION FROM perpetrator ORDER BY Killed DESC LIMIT 1;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 62, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: player is jim henshall what are all the position", "schema": "CREATE TABLE table_26996293_2 (position VARCHAR, player VARCHAR)", "sql": "SELECT position FROM table_26996293_2 WHERE player = 'Jim Henshall';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 68, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'sequence' (example 21).", "schema": null, "sql": "CREATE SEQUENCE sequence_testx AS smallint MAXVALUE 100000;", "explanation": "DDL from PostgreSQL core regression test for Sequence.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 59, "num_statements": 1} {"question": "What is the total number of unique marine species observed by expeditions for the 'Marine Investigators' organization?", "schema": "CREATE TABLE expedition (org VARCHAR(20), species VARCHAR(50)); INSERT INTO expedition VALUES ('Ocean Explorer', 'Dolphin'), ('Ocean Explorer', 'Tuna'), ('Sea Discoverers', 'Shark'), ('Sea Discoverers', 'Whale'), ('Marine Investigators', 'Starfish'), ('Marine Investigators', 'Jellyfish'), ('Marine Investigators', 'Coral'), ('Deep Sea Divers', 'Squid');", "sql": "SELECT COUNT(DISTINCT species) FROM expedition WHERE org = 'Marine Investigators';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 82, "num_statements": 1} {"question": "PostgreSQL regression test 'jsonb_jsonpath': Write the SELECT query (example 188).", "schema": null, "sql": "select jsonb_path_query('\"a\"', '-$');", "explanation": "Regression test for Jsonb Jsonpath in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select jsonb_path_query('\"a\"', '-$')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "advanced", "category": "query_select", "is_postgresql_specific": false, "sql_length": 37, "num_statements": 1} {"question": "Show a SQL definition from the pg_partman project (pg_partman--2.3.2--2.3.3, item 10).", "schema": null, "sql": "/*\n * Create the trigger function for the parent table of a time-based partition set\n */\nCREATE OR REPLACE FUNCTION create_function_time(p_parent_table text, p_job_id bigint DEFAULT NULL) RETURNS void\n LANGUAGE plpgsql SECURITY DEFINER\n AS $$\nDECLARE\n\nex_context text;\nex_detail text;\nex_hint text;\nex_message text;\nv_control text;\nv_count int;\nv_current_partition_name text;\nv_current_partition_timestamp timestamptz;\nv_datetime_string text;\nv_epoch boolean;\nv_final_partition_timestamp timestamptz;\nv_function_name text;\nv_job_id bigint;\nv_jobmon boolean;\nv_jobmon_schema text;\nv_old_search_path text;\nv_new_length int;\nv_next_partition_name text;\nv_next_partition_timestamp timestamptz;\nv_parent_schema text;\nv_parent_tablename text;\nv_partition_interval interval;\nv_prev_partition_name text;\nv_prev_partition_timestamp timestamptz;\nv_step_id bigint;\nv_trig_func text;\nv_optimize_trigger int;\nv_type text;\n\nBEGIN\n\nSELECT partition_type\n , partition_interval::interval\n , epoch\n , control\n , optimize_trigger\n , datetime_string\n , jobmon\nINTO v_type\n , v_partition_interval\n , v_epoch\n , v_control\n , v_optimize_trigger\n , v_datetime_string\n , v_jobmon\nFROM @extschema@.part_config\nWHERE parent_table = p_parent_table\nAND (partition_type = 'time' OR partition_type = 'time-custom');\n\nIF NOT FOUND THEN\n RAISE EXCEPTION 'ERROR: no config found for %', p_parent_table;\nEND IF;\n\nIF v_jobmon THEN\n SELECT nspname INTO v_jobmon_schema FROM pg_namespace n, pg_extension e WHERE e.extname = 'pg_jobmon' AND e.extnamespace = n.oid;\n IF v_jobmon_schema IS NOT NULL THEN\n SELECT current_setting('search_path') INTO v_old_search_path;\n EXECUTE format('SELECT set_config(%L, %L, %L)', 'search_path', '@extschema@,'||v_jobmon_schema, 'false');\n END IF;\nEND IF;\n\nIF v_jobmon_schema IS NOT NULL THEN\n IF p_job_id IS NULL THEN\n v_job_id := add_job(format('PARTMAN CREATE FUNCTION: %s', p_parent_table));\n ELSE\n v_job_id = p_job_id;\n END IF;\n v_step_id := add_step(v_job_id, format('Creating partition function for table %s', p_parent_table));\nEND IF;\n\nSELECT schemaname, tablename INTO v_parent_schema, v_parent_tablename\nFROM pg_catalog.pg_tables\nWHERE schemaname = split_part(p_parent_table, '.', 1)::name\nAND tablename = split_part(p_parent_table, '.', 2)::name;\n\nv_function_name := @extschema@.check_name_length(v_parent_tablename, '_part_trig_func', FALSE);\n\nIF v_type = 'time' THEN\n v_trig_func := format('CREATE OR REPLACE FUNCTION %I.%I() RETURNS trigger LANGUAGE plpgsql AS $t$\n DECLARE\n v_count int;\n v_partition_name text;\n v_partition_timestamp timestamptz;\n BEGIN\n IF TG_OP = ''INSERT'' THEN\n '\n , v_parent_schema\n , v_function_name);\n\n IF v_epoch = false THEN\n CASE\n WHEN v_partition_interval = '15 mins' THEN\n v_trig_func := v_trig_func||format('v_partition_timestamp := date_trunc(''hour'', NEW.%I) +\n ''15min''::interval * floor(date_part(''minute'', NEW.%I) / 15.0);' , v_control , v_control);\n v_current_partition_timestamp := date_trunc('hour', CURRENT_TIMESTAMP) +\n '15min'::interval * floor(date_part('minute', CURRENT_TIMESTAMP) / 15.0);\n WHEN v_partition_interval = '30 mins' THEN\n v_trig_func := v_trig_func||format('v_partition_timestamp := date_trunc(''hour'', NEW.%I) +\n ''30min''::interval * floor(date_part(''minute'', NEW.%I) / 30.0);' , v_control , v_control);\n v_current_partition_timestamp := date_trunc('hour', CURRENT_TIMESTAMP) +\n '30min'::interval * floor(date_part('minute', CURRENT_TIMESTAMP) / 30.0);\n WHEN v_partition_interval = '1 hour' THEN\n v_trig_func := v_trig_func||format('v_partition_timestamp := date_trunc(''hour'', NEW.%I);', v_control);\n v_current_partition_timestamp := date_trunc('hour', CURRENT_TIMESTAMP);\n WHEN v_partition_interval = '1 day' THEN\n v_trig_func := v_trig_func||format('v_partition_timestamp := date_trunc(''day'', NEW.%I);', v_control);\n v_current_partition_timestamp := date_trunc('day', CURRENT_TIMESTAMP);\n WHEN v_partition_interval = '1 week' THEN\n v_trig_func := v_trig_func||format('v_partition_timestamp := date_trunc(''week'', NEW.%I);', v_control);\n v_current_partition_timestamp := date_trunc('week', CURRENT_TIMESTAMP);\n WHEN v_partition_interval = '1 month' THEN\n v_trig_func := v_trig_func||format('v_partition_timestamp := date_trunc(''month'', NEW.%I);', v_control);\n v_current_partition_timestamp := date_trunc('month', CURRENT_TIMESTAMP);\n WHEN v_partition_interval = '3 months' THEN\n v_trig_func := v_trig_func||format('v_partition_timestamp := date_trunc(''quarter'', NEW.%I);', v_control);\n v_current_partition_timestamp := date_trunc('quarter', CURRENT_TIMESTAMP);\n WHEN v_partition_interval = '1 year' THEN\n v_trig_func := v_trig_func||format('v_partition_timestamp := date_trunc(''year'', NEW.%I);', v_control);\n v_current_partition_timestamp := date_trunc('year', CURRENT_TIMESTAMP);\n END CASE;\n ELSE -- epoch is true\n CASE\n WHEN v_partition_interval = '15 mins' THEN\n v_trig_func := v_trig_func||format('v_partition_timestamp := date_trunc(''hour'', to_timestamp(NEW.%I)) +\n ''15min''::interval * floor(date_part(''minute'', NEW.%I) / 15.0);' , v_control , v_control);\n v_current_partition_timestamp := date_trunc('hour', CURRENT_TIMESTAMP) +\n '15min'::interval * floor(date_part('minute', CURRENT_TIMESTAMP) / 15.0);\n WHEN v_partition_interval = '30 mins' THEN\n v_trig_func := v_trig_func||format('v_partition_timestamp := date_trunc(''hour'', to_timestamp(NEW.%I)) +\n ''30min''::interval * floor(date_part(''minute'', NEW.%I) / 30.0);' , v_control , v_control);\n v_current_partition_timestamp := date_trunc('hour', CURRENT_TIMESTAMP) +\n '30min'::interval * floor(date_part('minute', CURRENT_TIMESTAMP) / 30.0);\n WHEN v_partition_interval = '1 hour' THEN\n v_trig_func := v_trig_func||format('v_partition_timestamp := date_trunc(''hour'', to_timestamp(NEW.%I));', v_control);\n v_current_partition_timestamp := date_trunc('hour', CURRENT_TIMESTAMP);\n WHEN v_partition_interval = '1 day' THEN\n v_trig_func := v_trig_func||format('v_partition_timestamp := date_trunc(''day'', to_timestamp(NEW.%I));', v_control);\n v_current_partition_timestamp := date_trunc('day', CURRENT_TIMESTAMP);\n WHEN v_partition_interval = '1 week' THEN\n v_trig_func := v_trig_func||format('v_partition_timestamp := date_trunc(''week'', to_timestamp(NEW.%I));', v_control);\n v_current_partition_timestamp := date_trunc('week', CURRENT_TIMESTAMP);\n WHEN v_partition_interval = '1 month' THEN\n v_trig_func := v_trig_func||format('v_partition_timestamp := date_trunc(''month'', to_timestamp(NEW.%I));', v_control);\n v_current_partition_timestamp := date_trunc('month', CURRENT_TIMESTAMP);\n WHEN v_partition_interval = '3 months' THEN\n v_trig_func := v_trig_func||format('v_partition_timestamp := date_trunc(''quarter'', to_timestamp(NEW.%I));', v_control);\n v_current_partition_timestamp := date_trunc('quarter', CURRENT_TIMESTAMP);\n WHEN v_partition_interval = '1 year' THEN\n v_trig_func := v_trig_func||format('v_partition_timestamp := date_trunc(''year'', to_timestamp(NEW.%I));', v_control);\n v_current_partition_timestamp := date_trunc('year', CURRENT_TIMESTAMP);\n END CASE;\n END IF;\n\n v_current_partition_name := @extschema@.check_name_length(v_parent_tablename, to_char(v_current_partition_timestamp, v_datetime_string), TRUE);\n v_next_partition_timestamp := v_current_partition_timestamp + v_partition_interval::interval;\n\n IF v_epoch = false THEN\n v_trig_func := v_trig_func ||format('\n IF NEW.%I >= %L AND NEW.%I < %L THEN '\n , v_control\n , v_current_partition_timestamp\n , v_control\n , v_next_partition_timestamp);\n ELSE\n v_trig_func := v_trig_func ||format('\n IF to_timestamp(NEW.%I) >= %L AND to_timestamp(NEW.%I) < %L THEN '\n , v_control\n , v_current_partition_timestamp\n , v_control\n , v_next_partition_timestamp);\n END IF;\n SELECT count(*) INTO v_count FROM pg_catalog.pg_tables WHERE schemaname = v_parent_schema::name AND tablename = v_current_partition_name::name;\n IF v_count > 0 THEN\n v_trig_func := v_trig_func || format('\n INSERT INTO %I.%I VALUES (NEW.*); ', v_parent_schema, v_current_partition_name);\n ELSE\n v_trig_func := v_trig_func || '\n -- Child table for current values does not exist in this partition set, so write to parent\n RETURN NEW;';\n END IF;\n FOR i IN 1..v_optimize_trigger LOOP\n v_prev_partition_timestamp := v_current_partition_timestamp - (v_partition_interval::interval * i);\n v_next_partition_timestamp := v_current_partition_timestamp + (v_partition_interval::interval * i);\n v_final_partition_timestamp := v_next_partition_timestamp + (v_partition_interval::interval);\n v_prev_partition_name := @extschema@.check_name_length(v_parent_tablename, to_char(v_prev_partition_timestamp, v_datetime_string), TRUE);\n v_next_partition_name := @extschema@.check_name_length(v_parent_tablename, to_char(v_next_partition_timestamp, v_datetime_string), TRUE);\n\n -- Check that child table exist before making a rule to insert to them.\n -- Handles optimize_trigger being larger than premake (to go back in time further) and edge case of changing optimize_trigger immediately after running create_parent().\n SELECT count(*) INTO v_count FROM pg_catalog.pg_tables WHERE schemaname = v_parent_schema::name AND tablename = v_prev_partition_name::name;\n IF v_count > 0 THEN\n IF v_epoch = false THEN\n v_trig_func := v_trig_func ||format('\n ELSIF NEW.%I >= %L AND NEW.%I < %L THEN\n INSERT INTO %I.%I VALUES (NEW.*);'\n , v_control\n , v_prev_partition_timestamp\n , v_control\n , v_prev_partition_timestamp + v_partition_interval::interval\n , v_parent_schema\n , v_prev_partition_name);\n ELSE\n v_trig_func := v_trig_func ||format('\n ELSIF to_timestamp(NEW.%I) >= %L AND to_timestamp(NEW.%I) < %L THEN\n INSERT INTO %I.%I VALUES (NEW.*);'\n , v_control\n , v_prev_partition_timestamp\n , v_control\n , v_prev_partition_timestamp + v_partition_interval::interval\n , v_parent_schema\n , v_prev_partition_name);\n\n END IF;\n END IF;\n SELECT count(*) INTO v_count FROM pg_catalog.pg_tables WHERE schemaname = v_parent_schema::name AND tablename = v_next_partition_name::name;\n IF v_count > 0 THEN\n IF v_epoch = false THEN\n v_trig_func := v_trig_func ||format('\n ELSIF NEW.%I >= %L AND NEW.%I < %L THEN\n INSERT INTO %I.%I VALUES (NEW.*);'\n , v_control\n , v_next_partition_timestamp\n , v_control\n , v_final_partition_timestamp\n , v_parent_schema\n , v_next_partition_name);\n ELSE\n v_trig_func := v_trig_func ||format('\n ELSIF to_timestamp(NEW.%I) >= %L AND to_timestamp(NEW.%I) < %L THEN\n INSERT INTO %I.%I VALUES (NEW.*);'\n , v_control\n , v_next_partition_timestamp\n , v_control\n , v_final_partition_timestamp\n , v_parent_schema\n , v_next_partition_name);\n\n END IF;\n END IF;\n\n END LOOP;\n\n v_trig_func := v_trig_func||format('\n ELSE\n v_partition_name := @extschema@.check_name_length(%L, to_char(v_partition_timestamp, %L), TRUE);\n SELECT count(*) INTO v_count FROM pg_catalog.pg_tables WHERE schemaname = %L::name AND tablename = v_partition_name::name;\n IF v_count > 0 THEN\n EXECUTE format(''INSERT INTO %%I.%%I VALUES($1.*)'', %L, v_partition_name) USING NEW;\n ELSE\n RETURN NEW;\n END IF;\n END IF;'\n , v_parent_tablename\n , v_datetime_string\n , v_parent_schema\n , v_parent_schema);\n\n v_trig_func := v_trig_func ||'\n END IF;\n RETURN NULL;\n EXCEPTION WHEN OTHERS THEN\n RAISE WARNING ''pg_partman insert into child table failed, row inserted into parent (%.%). ERROR: %'', TG_TABLE_SCHEMA, TG_TABLE_NAME, COALESCE(SQLERRM, ''unknown'');\n RETURN NEW;\n END $t$;';\n\n EXECUTE v_trig_func;\n\n IF v_jobmon_schema IS NOT NULL THEN\n PERFORM update_step(v_step_id, 'OK', format('Added function for current time interval: %s to %s'\n , v_current_partition_timestamp\n , v_final_partition_timestamp-'1sec'::interval));\n END IF;\n\nELSIF v_type = 'time-custom' THEN\n\n v_trig_func := format('CREATE OR REPLACE FUNCTION %I.%I() RETURNS trigger LANGUAGE plpgsql AS $t$\n DECLARE\n v_child_schemaname text;\n v_child_table text;\n v_child_tablename text;\n BEGIN\n '\n , v_parent_schema\n , v_function_name);\n\n IF v_epoch = false THEN\n v_trig_func := v_trig_func || format('\n\n SELECT child_table INTO v_child_table\n FROM @extschema@.custom_time_partitions\n WHERE partition_range @> NEW.%I\n AND parent_table = %L;'\n , v_control\n , v_parent_schema||'.'||v_parent_tablename);\n\n ELSE -- epoch true\n v_trig_func := v_trig_func || format('\n\n SELECT child_table INTO v_child_table\n FROM @extschema@.custom_time_partitions\n WHERE partition_range @> to_timestamp(NEW.%I)\n AND parent_table = %L;'\n , v_control\n , v_parent_schema||'.'||v_parent_tablename);\n\n END IF;\n\n v_trig_func := v_trig_func || '\n\n SELECT schemaname, tablename INTO v_child_schemaname, v_child_tablename\n FROM pg_catalog.pg_tables\n WHERE schemaname = split_part(v_child_table, ''.'', 1)::name\n AND tablename = split_part(v_child_table, ''.'', 2)::name;\n IF v_child_schemaname IS NOT NULL AND v_child_tablename IS NOT NULL THEN\n EXECUTE format(''INSERT INTO %I.%I VALUES ($1.*)'', v_child_schemaname, v_child_tablename) USING NEW;\n ELSE\n RETURN NEW;\n END IF;\n\n RETURN NULL;\n EXCEPTION WHEN OTHERS THEN\n RAISE WARNING ''pg_partman insert into child table failed, row inserted into parent (%.%). ERROR: %'', TG_TABLE_SCHEMA, TG_TABLE_NAME, COALESCE(SQLERRM, ''unknown'');\n RETURN NEW;\n END $t$;';\n\n EXECUTE v_trig_func;\n\n IF v_jobmon_schema IS NOT NULL THEN\n PERFORM update_step(v_step_id, 'OK', format('Added function for custom time table: %s', p_parent_table));\n END IF;\n\nELSE\n RAISE EXCEPTION 'ERROR: Invalid time partitioning type given: %', v_type;\nEND IF;\n\nIF v_jobmon_schema IS NOT NULL THEN\n PERFORM close_job(v_job_id);\n EXECUTE format('SELECT set_config(%L, %L, %L)', 'search_path', v_old_search_path, 'false');\nEND IF;\n\nEXCEPTION\n WHEN OTHERS THEN\n GET STACKED DIAGNOSTICS ex_message = MESSAGE_TEXT,\n ex_context = PG_EXCEPTION_CONTEXT,\n ex_detail = PG_EXCEPTION_DETAIL,\n ex_hint = PG_EXCEPTION_HINT;\n IF v_jobmon_schema IS NOT NULL THEN\n IF v_job_id IS NULL THEN\n EXECUTE format('SELECT %I.add_job(''PARTMAN CREATE FUNCTION: %s'')', v_jobmon_schema, p_parent_table) INTO v_job_id;\n EXECUTE format('SELECT %I.add_step(%s, ''Partition function maintenance for table %s failed'')', v_jobmon_schema, v_job_id, p_parent_table) INTO v_step_id;\n ELSIF v_step_id IS NULL THEN\n EXECUTE format('SELECT %I.add_step(%s, ''EXCEPTION before first step logged'')', v_jobmon_schema, v_job_id) INTO v_step_id;\n END IF;\n EXECUTE format('SELECT %I.update_step(%s, ''CRITICAL'', %L)', v_jobmon_schema, v_step_id, 'ERROR: '||coalesce(SQLERRM,'unknown'));\n EXECUTE format('SELECT %I.fail_job(%s)', v_jobmon_schema, v_job_id);\n END IF;\n RAISE EXCEPTION '%\nCONTEXT: %\nDETAIL: %\nHINT: %', ex_message, ex_context, ex_detail, ex_hint;\nEND\n$$;", "explanation": "SQL definition from the open-source pg_partman PostgreSQL project.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "plpgsql_trigger", "is_postgresql_specific": true, "sql_length": 17968, "num_statements": 181} {"question": "Generate PostgreSQL SQL for: What was the to par that goes with the score 68-74-69=211?", "schema": "CREATE TABLE table_name_70 (to_par VARCHAR, score VARCHAR)", "sql": "SELECT to_par FROM table_name_70 WHERE score = 68 - 74 - 69 = 211;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which gold Coast has a Melbourne of yes, a Perth of yes, an Auckland of no, and a Sydney of no?", "schema": "CREATE TABLE table_name_36 (gold_coast VARCHAR, sydney VARCHAR, auckland VARCHAR, melbourne VARCHAR, perth VARCHAR)", "sql": "SELECT gold_coast FROM table_name_36 WHERE melbourne = 'yes' AND perth = 'yes' AND auckland = 'no' AND sydney = 'no';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 117, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many games did they play the carolina hurricanes?", "schema": "CREATE TABLE table_23453931_8 (game VARCHAR, opponent VARCHAR)", "sql": "SELECT COUNT(game) FROM table_23453931_8 WHERE opponent = 'Carolina Hurricanes';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 80, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the final score for the game at giants stadium when the indianapolis colts were the visiting team?", "schema": "CREATE TABLE table_name_9 (final_score VARCHAR, visiting_team VARCHAR, stadium VARCHAR)", "sql": "SELECT final_score FROM table_name_9 WHERE visiting_team = 'indianapolis colts' AND stadium = 'giants stadium';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 111, "num_statements": 1} {"question": "Which indigenous communities are represented in the 'arctic_communities' table, and what is their total population?", "schema": "CREATE TABLE arctic_communities (name TEXT, population INTEGER);", "sql": "SELECT name, SUM(population) FROM arctic_communities GROUP BY name;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 67, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which Position has a Round larger than 4, and a Player of patrick johnson?", "schema": "CREATE TABLE table_name_45 (position VARCHAR, round VARCHAR, player VARCHAR)", "sql": "SELECT position FROM table_name_45 WHERE round > 4 AND player = 'patrick johnson';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 82, "num_statements": 1} {"question": "What is the minimum funding amount received by companies founded in the United States?", "schema": "CREATE TABLE company (id INT, name VARCHAR(50), country VARCHAR(50)); CREATE TABLE investment_round (id INT, company_id INT, funding_amount INT); INSERT INTO company (id, name, country) VALUES (1, 'Acme Corp', 'USA'); INSERT INTO investment_round (id, company_id, funding_amount) VALUES (1, 1, 500000); INSERT INTO investment_round (id, company_id, funding_amount) VALUES (2, 1, 750000); INSERT INTO company (id, name, country) VALUES (2, 'Maple Leaf Technologies', 'Canada'); INSERT INTO investment_round (id, company_id, funding_amount) VALUES (3, 2, 250000);", "sql": "SELECT MIN(ir.funding_amount) AS min_funding_amount FROM company c JOIN investment_round ir ON c.id = ir.company_id WHERE c.country = 'USA';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 140, "num_statements": 1} {"question": "Insert new records for the players who have recently signed up for the esports tournament.", "schema": "CREATE TABLE EsportsTournament (TournamentID INT, PlayerID INT, TournamentDate DATE); INSERT INTO EsportsTournament (TournamentID, PlayerID, TournamentDate) VALUES (1, 6, '2021-07-01'), (2, 7, '2021-07-01'), (3, 8, '2021-07-01');", "sql": "INSERT INTO EsportsTournament (TournamentID, PlayerID, TournamentDate) VALUES (4, 9, '2021-07-01'), (5, 10, '2021-07-01'), (6, 11, '2021-07-01');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 145, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: List the names of technicians who have not been assigned to repair machines.", "schema": "CREATE TABLE technician (Name VARCHAR, technician_id VARCHAR); CREATE TABLE repair_assignment (Name VARCHAR, technician_id VARCHAR)", "sql": "SELECT Name FROM technician WHERE NOT technician_id IN (SELECT technician_id FROM repair_assignment);", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 101, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What home team score has a Away team of melbourne?", "schema": "CREATE TABLE table_name_63 (home_team VARCHAR, away_team VARCHAR)", "sql": "SELECT home_team AS score FROM table_name_63 WHERE away_team = 'melbourne';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 75, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the Term of office for Leonard Reynolds of NSW?", "schema": "CREATE TABLE table_name_1 (term_of_office VARCHAR, state VARCHAR, member VARCHAR)", "sql": "SELECT term_of_office FROM table_name_1 WHERE state = 'nsw' AND member = 'leonard reynolds';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 92, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which MWEHL team has player Ben Johnson?", "schema": "CREATE TABLE table_name_29 (mwehl_team VARCHAR, player VARCHAR)", "sql": "SELECT mwehl_team FROM table_name_29 WHERE player = 'ben johnson';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "Show all orders for the most caloric product.", "schema": "CREATE TABLE products (id INT PRIMARY KEY, name VARCHAR(255), category VARCHAR(255), supplier_id INT, FOREIGN KEY (supplier_id) REFERENCES suppliers(id)); CREATE TABLE nutrients (id INT PRIMARY KEY, product_id INT, calories INT, FOREIGN KEY (product_id) REFERENCES products(id)); CREATE TABLE orders (id INT PRIMARY KEY, product_id INT, order_date DATE, quantity INT, FOREIGN KEY (product_id) REFERENCES products(id)); INSERT INTO products (id, name, category, supplier_id) VALUES (6, 'Avocado', 'Fruits', 6); INSERT INTO nutrients (id, product_id, calories) VALUES (6, 6, 234); INSERT INTO orders (id, product_id, order_date, quantity) VALUES (6, 6, '2022-01-08', 30);", "sql": "SELECT o.id, o.product_id, o.order_date, o.quantity FROM orders o JOIN products p ON o.product_id = p.id WHERE p.id = (SELECT product_id FROM nutrients WHERE calories = (SELECT MAX(calories) FROM nutrients));", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 208, "num_statements": 1} {"question": "Update the depth of all marine protected areas in the Indian Ocean by 10%.", "schema": "CREATE TABLE marine_protected_areas (id INT, name VARCHAR(255), location VARCHAR(255), depth FLOAT); INSERT INTO marine_protected_areas (id, name, location, depth) VALUES (1, 'MPA 1', 'Pacific Ocean', 123.4), (2, 'MPA 2', 'Atlantic Ocean', 150.0), (3, 'MPA 3', 'Indian Ocean', 75.0);", "sql": "UPDATE marine_protected_areas SET depth = depth * 1.1 WHERE location = 'Indian Ocean';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 86, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the average agricultural panel of the composition with a labour panel less than 6, more than 0 nominations by Taoiseach, and a total less than 4?", "schema": "CREATE TABLE table_name_62 (agricultural_panel INTEGER, total VARCHAR, labour_panel VARCHAR, nominated_by_the_taoiseach VARCHAR)", "sql": "SELECT AVG(agricultural_panel) FROM table_name_62 WHERE labour_panel < 6 AND nominated_by_the_taoiseach > 0 AND total < 4;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 122, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Find the last name of the individuals that have been contact individuals of an organization.", "schema": "CREATE TABLE individuals (individual_last_name VARCHAR, individual_id VARCHAR); CREATE TABLE organization_contact_individuals (individual_id VARCHAR)", "sql": "SELECT DISTINCT t1.individual_last_name FROM individuals AS t1 JOIN organization_contact_individuals AS t2 ON t1.individual_id = t2.individual_id;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 146, "num_statements": 1} {"question": "What are the names and total cargo weight of all shipments that have been delayed and originated from the 'North America' region?", "schema": "CREATE TABLE Warehouse (id INT, name TEXT, region TEXT); INSERT INTO Warehouse (id, name, region) VALUES (1, 'New York Warehouse', 'North America'), (2, 'Los Angeles Warehouse', 'North America'), (3, 'Sao Paulo Warehouse', 'South America'); CREATE TABLE Shipment (id INT, warehouse_id INT, cargo_weight INT, delivery_status TEXT); INSERT INTO Shipment (id, warehouse_id, cargo_weight, delivery_status) VALUES (1, 1, 5000, 'Delayed'), (2, 1, 3000, 'On Time'), (3, 2, 4000, 'Delayed'), (4, 3, 6000, 'On Time');", "sql": "SELECT Warehouse.name, SUM(Shipment.cargo_weight) as total_cargo_weight FROM Warehouse INNER JOIN Shipment ON Warehouse.id = Shipment.warehouse_id WHERE Warehouse.region = 'North America' AND Shipment.delivery_status = 'Delayed' GROUP BY Warehouse.name;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 253, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'rangetypes' (example 305).", "schema": null, "sql": "insert into test_range_elem select i from generate_series(1,100) i;", "explanation": "DML from PostgreSQL core regression test for Rangetypes.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 67, "num_statements": 1} {"question": "Update the price of renewable energy in France to 14.00.", "schema": "CREATE TABLE renewable_energy (country VARCHAR(20), price DECIMAL(5,2)); INSERT INTO renewable_energy (country, price) VALUES ('France', 12.50), ('France', 13.20), ('Germany', 10.00), ('Germany', 11.50);", "sql": "UPDATE renewable_energy SET price = 14.00 WHERE country = 'France';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 67, "num_statements": 1} {"question": "What is the total weight of non-organic meat products in the FOOD_ITEMS table?", "schema": "CREATE TABLE FOOD_ITEMS (id INT, name VARCHAR(50), category VARCHAR(50), is_organic BOOLEAN, weight FLOAT); INSERT INTO FOOD_ITEMS (id, name, category, is_organic, weight) VALUES (1, 'Chicken Breast', 'Meat', false, 0.4), (2, 'Ground Beef', 'Meat', false, 0.8);", "sql": "SELECT SUM(weight) FROM FOOD_ITEMS WHERE is_organic = false AND category = 'Meat';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 82, "num_statements": 1} {"question": "What is the minimum sustainability rating for any destination in the Pacific Islands with at least 500 visitors?", "schema": "CREATE TABLE PacificIslandsDestinations (destination_id INT, name VARCHAR(50), country VARCHAR(50), sustainability_rating INT, visitor_count INT); INSERT INTO PacificIslandsDestinations (destination_id, name, country, sustainability_rating, visitor_count) VALUES (1, 'Eco Retreat', 'Fiji', 4, 600); INSERT INTO PacificIslandsDestinations (destination_id, name, country, sustainability_rating, visitor_count) VALUES (2, 'Green Island', 'Tahiti', 5, 800);", "sql": "SELECT MIN(sustainability_rating) FROM PacificIslandsDestinations WHERE country IN ('Pacific Islands') AND visitor_count >= 500;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 128, "num_statements": 1} {"question": "What is the total number of cybersecurity incidents reported for each type of incident in FY2021?", "schema": "CREATE TABLE FiscalYear (fiscal_year INT, year INT); INSERT INTO FiscalYear (fiscal_year, year) VALUES (2021, 2021), (2022, 2022), (2023, 2023); CREATE TABLE IncidentTypes (id INT, type VARCHAR(30)); INSERT INTO IncidentTypes (id, type) VALUES (1, 'Malware'), (2, 'Phishing'), (3, 'Ransomware'), (4, 'Data Breach'); CREATE TABLE Incidents (id INT, fiscal_year_id INT, incident_type_id INT, reported_date DATE, description TEXT); INSERT INTO Incidents (id, fiscal_year_id, incident_type_id, reported_date, description) VALUES (1, 1, 1, '2021-01-01', 'A computer was infected with malware.'), (2, 1, 3, '2021-02-15', 'A ransomware attack encrypted sensitive data.'), (3, 1, 2, '2021-03-05', 'A phishing email was sent to employees.'), (4, 1, 1, '2021-04-10', 'A server was compromised with malware.');", "sql": "SELECT i.type, COUNT(*) FROM Incidents i INNER JOIN FiscalYear fy ON i.fiscal_year_id = fy.id WHERE fy.year = 2021 GROUP BY i.type;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 131, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which college has a nose tackle position?", "schema": "CREATE TABLE table_name_30 (college VARCHAR, position VARCHAR)", "sql": "SELECT college FROM table_name_30 WHERE position = 'nose tackle';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 65, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: When did the game with North Melbourne as the away team take place?", "schema": "CREATE TABLE table_name_89 (date VARCHAR, away_team VARCHAR)", "sql": "SELECT date FROM table_name_89 WHERE away_team = 'north melbourne';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 67, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the name of the station in Toolamba that is currently demolished?", "schema": "CREATE TABLE table_name_26 (name VARCHAR, current_status VARCHAR, location VARCHAR)", "sql": "SELECT name FROM table_name_26 WHERE current_status = 'demolished' AND location = 'toolamba';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 93, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the Time (EEST), when the Stage is ss8?", "schema": "CREATE TABLE table_name_5 (time__eest_ VARCHAR, stage VARCHAR)", "sql": "SELECT time__eest_ FROM table_name_5 WHERE stage = 'ss8';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 57, "num_statements": 1} {"question": "Display the names of all countries that have held AI safety conferences and the number of such conferences.", "schema": "CREATE TABLE countries (id INT, name TEXT); INSERT INTO countries (id, name) VALUES (1, 'USA'), (2, 'Canada'), (3, 'UK'), (4, 'Australia'); CREATE TABLE conferences (id INT, country_id INT, name TEXT, topic TEXT); INSERT INTO conferences (id, country_id, name, topic) VALUES (1, 1, 'Conf1', 'AI Safety'), (2, 1, 'Conf2', 'AI Safety'), (3, 3, 'Conf3', 'AI Safety'), (4, 4, 'Conf4', 'AI Safety');", "sql": "SELECT countries.name, COUNT(conferences.id) as conferences_count FROM countries INNER JOIN conferences ON countries.id = conferences.country_id WHERE conferences.topic = 'AI Safety' GROUP BY countries.name;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 207, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many engine b5204 t3?", "schema": "CREATE TABLE table_1147705_1 (engine_displacement VARCHAR, engine_type VARCHAR)", "sql": "SELECT COUNT(engine_displacement) FROM table_1147705_1 WHERE engine_type = 'B5204 T3';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 86, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'pg_buffercache' (example 40).", "schema": null, "sql": "SELECT * FROM pg_buffercache_mark_dirty_relation('temp_pg_buffercache');", "explanation": "Example query from the 'pg_buffercache' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 72, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the score of the game on 1 september?", "schema": "CREATE TABLE table_name_49 (score VARCHAR, date VARCHAR)", "sql": "SELECT score FROM table_name_49 WHERE date = '1 september';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 59, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the smallest rank when there are more than 2 games?", "schema": "CREATE TABLE table_name_52 (rank INTEGER, games INTEGER)", "sql": "SELECT MIN(rank) FROM table_name_52 WHERE games > 2;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 52, "num_statements": 1} {"question": "pgTAP test for Fktap (assertion 60).", "schema": null, "sql": "-- Make sure check_test() works properly with no name argument.\nSELECT * FROM check_test(\n fk_ok( 'fk', 'pk_id', 'pk', 'id' ),\n true\n);", "explanation": "SQL assertion from pgTAP test suite for Fktap.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 141, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which Attendance has an Opponent of @ oilers, and a Date of may 25?", "schema": "CREATE TABLE table_name_25 (attendance INTEGER, opponent VARCHAR, date VARCHAR)", "sql": "SELECT MAX(attendance) FROM table_name_25 WHERE opponent = '@ oilers' AND date = 'may 25';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 90, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who had the highest rebounds against detroit?", "schema": "CREATE TABLE table_name_65 (high_rebounds VARCHAR, team VARCHAR)", "sql": "SELECT high_rebounds FROM table_name_65 WHERE team = 'detroit';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 63, "num_statements": 1} {"question": "What is the minimum depth required for cold-water corals to grow?", "schema": "CREATE TABLE coral_reefs (reef_name TEXT, min_depth_m INT, max_depth_m INT); INSERT INTO coral_reefs (reef_name, min_depth_m, max_depth_m) VALUES ('Great Barrier Reef', 0, 50), ('Coral Garden', 200, 500), ('Midnight Express', 700, 1000);", "sql": "SELECT MIN(min_depth_m) FROM coral_reefs WHERE min_depth_m > 0;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 63, "num_statements": 1} {"question": "What is the daily trading volume for the USDC stablecoin on the Stellar network?", "schema": "CREATE TABLE stellar_usdc (transaction_id INT, volume DECIMAL, timestamp TIMESTAMP);", "sql": "SELECT SUM(volume) FROM stellar_usdc WHERE timestamp >= NOW() - INTERVAL '1 day' GROUP BY DATE(timestamp);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 106, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: In which year did he finish 11th?", "schema": "CREATE TABLE table_name_31 (year VARCHAR, result VARCHAR)", "sql": "SELECT COUNT(year) FROM table_name_31 WHERE result = '11th';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 60, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: In 1972, what was the highest number of points with a Ford engine and an entrant of Brooke Bond Oxo Team Surtees?", "schema": "CREATE TABLE table_name_41 (pts INTEGER, year VARCHAR, engine VARCHAR, entrant VARCHAR)", "sql": "SELECT MAX(pts) FROM table_name_41 WHERE engine = 'ford' AND entrant = 'brooke bond oxo team surtees' AND year = 1972;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 118, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: In what year was Letecke Zavody N.P., Jinonice the manufacturer?", "schema": "CREATE TABLE table_name_90 (year VARCHAR, manufacturer VARCHAR)", "sql": "SELECT year FROM table_name_90 WHERE manufacturer = 'letecke zavody n.p., jinonice';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 84, "num_statements": 1} {"question": "What is the total number of hospitals and the total number of beds per state?", "schema": "CREATE TABLE hospitals (state varchar(2), num_hospitals int, num_beds int); INSERT INTO hospitals (state, num_hospitals, num_beds) VALUES ('NY', 201, 20100), ('CA', 486, 50000), ('TX', 413, 45000), ('FL', 214, 22000);", "sql": "SELECT state, SUM(num_hospitals) as total_hospitals, SUM(num_beds) as total_beds FROM hospitals GROUP BY state;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 111, "num_statements": 1} {"question": "Add a new aircraft manufacturing plant in Canada", "schema": "CREATE TABLE plants (plant_id INT PRIMARY KEY, name VARCHAR(100), city VARCHAR(50), country VARCHAR(50));", "sql": "INSERT INTO plants (plant_id, name, city, country) VALUES (4, 'Mirage Manufacturing', 'Toronto', 'Canada');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 107, "num_statements": 1} {"question": "Which advocacy campaigns were launched in 'advocacy' table, and when, excluding campaigns launched in 2021?", "schema": "CREATE TABLE advocacy (id INT, campaign VARCHAR(50), launch_date DATE, end_date DATE); INSERT INTO advocacy (id, campaign, launch_date, end_date) VALUES (1, 'Child Rights', '2021-01-01', '2021-12-31'), (2, 'Gender Equality', '2021-02-01', '2021-12-31'), (3, 'Human Rights', '2020-01-01', '2020-12-31');", "sql": "SELECT campaign, launch_date FROM advocacy WHERE launch_date < '2021-01-01';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 76, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the round of 32 in conference usa", "schema": "CREATE TABLE table_10722506_6 (round_of_32 VARCHAR, conference VARCHAR)", "sql": "SELECT round_of_32 FROM table_10722506_6 WHERE conference = 'conference USA';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 77, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'truncate' (example 2).", "schema": null, "sql": "INSERT INTO truncate_a VALUES (1);", "explanation": "DML from PostgreSQL core regression test for Truncate.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 34, "num_statements": 1} {"question": "pgTAP test for Resultset (assertion 90).", "schema": null, "sql": "INSERT INTO names (name) VALUES ('Aidan');", "explanation": "SQL assertion from pgTAP test suite for Resultset.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 42, "num_statements": 1} {"question": "What is the total number of pallets handled by the 'Warehouse A'?", "schema": "CREATE TABLE Warehouse (name varchar(20), pallets_handled int); INSERT INTO Warehouse (name, pallets_handled) VALUES ('Warehouse A', 1500), ('Warehouse B', 2000);", "sql": "SELECT SUM(pallets_handled) FROM Warehouse WHERE name = 'Warehouse A';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 70, "num_statements": 1} {"question": "What is the total cost of the 'ProjectX'?", "schema": "CREATE TABLE ProjectCost (project_name TEXT, phase TEXT, cost INT); INSERT INTO ProjectCost (project_name, phase, cost) VALUES ('ProjectX', 'Phase1', 10000), ('ProjectX', 'Phase2', 20000), ('ProjectY', 'Phase1', 15000);", "sql": "SELECT SUM(cost) FROM ProjectCost WHERE project_name = 'ProjectX';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "PostgreSQL regression test 'jsonb': Write the SELECT query (example 160).", "schema": null, "sql": "SELECT jsonb_typeof('\"hello\"') AS string;", "explanation": "Regression test for Jsonb in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT jsonb_typeof('\"hello\"') AS string) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 41, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the high points 31-27", "schema": "CREATE TABLE table_22669044_9 (high_points VARCHAR, record VARCHAR)", "sql": "SELECT high_points FROM table_22669044_9 WHERE record = '31-27';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 64, "num_statements": 1} {"question": "Which artists have conducted the most workshops by region?", "schema": "CREATE TABLE ArtistWorkshops (id INT, artist_name VARCHAR(255), region VARCHAR(255), workshops INT); INSERT INTO ArtistWorkshops (id, artist_name, region, workshops) VALUES (1, 'Artist A', 'North', 5), (2, 'Artist B', 'South', 3), (3, 'Artist C', 'North', 7), (4, 'Artist D', 'East', 2);", "sql": "SELECT region, artist_name, SUM(workshops) FROM ArtistWorkshops GROUP BY region, artist_name;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 93, "num_statements": 1} {"question": "Which artists have produced the most pieces?", "schema": "CREATE TABLE Artists (ArtistID int, Name varchar(50), Nationality varchar(50)); CREATE TABLE ArtPieces (ArtPieceID int, Title varchar(50), YearCreated int, ArtistID int);", "sql": "SELECT Artists.Name, COUNT(ArtPieces.ArtPieceID) AS ArtPiecesCount FROM Artists INNER JOIN ArtPieces ON Artists.ArtistID = ArtPieces.ArtistID GROUP BY Artists.Name ORDER BY ArtPiecesCount DESC;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 193, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'publication' (example 587).", "schema": null, "sql": "CREATE TABLE pub_testpart1.parent1 (a int) partition by list (a);", "explanation": "DDL from PostgreSQL core regression test for Publication.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "ddl_table", "is_postgresql_specific": true, "sql_length": 65, "num_statements": 1} {"question": "What is the highest ticket price for a play in London?", "schema": "CREATE TABLE plays (title VARCHAR(255), location VARCHAR(255), price DECIMAL(5,2)); INSERT INTO plays (title, location, price) VALUES ('Hamilton', 'London', 250.00), ('Macbeth', 'London', 150.00), ('Romeo and Juliet', 'London', 120.00);", "sql": "SELECT MAX(price) FROM plays WHERE location = 'London';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 55, "num_statements": 1} {"question": "pgTAP test for Pgtap--0.91.0--0.92.0 (assertion 28).", "schema": null, "sql": "-- view_owner_is ( schema, view, user )\nCREATE OR REPLACE FUNCTION view_owner_is ( NAME, NAME, NAME )\nRETURNS TEXT AS $$\n SELECT view_owner_is(\n $1, $2, $3,\n 'View ' || quote_ident($1) || '.' || quote_ident($2) || ' should be owned by ' || quote_ident($3)\n );\n$$ LANGUAGE sql;", "explanation": "SQL assertion from pgTAP test suite for Pgtap--0.91.0--0.92.0.", "validation_query": null, "source": "pgtap_tests", "difficulty": "advanced", "category": "plpgsql_function", "is_postgresql_specific": true, "sql_length": 296, "num_statements": 2} {"question": "Write the DML statement from PostgreSQL regression test 'numeric_big' (example 80).", "schema": null, "sql": "INSERT INTO num_exp_mul VALUES (1,4,'454294299613767152878025320780.534199313974295807138790763501115780294529340799108297697573066187975311338382917022391830256203305238757334106943821060545424417350991354829668286194840925251162479496893943917530660694097932059166013476064988623431110002057735318529554555260199417935495388243829261809007709919225000608711536928171687251088217591210419208480251102484043683131687013687838713055660405381318396419588727500715930145098362997142075433472039319292466570912777345841400769387321465602989947078951135489852486382469990409873227894248208197179481868230244584527040573428134962626267135732247029762468417273891700661832893497067151409134724061246612631376075173287264787886064622106855886785805818642123776489793586531950438285720668411465570116161790343538663297713926678759640594912243360541590368666922379919514826022141331900181');", "explanation": "DML from PostgreSQL core regression test for Numeric Big.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 881, "num_statements": 1} {"question": "How many hybrid cars are sold in Tokyo in the year 2022?", "schema": "CREATE TABLE car_sales(id INT, manufacturer VARCHAR(20), model VARCHAR(20), year INT, sale_type VARCHAR(20), units_sold INT);", "sql": "SELECT SUM(units_sold) FROM car_sales WHERE manufacturer = 'Toyota' AND model LIKE 'Prius%' AND city = 'Tokyo' AND year = 2022 AND sale_type = 'Retail';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 152, "num_statements": 1} {"question": "How many publications were made by graduate students in the Mathematics department in 2020?", "schema": "CREATE TABLE students (student_id INT, name VARCHAR(50), department VARCHAR(50), graduate_status VARCHAR(10)); INSERT INTO students VALUES (1, 'Alice Johnson', 'Mathematics', 'Graduate'); INSERT INTO students VALUES (2, 'Bob Brown', 'Mathematics', 'Undergraduate'); INSERT INTO students VALUES (3, 'Charlie Davis', 'Physics', 'Graduate'); CREATE TABLE publications (publication_id INT, student_id INT, year INT, title VARCHAR(100)); INSERT INTO publications VALUES (1, 1, 2019, 'Theory of Numbers'); INSERT INTO publications VALUES (2, 1, 2020, 'Calculus for Beginners'); INSERT INTO publications VALUES (3, 3, 2019, 'Quantum Mechanics');", "sql": "SELECT COUNT(*) FROM publications p INNER JOIN students s ON p.student_id = s.student_id WHERE s.department = 'Mathematics' AND s.graduate_status = 'Graduate' AND p.year = 2020;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 177, "num_statements": 1} {"question": "What is the total revenue of tours that promote cultural heritage preservation?", "schema": "CREATE TABLE tours (id INT, name VARCHAR(255), description TEXT, revenue FLOAT); INSERT INTO tours (id, name, description, revenue) VALUES (1, 'Historic Landmarks Tour', 'Discover cultural heritage sites in Paris.', 9000.00), (2, 'Traditional Villages Tour', 'Explore authentic Italian villages and their culture.', 8000.00);", "sql": "SELECT SUM(revenue) FROM tours WHERE description LIKE '%cultural heritage%';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 76, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is Money ( $ ), when Player is Greg Norman?", "schema": "CREATE TABLE table_name_1 (money___$__ VARCHAR, player VARCHAR)", "sql": "SELECT money___$__ FROM table_name_1 WHERE player = 'greg norman';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the driver for june 23 and team of penske racing", "schema": "CREATE TABLE table_16493961_1 (driver VARCHAR, date VARCHAR, team VARCHAR)", "sql": "SELECT driver FROM table_16493961_1 WHERE date = 'June 23' AND team = 'Penske Racing';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 86, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many entries are there for founded when the location was springfield, ohio?", "schema": "CREATE TABLE table_24195232_1 (founded VARCHAR, location VARCHAR)", "sql": "SELECT COUNT(founded) FROM table_24195232_1 WHERE location = 'Springfield, Ohio';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 81, "num_statements": 1} {"question": "What is the minimum age of candidates who ran for office in each election?", "schema": "CREATE TABLE election (year INT, candidates INT); CREATE TABLE candidate (name VARCHAR(255), age INT, election_year INT); INSERT INTO election (year, candidates) VALUES (2016, 10), (2018, 15), (2020, 20), (2022, 25), (2024, 30); INSERT INTO candidate (name, age, election_year) VALUES ('Candidate A', 40, 2016), ('Candidate B', 35, 2016), ('Candidate C', 45, 2018), ('Candidate D', 50, 2018), ('Candidate E', 55, 2020);", "sql": "SELECT election_year, MIN(age) FROM candidate GROUP BY election_year;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 69, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which state is frederick bamford a member of?", "schema": "CREATE TABLE table_name_24 (state VARCHAR, member VARCHAR)", "sql": "SELECT state FROM table_name_24 WHERE member = 'frederick bamford';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 67, "num_statements": 1} {"question": "What is the average manufacturing cost of aircrafts manufactured in the US?", "schema": "CREATE TABLE AircraftManufacturing(id INT, manufacturer VARCHAR(255), country VARCHAR(255), cost FLOAT); INSERT INTO AircraftManufacturing(id, manufacturer, country, cost) VALUES (1, 'Boeing', 'USA', 120000000), (2, 'Airbus', 'Europe', 150000000), (3, 'Lockheed Martin', 'USA', 180000000);", "sql": "SELECT AVG(cost) FROM AircraftManufacturing WHERE country = 'USA';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'rangetypes' (example 116).", "schema": null, "sql": "INSERT INTO numrange_test2 VALUES(numrange(1.1, 2.2,'()'));", "explanation": "DML from PostgreSQL core regression test for Rangetypes.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 59, "num_statements": 1} {"question": "PostgreSQL regression test 'numeric': Write the SELECT query (example 1010).", "schema": null, "sql": "SELECT factorial(15);", "explanation": "Regression test for Numeric in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT factorial(15)) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 21, "num_statements": 1} {"question": "What is the average KDA ratio for each player in the 'gaming' tournament?", "schema": "CREATE TABLE players (id INT, name VARCHAR(50)); CREATE TABLE games (id INT, player_id INT, kills INT, deaths INT, assists INT); INSERT INTO players VALUES (1, 'Aarav Singh'); INSERT INTO players VALUES (2, 'Bella Rodriguez'); INSERT INTO games VALUES (1, 1, 12, 6, 8); INSERT INTO games VALUES (2, 1, 18, 4, 12); INSERT INTO games VALUES (3, 2, 7, 3, 2); INSERT INTO games VALUES (4, 2, 10, 5, 6);", "sql": "SELECT player_id, AVG( kills / NULLIF(deaths, 0) + assists / NULLIF(deaths, 0)) as avg_kda_ratio FROM games GROUP BY player_id;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 127, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the lowest 09-10 i/o best?", "schema": "CREATE TABLE table_24990183_5 (Id VARCHAR)", "sql": "SELECT MIN(09 AS _10_i_o_best) FROM table_24990183_5;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 53, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is Place, when Score is \"75-71-72-70=288\"?", "schema": "CREATE TABLE table_name_33 (place VARCHAR, score VARCHAR)", "sql": "SELECT place FROM table_name_33 WHERE score = 75 - 71 - 72 - 70 = 288;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 70, "num_statements": 1} {"question": "Delete a record from the 'habitats' table", "schema": "CREATE TABLE habitats (id INT PRIMARY KEY, name VARCHAR(255), location VARCHAR(255), size FLOAT);", "sql": "DELETE FROM habitats WHERE id = 1;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_delete", "is_postgresql_specific": false, "sql_length": 34, "num_statements": 1} {"question": "What is the total CO2 emission per quarter for each mine?", "schema": "CREATE TABLE co2_emissions (mine_id INT, emission_date DATE, co2_amount INT); INSERT INTO co2_emissions (mine_id, emission_date, co2_amount) VALUES (1, '2021-01-01', 30000), (1, '2021-02-01', 32000), (1, '2021-03-01', 35000), (2, '2021-01-01', 28000), (2, '2021-02-01', 30000), (2, '2021-03-01', 33000), (3, '2021-01-01', 25000), (3, '2021-02-01', 27000), (3, '2021-03-01', 29000), (4, '2021-01-01', 22000), (4, '2021-02-01', 24000), (4, '2021-03-01', 26000);", "sql": "SELECT mine_id, DATE_TRUNC('quarter', emission_date) AS quarter, SUM(co2_amount) AS total_emission FROM co2_emissions GROUP BY mine_id, quarter ORDER BY mine_id, quarter;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 170, "num_statements": 1} {"question": "What is the minimum cost of manufacturing the aircraft models 'Falcon 9' and 'Falcon 1'?", "schema": "CREATE TABLE AircraftManufacturing(model VARCHAR(20), total_cost INT); INSERT INTO AircraftManufacturing VALUES('Falcon 1', 500000),('Falcon 9', 600000);", "sql": "SELECT MIN(total_cost) FROM AircraftManufacturing WHERE model IN ('Falcon 9', 'Falcon 1');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 90, "num_statements": 1} {"question": "List policy numbers and claim amounts for policyholders living in 'Ontario' who have filed a claim.", "schema": "CREATE TABLE Policies (PolicyNumber INT, PolicyholderID INT, PolicyState VARCHAR(20)); CREATE TABLE Claims (PolicyholderID INT, ClaimAmount DECIMAL(10,2), PolicyState VARCHAR(20)); INSERT INTO Policies (PolicyNumber, PolicyholderID, PolicyState) VALUES (2001, 9, 'Ontario'), (2002, 10, 'Ontario'); INSERT INTO Claims (PolicyholderID, ClaimAmount, PolicyState) VALUES (9, 800, 'Ontario'), (10, 900, 'Ontario');", "sql": "SELECT Policies.PolicyNumber, Claims.ClaimAmount FROM Policies JOIN Claims ON Policies.PolicyholderID = Claims.PolicyholderID WHERE Policies.PolicyState = 'Ontario';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 165, "num_statements": 1} {"question": "pgTAP test for Hastap (assertion 299).", "schema": null, "sql": "/****************************************************************************/\n-- Test is_partitioned().\nCREATE FUNCTION test_is_partitioned() RETURNS SETOF TEXT AS $$\nDECLARE\n tap record;\nBEGIN\n IF pg_version_num() >= 100000 THEN\n FOR tap IN SELECT * FROM check_test(\n is_partitioned( '__SDFSDFD__' ),\n false,\n 'is_partitioned(non-existent part)',\n 'Table \"__SDFSDFD__\" should be partitioned',\n ''\n ) AS b LOOP\n RETURN NEXT tap.b;\n END LOOP;\n\n FOR tap IN SELECT * FROM check_test(\n is_partitioned( '__SDFSDFD__', 'howdy' ),\n false,\n 'is_partitioned(non-existent part, desc)',\n 'howdy',\n ''\n ) AS b LOOP\n RETURN NEXT tap.b;\n END LOOP;\n\n FOR tap IN SELECT * FROM check_test(\n is_partitioned( 'foo', '__SDFSDFD__', 'desc' ),\n false,\n 'is_partitioned(sch, non-existent part, desc)',\n 'desc',\n ''\n ) AS b LOOP\n RETURN NEXT tap.b;\n END LOOP;\n\n FOR tap IN SELECT * FROM check_test(\n is_partitioned( 'public', 'apart', 'desc' ),\n true,\n 'is_partitioned(sch, part, desc)',\n 'desc',\n ''\n ) AS b LOOP\n RETURN NEXT tap.b;\n END LOOP;\n\n FOR tap IN SELECT * FROM check_test(\n is_partitioned( 'public', 'apart'::name ),\n true,\n 'is_partitioned(sch, part)',\n 'Table public.apart should be partitioned',\n ''\n ) AS b LOOP\n RETURN NEXT tap.b;\n END LOOP;\n\n FOR tap IN SELECT * FROM check_test(\n is_partitioned( 'apart', 'yowza' ),\n true,\n 'is_partitioned(part, desc)',\n 'yowza',\n ''\n ) AS b LOOP\n RETURN NEXT tap.b;\n END LOOP;\n\n FOR tap IN SELECT * FROM check_test(\n is_partitioned( 'apart' ),\n true,\n 'is_partitioned(part)',\n 'Table apart should be partitioned',\n ''\n ) AS b LOOP\n RETURN NEXT tap.b;\n END LOOP;\n\n ELSE\n FOR tap IN SELECT * FROM check_test(\n has_view( '__SDFSDFD__' ),\n false,\n 'is_partitioned(non-existent part)',\n 'View \"__SDFSDFD__\" should exist',\n ''\n ) AS b LOOP\n RETURN NEXT tap.b;\n END LOOP;\n\n FOR tap IN SELECT * FROM check_test(\n has_view( '__SDFSDFD__', 'howdy' ),\n false,\n 'is_partitioned(non-existent part, desc)',\n 'howdy',\n ''\n ) AS b LOOP\n RETURN NEXT tap.b;\n END LOOP;\n\n FOR tap IN SELECT * FROM check_test(\n has_view( 'foo', '__SDFSDFD__', 'desc' ),\n false,\n 'is_partitioned(sch, non-existent part, desc)',\n 'desc',\n ''\n ) AS b LOOP\n RETURN NEXT tap.b;\n END LOOP;\n\n FOR tap IN SELECT * FROM check_test(\n has_view( 'information_schema', 'tables', 'desc' ),\n true,\n 'is_partitioned(sch, part, desc)',\n 'desc',\n ''\n ) AS b LOOP\n RETURN NEXT tap.b;\n END LOOP;\n\n FOR tap IN SELECT * FROM check_test(\n has_view( 'information_schema', 'tables', 'desc' ),\n true,\n 'is_partitioned(sch, part)',\n 'desc',\n ''\n ) AS b LOOP\n RETURN NEXT tap.b;\n END LOOP;\n\n FOR tap IN SELECT * FROM check_test(\n has_view( 'information_schema', 'tables', 'desc' ),\n true,\n 'is_partitioned(part, desc)',\n 'desc',\n ''\n ) AS b LOOP\n RETURN NEXT tap.b;\n END LOOP;\n\n FOR tap IN SELECT * FROM check_test(\n has_view( 'pg_tables' ),\n true,\n 'is_partitioned(part)',\n 'View pg_tables should exist',\n ''\n ) AS b LOOP\n RETURN NEXT tap.b;\n END LOOP;\n END IF;\n RETURN;\nEND;\n$$ language PLPGSQL;", "explanation": "SQL assertion from pgTAP test suite for Hastap.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "plpgsql_function", "is_postgresql_specific": true, "sql_length": 4195, "num_statements": 33} {"question": "Generate PostgreSQL SQL for: How much capacity has 1944 as the founded?", "schema": "CREATE TABLE table_name_84 (capacity VARCHAR, founded VARCHAR)", "sql": "SELECT COUNT(capacity) FROM table_name_84 WHERE founded = 1944;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 63, "num_statements": 1} {"question": "PostgreSQL regression test 'rangefuncs': Write the SELECT query (example 149).", "schema": null, "sql": "select pg_get_viewdef('vw_rngfunc');", "explanation": "Regression test for Rangefuncs in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select pg_get_viewdef('vw_rngfunc')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 36, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many figures are provided for Weeks' field goals?", "schema": "CREATE TABLE table_25711913_8 (field_goals VARCHAR, player VARCHAR)", "sql": "SELECT COUNT(field_goals) FROM table_25711913_8 WHERE player = 'Weeks';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 71, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What shareholder has 2.55 percent of votes?", "schema": "CREATE TABLE table_206419_3 (shareholder VARCHAR, percent_of_votes VARCHAR)", "sql": "SELECT shareholder FROM table_206419_3 WHERE percent_of_votes = '2.55';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 71, "num_statements": 1} {"question": "How many cybersecurity incidents were reported in the defense industry in the last 6 months?", "schema": "CREATE TABLE Cybersecurity_Incidents_2 (id INT, industry VARCHAR(50), year INT, month INT, reported_count INT);", "sql": "SELECT SUM(reported_count) FROM Cybersecurity_Incidents_2 WHERE industry = 'defense' AND year = YEAR(CURRENT_DATE) AND month >= MONTH(CURRENT_DATE) - 6;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 152, "num_statements": 1} {"question": "What was the total organic matter (in kg) in soil samples from each region in 2020?", "schema": "CREATE TABLE soil_samples (id INT, region_id INT, organic_matter_kg FLOAT, date DATE);", "sql": "SELECT region_id, SUM(organic_matter_kg) FROM soil_samples WHERE YEAR(date) = 2020 GROUP BY region_id;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 102, "num_statements": 1} {"question": "Find the total revenue for vegetarian dishes across all restaurants.", "schema": "CREATE TABLE restaurants (id INT, name VARCHAR(255)); INSERT INTO restaurants (id, name) VALUES (1, 'Restaurant A'), (2, 'Restaurant B'); CREATE TABLE dishes (id INT, name VARCHAR(255), type VARCHAR(255), revenue INT, restaurant_id INT); INSERT INTO dishes (id, name, type, revenue, restaurant_id) VALUES (1, 'Quinoa Salad', 'vegetarian', 500, 1), (2, 'Chickpea Curry', 'vegetarian', 800, 1), (3, 'Cheeseburger', 'non-vegetarian', 1200, 1), (4, 'Pizza Margherita', 'vegetarian', 700, 2), (5, 'Fish and Chips', 'non-vegetarian', 1500, 2);", "sql": "SELECT SUM(revenue) FROM dishes WHERE type = 'vegetarian';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 58, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: On September 10, 1989 how many people attended the game?", "schema": "CREATE TABLE table_name_65 (attendance VARCHAR, date VARCHAR)", "sql": "SELECT attendance FROM table_name_65 WHERE date = 'september 10, 1989';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 71, "num_statements": 1} {"question": "How many users have no preference for any finish?", "schema": "CREATE TABLE user_preference (id INT, user_id INT, product_id INT, finish VARCHAR(50), PRIMARY KEY (id)); INSERT INTO user_preference (id, user_id, product_id, finish) VALUES (1, 1, 1, 'Matte'), (2, 2, 1, 'Matte'), (3, 3, 2, 'Gloss'), (4, 4, NULL, NULL);", "sql": "SELECT COUNT(*) as no_preference_users FROM user_preference WHERE finish IS NULL;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 81, "num_statements": 1} {"question": "Insert a new 'oil_platform' record for 'Blue Star Inc.' off the coast of Nigeria", "schema": "CREATE TABLE oil_platform (id INT PRIMARY KEY, name TEXT, operator TEXT, location TEXT, depth FLOAT);", "sql": "INSERT INTO oil_platform (name, operator, location, depth) VALUES ('Poseidon', 'Blue Star Inc.', 'Offshore Nigeria', 1200.5);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 125, "num_statements": 1} {"question": "How many albums were released each year by music artists?", "schema": "CREATE TABLE Music_Albums (id INT, title VARCHAR(100), release_year INT, artist VARCHAR(100)); INSERT INTO Music_Albums (id, title, release_year, artist) VALUES (1, 'Back in Black', 1980, 'AC/DC'), (2, 'Thriller', 1982, 'Michael Jackson'), (3, 'The Dark Side of the Moon', 1973, 'Pink Floyd');", "sql": "SELECT release_year, COUNT(*) FROM Music_Albums GROUP BY release_year;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 70, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What digital reaction has a hot 100 reaction of 4 (+4)?", "schema": "CREATE TABLE table_name_81 (hot_digital_songs_reaction VARCHAR, hot_100_reaction VARCHAR)", "sql": "SELECT hot_digital_songs_reaction FROM table_name_81 WHERE hot_100_reaction = '4 (+4)';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 87, "num_statements": 1} {"question": "What is the maximum number of wins for a player from Japan, for games that started in the last 60 days?", "schema": "CREATE TABLE games (game_id INT, player_id INT, game_date DATE, wins INT); CREATE TABLE players (player_id INT, player_country VARCHAR(255));", "sql": "SELECT MAX(wins) FROM games JOIN players ON games.player_id = players.player_id WHERE players.player_country = 'Japan' AND games.game_date >= (CURRENT_DATE - INTERVAL '60' DAY);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 177, "num_statements": 1} {"question": "Write the SQL definition from PostgreSQL contrib extension 'contrib' (item 6).", "schema": null, "sql": "CREATE FUNCTION ltree_le(ltree,ltree)\nRETURNS bool\nAS 'MODULE_PATHNAME'\nLANGUAGE C STRICT IMMUTABLE PARALLEL SAFE;", "explanation": "SQL definition from the 'contrib' PostgreSQL contrib extension.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 114, "num_statements": 1} {"question": "What is the average age of female workers in the 'mining_operations' table?", "schema": "CREATE TABLE mining_operations (id INT, name VARCHAR(50), position VARCHAR(50), age INT);", "sql": "SELECT AVG(age) FROM mining_operations WHERE position = 'worker' AND gender = 'female';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 87, "num_statements": 1} {"question": "pgTAP test for Roletap (assertion 15).", "schema": null, "sql": "SELECT * FROM check_test(\n roles_are( array_append(___myroles(current_role), '__howdy__'), 'whatever' ),\n false,\n 'roles_are(roles, desc) missing and extras',\n 'whatever',\n ' Extra roles:\n ' || quote_ident(current_role) || '\n Missing roles:\n __howdy__'\n);", "explanation": "SQL assertion from pgTAP test suite for Roletap.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 290, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which Run 4 has a Run 1 of 57.37, and a Run 3 smaller than 57.45?", "schema": "CREATE TABLE table_name_32 (run_4 INTEGER, run_1 VARCHAR, run_3 VARCHAR)", "sql": "SELECT SUM(run_4) FROM table_name_32 WHERE run_1 = 57.37 AND run_3 < 57.45;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 75, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What CFL teams are part of Simon Fraser college?", "schema": "CREATE TABLE table_10975034_4 (cfl_team VARCHAR, college VARCHAR)", "sql": "SELECT cfl_team FROM table_10975034_4 WHERE college = 'Simon Fraser';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 69, "num_statements": 1} {"question": "What is the minimum health equity metric score achieved by any community?", "schema": "CREATE TABLE community_health_equity (community_id INT, score INT); INSERT INTO community_health_equity (community_id, score) VALUES (1, 85), (2, 90), (3, 80), (4, 95), (5, 88);", "sql": "SELECT MIN(score) FROM community_health_equity;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 47, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'brin' (example 71).", "schema": null, "sql": "INSERT INTO brintest_2 VALUES ('(-1, 0)');", "explanation": "DML from PostgreSQL core regression test for Brin.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 42, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which member has an Electorate of kennedy?", "schema": "CREATE TABLE table_name_50 (member VARCHAR, electorate VARCHAR)", "sql": "SELECT member FROM table_name_50 WHERE electorate = 'kennedy';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 62, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many years correspond to longitude of 36.8e and diameter greater than 697?", "schema": "CREATE TABLE table_name_72 (year_named VARCHAR, longitude VARCHAR, diameter__km_ VARCHAR)", "sql": "SELECT COUNT(year_named) FROM table_name_72 WHERE longitude = '36.8e' AND diameter__km_ > 697;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 94, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the score of the scores when Game had a Record of 17-29?", "schema": "CREATE TABLE table_name_27 (game INTEGER, record VARCHAR)", "sql": "SELECT SUM(game) FROM table_name_27 WHERE record = '17-29';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 59, "num_statements": 1} {"question": "What is the total cost of all projects in 'Bridge' category?", "schema": "CREATE TABLE projects (id INT, name VARCHAR(255), category VARCHAR(255), cost FLOAT); INSERT INTO projects (id, name, category, cost) VALUES (1, 'Sample Bridge', 'Bridge', 5000000);", "sql": "SELECT SUM(cost) FROM projects WHERE category = 'Bridge';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 57, "num_statements": 1} {"question": "How many individuals were impacted by access to justice initiatives in Latin America in 2020?", "schema": "CREATE TABLE initiatives (initiative_id INT, year INT, individuals_impacted INT); INSERT INTO initiatives (initiative_id, year, individuals_impacted) VALUES (1, 2018, 500), (2, 2020, 800); CREATE TABLE locations (initiative_id INT, region VARCHAR(20)); INSERT INTO locations (initiative_id, region) VALUES (1, 'North America'), (2, 'Latin America');", "sql": "SELECT SUM(initiatives.individuals_impacted) FROM initiatives INNER JOIN locations ON initiatives.initiative_id = locations.initiative_id WHERE locations.region = 'Latin America' AND initiatives.year = 2020;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 207, "num_statements": 1} {"question": "Find the number of unique mental health conditions that have been treated in each region.", "schema": "CREATE TABLE treatments (id INT, condition_id INT, region VARCHAR(50)); INSERT INTO treatments (id, condition_id, region) VALUES (1, 1, 'Asia'), (2, 1, 'Europe'), (3, 2, 'Asia'), (4, 2, 'Europe'), (5, 3, 'Asia');", "sql": "SELECT region, COUNT(DISTINCT condition_id) FROM treatments GROUP BY region;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 76, "num_statements": 1} {"question": "What is the minimum age of all country artists in the database?", "schema": "CREATE TABLE artists (artist_id INT, artist_name VARCHAR(100), artist_age INT, genre VARCHAR(50)); INSERT INTO artists VALUES (1, 'Artist A', 35, 'Country'); INSERT INTO artists VALUES (2, 'Artist B', 28, 'Country'); INSERT INTO artists VALUES (3, 'Artist C', 45, 'Pop');", "sql": "SELECT MIN(artist_age) FROM artists WHERE genre = 'Country';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 60, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who was the away team at the Football Park match?", "schema": "CREATE TABLE table_name_18 (away_team VARCHAR, ground VARCHAR)", "sql": "SELECT away_team FROM table_name_18 WHERE ground = 'football park';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 67, "num_statements": 1} {"question": "Update the inspection_date for record with restaurant_id 123 to be '2022-08-01' in the food_inspections table", "schema": "CREATE TABLE food_inspections (restaurant_id INT, inspection_date DATE);", "sql": "UPDATE food_inspections SET inspection_date = '2022-08-01' WHERE restaurant_id = 123;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 85, "num_statements": 1} {"question": "PostgreSQL regression test 'money': Write the SELECT query (example 64).", "schema": null, "sql": "SELECT '-123456789012345678'::money;", "explanation": "Regression test for Money in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT '-123456789012345678'::money) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 36, "num_statements": 1} {"question": "Which cities in Japan have populations over 2 million, and what are their populations?", "schema": "CREATE TABLE japan_cities (name TEXT, population INTEGER); INSERT INTO japan_cities (name, population) VALUES ('Tokyo', 37400068), ('Yokohama', 3668000);", "sql": "SELECT name, population FROM japan_cities WHERE population > 2000000;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 69, "num_statements": 1} {"question": "What are the traditional dances, their origins, and the communities that perform them in South America?", "schema": "CREATE TABLE Dances (id INT, name TEXT, origin TEXT); INSERT INTO Dances (id, name, origin) VALUES (1, 'Samba', 'Brazil'); CREATE TABLE Communities (id INT, dance_id INT, name TEXT); INSERT INTO Communities (id, dance_id, name) VALUES (1, 1, 'Cariocas');", "sql": "SELECT D.name, D.origin, C.name FROM Dances D INNER JOIN Communities C ON D.id = C.dance_id WHERE C.name = 'Cariocas';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 118, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: In the Hamburg Masters Tournament, during which Jiří Novák was absent(A) in 1996, how did he do in 2003?", "schema": "CREATE TABLE table_name_71 (tournament VARCHAR)", "sql": "SELECT 2003 FROM table_name_71 WHERE 1996 = 'a' AND tournament = 'hamburg masters';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 83, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What train number is heading to amritsar?", "schema": "CREATE TABLE table_12095519_1 (train_no VARCHAR, destination VARCHAR)", "sql": "SELECT train_no FROM table_12095519_1 WHERE destination = 'Amritsar';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 69, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the name of the algorithm with a directed/undirected of both and a subgraph-centric basis?", "schema": "CREATE TABLE table_name_12 (name VARCHAR, directed___undirected VARCHAR, basis VARCHAR)", "sql": "SELECT name FROM table_name_12 WHERE directed___undirected = 'both' AND basis = 'subgraph-centric';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 99, "num_statements": 1} {"question": "Which organizations focus on ethical AI in Africa?", "schema": "CREATE TABLE organization (name VARCHAR(50), focus VARCHAR(50), location VARCHAR(50)); INSERT INTO organization (name, focus, location) VALUES ('AI for Good', 'Ethical AI', 'Kenya'), ('TechEthics Africa', 'Ethical AI', 'South Africa'), ('Data Justice NGO', 'Data Ethics', 'Nigeria');", "sql": "SELECT name FROM organization WHERE focus = 'Ethical AI' AND location = 'Africa';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 81, "num_statements": 1} {"question": "List the number of mental health parity violations by state in 2021 and 2022.", "schema": "CREATE TABLE MentalHealthParityViolations (ViolationId INT, Year INT, State VARCHAR(255)); INSERT INTO MentalHealthParityViolations (ViolationId, Year, State) VALUES (1, 2021, 'California'); INSERT INTO MentalHealthParityViolations (ViolationId, Year, State) VALUES (2, 2021, 'Texas'); INSERT INTO MentalHealthParityViolations (ViolationId, Year, State) VALUES (3, 2022, 'New York'); INSERT INTO MentalHealthParityViolations (ViolationId, Year, State) VALUES (4, 2022, 'Florida');", "sql": "SELECT Year, State, COUNT(*) FROM MentalHealthParityViolations WHERE Year IN (2021, 2022) GROUP BY Year, State;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 111, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the most amount of goals any of the players had?", "schema": "CREATE TABLE table_23963781_3 (goals INTEGER)", "sql": "SELECT MAX(goals) FROM table_23963781_3;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 40, "num_statements": 1} {"question": "Insert a new volunteer record for Carlos with country Mexico.", "schema": "CREATE TABLE Volunteers (id INT, name VARCHAR(255), country VARCHAR(255));", "sql": "INSERT INTO Volunteers (name, country) VALUES ('Carlos', 'Mexico');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 67, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Find the distinct majors of students who have treasurer votes.", "schema": "CREATE TABLE VOTING_RECORD (Treasurer_Vote VARCHAR); CREATE TABLE STUDENT (Major VARCHAR, StuID VARCHAR)", "sql": "SELECT DISTINCT T1.Major FROM STUDENT AS T1 JOIN VOTING_RECORD AS T2 ON T1.StuID = T2.Treasurer_Vote;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 101, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What are the record(s) for the team with a winning percentage of .464?", "schema": "CREATE TABLE table_15313204_1 (records VARCHAR, pct VARCHAR)", "sql": "SELECT records FROM table_15313204_1 WHERE pct = '.464';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 56, "num_statements": 1} {"question": "Show a SQL definition from the pg_partman project (pg_partman--4.6.1--4.6.2, item 1).", "schema": null, "sql": "CREATE OR REPLACE FUNCTION @extschema@.apply_constraints(p_parent_table text, p_child_table text DEFAULT NULL, p_analyze boolean DEFAULT FALSE, p_job_id bigint DEFAULT NULL) RETURNS void\n LANGUAGE plpgsql\n AS $$\nDECLARE\n\nex_context text;\nex_detail text;\nex_hint text;\nex_message text;\nv_child_exists text;\nv_child_tablename text;\nv_col text;\nv_constraint_cols text[];\nv_constraint_col_type text;\nv_constraint_name text;\nv_constraint_valid boolean;\nv_constraint_values record;\nv_control text;\nv_control_type text;\nv_datetime_string text;\nv_epoch text;\nv_existing_constraint_name text;\nv_job_id bigint;\nv_jobmon boolean;\nv_jobmon_schema text;\nv_last_partition text;\nv_last_partition_id bigint;\nv_last_partition_timestamp timestamptz;\nv_max_id bigint;\nv_max_timestamp timestamptz;\nv_new_search_path text;\nv_old_search_path text;\nv_optimize_constraint int;\nv_parent_schema text;\nv_parent_table text;\nv_parent_tablename text;\nv_partition_interval text;\nv_partition_suffix text;\nv_premake int;\nv_sql text;\nv_step_id bigint;\nv_suffix_position int;\nv_type text;\n\nBEGIN\n/*\n * Apply constraints managed by partman extension\n */\n\nSELECT parent_table\n , partition_type\n , control\n , premake\n , partition_interval\n , optimize_constraint\n , epoch\n , datetime_string\n , constraint_cols\n , jobmon\n , constraint_valid\nINTO v_parent_table\n , v_type\n , v_control\n , v_premake\n , v_partition_interval\n , v_optimize_constraint\n , v_epoch\n , v_datetime_string\n , v_constraint_cols\n , v_jobmon\n , v_constraint_valid\nFROM @extschema@.part_config\nWHERE parent_table = p_parent_table\nAND constraint_cols IS NOT NULL;\n\nIF v_constraint_cols IS NULL THEN\n RAISE DEBUG 'apply_constraints: Given parent table (%) not set up for constraint management (constraint_cols is NULL)', p_parent_table;\n -- Returns silently to allow this function to be simply called by maintenance processes without having to check if config options are set.\n RETURN;\nEND IF;\n\nSELECT schemaname, tablename\nINTO v_parent_schema, v_parent_tablename\nFROM pg_catalog.pg_tables\nWHERE schemaname = split_part(v_parent_table, '.', 1)::name\nAND tablename = split_part(v_parent_table, '.', 2)::name;\n\nSELECT general_type INTO v_control_type FROM @extschema@.check_control_type(v_parent_schema, v_parent_tablename, v_control);\n\nSELECT current_setting('search_path') INTO v_old_search_path;\nIF length(v_old_search_path) > 0 THEN\n v_new_search_path := '@extschema@,pg_temp,'||v_old_search_path;\nELSE\n v_new_search_path := '@extschema@,pg_temp';\nEND IF;\nIF v_jobmon THEN\n SELECT nspname INTO v_jobmon_schema FROM pg_catalog.pg_namespace n, pg_catalog.pg_extension e WHERE e.extname = 'pg_jobmon'::name AND e.extnamespace = n.oid;\n IF v_jobmon_schema IS NOT NULL THEN\n v_new_search_path := format('%s,%s',v_jobmon_schema, v_new_search_path);\n END IF;\nEND IF;\nEXECUTE format('SELECT set_config(%L, %L, %L)', 'search_path', v_new_search_path, 'false');\n\nIF v_jobmon_schema IS NOT NULL THEN\n IF p_job_id IS NULL THEN\n v_job_id := add_job(format('PARTMAN CREATE CONSTRAINT: %s', v_parent_table));\n ELSE\n v_job_id = p_job_id;\n END IF;\nEND IF;\n\n-- If p_child_table is null, figure out the partition that is the one right before the optimize_constraint value backwards.\nIF p_child_table IS NULL THEN\n IF v_jobmon_schema IS NOT NULL THEN\n v_step_id := add_step(v_job_id, 'Applying additional constraints: Automatically determining most recent child on which to apply constraints');\n END IF;\n\n SELECT partition_tablename INTO v_last_partition FROM @extschema@.show_partitions(v_parent_table, 'DESC') LIMIT 1;\n\n IF v_control_type = 'time' OR (v_control_type = 'id' AND v_epoch <> 'none') THEN\n SELECT child_start_time INTO v_last_partition_timestamp FROM @extschema@.show_partition_info(v_parent_schema||'.'||v_last_partition, v_partition_interval, v_parent_table);\n v_partition_suffix := to_char(v_last_partition_timestamp - (v_partition_interval::interval * (v_optimize_constraint + v_premake + 1) ), v_datetime_string);\n ELSIF v_control_type = 'id' THEN\n SELECT child_start_id INTO v_last_partition_id FROM @extschema@.show_partition_info(v_parent_schema||'.'||v_last_partition, v_partition_interval, v_parent_table);\n v_partition_suffix := (v_last_partition_id - (v_partition_interval::bigint * (v_optimize_constraint + v_premake + 1) ))::text;\n END IF;\n\n RAISE DEBUG 'apply_constraint: v_parent_tablename: %, v_last_partition: %, v_last_partition_timestamp: %, v_partition_suffix: %'\n , v_parent_tablename, v_last_partition, v_last_partition_timestamp, v_partition_suffix;\n\n v_child_tablename := @extschema@.check_name_length(v_parent_tablename, v_partition_suffix, TRUE);\n\n IF v_jobmon_schema IS NOT NULL THEN\n PERFORM update_step(v_step_id, 'OK', format('Target child table: %s.%s', v_parent_schema, v_child_tablename));\n END IF;\nELSE\n v_child_tablename = split_part(p_child_table, '.', 2);\nEND IF;\n\nIF v_jobmon_schema IS NOT NULL THEN\n v_step_id := add_step(v_job_id, 'Applying additional constraints: Checking if target child table exists');\nEND IF;\n\nSELECT tablename FROM pg_catalog.pg_tables INTO v_child_exists WHERE schemaname = v_parent_schema::name AND tablename = v_child_tablename::name;\nIF v_child_exists IS NULL THEN\n IF v_jobmon_schema IS NOT NULL THEN\n PERFORM update_step(v_step_id, 'NOTICE', format('Target child table (%s) does not exist. Skipping constraint creation.', v_child_tablename));\n IF p_job_id IS NULL THEN\n PERFORM close_job(v_job_id);\n END IF;\n END IF;\n RAISE DEBUG 'Target child table (%) does not exist. Skipping constraint creation.', v_child_tablename;\n EXECUTE format('SELECT set_config(%L, %L, %L)', 'search_path', v_old_search_path, 'false');\n RETURN;\nELSE\n IF v_jobmon_schema IS NOT NULL THEN\n PERFORM update_step(v_step_id, 'OK', 'Done');\n END IF;\nEND IF;\n\nFOREACH v_col IN ARRAY v_constraint_cols\nLOOP\n SELECT con.conname\n INTO v_existing_constraint_name\n FROM pg_catalog.pg_constraint con\n JOIN pg_class c ON c.oid = con.conrelid\n JOIN pg_catalog.pg_namespace n ON c.relnamespace = n.oid\n JOIN pg_catalog.pg_attribute a ON con.conrelid = a.attrelid\n WHERE c.relname = v_child_tablename::name\n AND n.nspname = v_parent_schema::name\n AND con.conname LIKE 'partmanconstr_%'\n AND con.contype = 'c'\n AND a.attname = v_col::name\n AND ARRAY[a.attnum] OPERATOR(pg_catalog.<@) con.conkey\n AND a.attisdropped = false;\n\n IF v_jobmon_schema IS NOT NULL THEN\n v_step_id := add_step(v_job_id, format('Applying additional constraints: Applying new constraint on column: %s', v_col));\n END IF;\n\n IF v_existing_constraint_name IS NOT NULL THEN\n IF v_jobmon_schema IS NOT NULL THEN\n PERFORM update_step(v_step_id, 'NOTICE', format('Partman managed constraint already exists on this table (%s) and column (%s). Skipping creation.', v_child_tablename, v_col));\n END IF;\n RAISE DEBUG 'Partman managed constraint already exists on this table (%) and column (%). Skipping creation.', v_child_tablename, v_col ;\n CONTINUE;\n END IF;\n\n -- Ensure column name gets put on end of constraint name to help avoid naming conflicts\n v_constraint_name := @extschema@.check_name_length('partmanconstr_'||v_child_tablename, p_suffix := '_'||v_col);\n\n EXECUTE format('SELECT min(%I)::text AS min, max(%I)::text AS max FROM %I.%I', v_col, v_col, v_parent_schema, v_child_tablename) INTO v_constraint_values;\n\n IF v_constraint_values IS NOT NULL THEN\n v_sql := format('ALTER TABLE %I.%I ADD CONSTRAINT %I CHECK (%I >= %L AND %I <= %L)'\n , v_parent_schema\n , v_child_tablename\n , v_constraint_name\n , v_col\n , v_constraint_values.min\n , v_col\n , v_constraint_values.max);\n\n IF v_constraint_valid = false THEN\n v_sql := format('%s NOT VALID', v_sql);\n END IF;\n\n RAISE DEBUG 'Constraint creation query: %', v_sql;\n EXECUTE v_sql;\n\n IF v_jobmon_schema IS NOT NULL THEN\n PERFORM update_step(v_step_id, 'OK', format('New constraint created: %s', v_sql));\n END IF;\n ELSE\n RAISE DEBUG 'Given column (%) contains all NULLs. No constraint created', v_col;\n IF v_jobmon_schema IS NOT NULL THEN\n PERFORM update_step(v_step_id, 'NOTICE', format('Given column (%s) contains all NULLs. No constraint created', v_col));\n END IF;\n END IF;\n\nEND LOOP;\n\nIF p_analyze THEN\n IF v_jobmon_schema IS NOT NULL THEN\n v_step_id := add_step(v_job_id, format('Applying additional constraints: Running analyze on partition set: %s', v_parent_table));\n END IF;\n RAISE DEBUG 'Running analyze on partition set: %', v_parent_table;\n EXECUTE format('ANALYZE %I.%I', v_parent_schema, v_parent_tablename);\n\n IF v_jobmon_schema IS NOT NULL THEN\n PERFORM update_step(v_step_id, 'OK', 'Done');\n END IF;\nEND IF;\n\nIF v_jobmon_schema IS NOT NULL THEN\n PERFORM close_job(v_job_id);\nEND IF;\n\nEXECUTE format('SELECT set_config(%L, %L, %L)', 'search_path', v_old_search_path, 'false');\n\nEXCEPTION\n WHEN OTHERS THEN\n GET STACKED DIAGNOSTICS ex_message = MESSAGE_TEXT,\n ex_context = PG_EXCEPTION_CONTEXT,\n ex_detail = PG_EXCEPTION_DETAIL,\n ex_hint = PG_EXCEPTION_HINT;\n IF v_jobmon_schema IS NOT NULL THEN\n IF v_job_id IS NULL THEN\n EXECUTE format('SELECT %I.add_job(''PARTMAN CREATE CONSTRAINT: %s'')', v_jobmon_schema, p_parent_table) INTO v_job_id;\n EXECUTE format('SELECT %I.add_step(%s, ''EXCEPTION before job logging started'')', v_jobmon_schema, v_job_id, p_parent_table) INTO v_step_id;\n ELSIF v_step_id IS NULL THEN\n EXECUTE format('SELECT %I.add_step(%s, ''EXCEPTION before first step logged'')', v_jobmon_schema, v_job_id) INTO v_step_id;\n END IF;\n EXECUTE format('SELECT %I.update_step(%s, ''CRITICAL'', %L)', v_jobmon_schema, v_step_id, 'ERROR: '||coalesce(SQLERRM,'unknown'));\n EXECUTE format('SELECT %I.fail_job(%s)', v_jobmon_schema, v_job_id);\n END IF;\n RAISE EXCEPTION '%\nCONTEXT: %\nDETAIL: %\nHINT: %', ex_message, ex_context, ex_detail, ex_hint;\nEND\n$$;", "explanation": "SQL definition from the open-source pg_partman PostgreSQL project.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "plpgsql_function", "is_postgresql_specific": true, "sql_length": 11252, "num_statements": 126} {"question": "List all distinct risk assessment models and their average scores", "schema": "CREATE TABLE RiskAssessment (ModelID INT, ModelName VARCHAR(50), PolicyholderID INT, Score INT); INSERT INTO RiskAssessment (ModelID, ModelName, PolicyholderID, Score) VALUES (1, 'Standard', 1, 80), (2, 'Comprehensive', 2, 85), (3, 'Standard', 3, 90), (4, 'Comprehensive', 4, 75), (5, 'Standard', 5, 95), (6, 'Comprehensive', 6, 80);", "sql": "SELECT ModelName, AVG(Score) FROM RiskAssessment GROUP BY ModelName;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 68, "num_statements": 1} {"question": "Show total hours billed and total billing amount for each attorney in 'billing' table", "schema": "CREATE TABLE billing (attorney_id INT, client_id INT, hours_billed INT, billing_rate DECIMAL(5,2));", "sql": "SELECT attorney_id, SUM(hours_billed), SUM(hours_billed * billing_rate) FROM billing GROUP BY attorney_id;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 106, "num_statements": 1} {"question": "What is the total water consumption for the state of California in the last quarter?", "schema": "CREATE TABLE MonthlyWaterUsage (Month DATE, State VARCHAR(20), Usage FLOAT); INSERT INTO MonthlyWaterUsage (Month, State, Usage) VALUES ('2022-01-01', 'California', 2500), ('2022-02-01', 'California', 3000), ('2022-03-01', 'California', 3500);", "sql": "SELECT SUM(Usage) FROM MonthlyWaterUsage WHERE State = 'California' AND Month >= DATEADD(QUARTER, -1, GETDATE());", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 113, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the most attendance for 25 january 2004", "schema": "CREATE TABLE table_name_64 (attendance INTEGER, date VARCHAR)", "sql": "SELECT MAX(attendance) FROM table_name_64 WHERE date = '25 january 2004';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 73, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the average for the player with BBM 5/85?", "schema": "CREATE TABLE table_27922491_8 (average VARCHAR, bbm VARCHAR)", "sql": "SELECT average FROM table_27922491_8 WHERE bbm = '5/85';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 56, "num_statements": 1} {"question": "What is the number of research projects in each department, broken down by department?", "schema": "CREATE TABLE research_projects (id INT, title VARCHAR(255), department VARCHAR(100), funding DECIMAL(10,2)); INSERT INTO research_projects (id, title, department, funding) VALUES (1, 'Robotics Project', 'Engineering', 50000.00), (2, 'Theory Project', 'Mathematics', 0.00), (3, 'AI Project', 'Computer Science', 80000.00);", "sql": "SELECT department, COUNT(*) as project_count FROM research_projects GROUP BY department;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 88, "num_statements": 1} {"question": "Which claims had a payment amount greater than $1000 in Texas?", "schema": "CREATE TABLE ClaimsData (ClaimID INT, Payment DECIMAL(5,2), State VARCHAR(20)); INSERT INTO ClaimsData VALUES (1, 500.00, 'California'), (2, 1500.00, 'Texas'), (3, 800.00, 'California');", "sql": "SELECT ClaimID, Payment FROM ClaimsData WHERE State = 'Texas' AND Payment > 1000;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 81, "num_statements": 1} {"question": "What is the maximum balance for customers from Germany?", "schema": "CREATE TABLE customers (id INT, name VARCHAR(255), country VARCHAR(255)); INSERT INTO customers (id, name, country) VALUES (1, 'John Doe', 'USA'), (2, 'Jane Smith', 'Canada'), (3, 'Jim Brown', 'UK'), (4, 'Heidi Klum', 'Germany'); CREATE TABLE accounts (id INT, customer_id INT, balance DECIMAL(10, 2)); INSERT INTO accounts (id, customer_id, balance) VALUES (1, 1, 12000.00), (2, 1, 8000.00), (3, 2, 5000.00), (4, 4, 20000.00);", "sql": "SELECT MAX(a.balance) FROM accounts a JOIN customers c ON a.customer_id = c.id WHERE c.country = 'Germany';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 107, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'citext' (example 376).", "schema": null, "sql": "SELECT regexp_split_to_table('hello world'::citext, E'\\\\s+') AS words;", "explanation": "Example query from the 'citext' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 70, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the venue when the result was position of 17th?", "schema": "CREATE TABLE table_name_27 (venue VARCHAR, position VARCHAR)", "sql": "SELECT venue FROM table_name_27 WHERE position = '17th';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 56, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'constraints' (example 141).", "schema": null, "sql": "INSERT INTO UNIQUE_TBL (t) VALUES ('seven');", "explanation": "DML from PostgreSQL core regression test for Constraints.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 44, "num_statements": 1} {"question": "PostgreSQL regression test 'collate': Write the SELECT query (example 10).", "schema": null, "sql": "SELECT * FROM collate_test1 WHERE b >= 'abc' COLLATE \"C\";", "explanation": "Regression test for Collate in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT * FROM collate_test1 WHERE b >= 'abc' COLLATE \"C\") AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 57, "num_statements": 1} {"question": "PostgreSQL Mvcc: show example 9.", "schema": null, "sql": "SELECT pg_advisory_lock(id) FROM foo WHERE id = 12345; -- ok SELECT pg_advisory_lock(id) FROM foo WHERE id > 12345 LIMIT 100; -- danger! SELECT pg_advisory_lock(q.id) FROM ( SELECT id FROM foo WHERE id > 12345 LIMIT 100 ) q; -- ok;", "explanation": "Example from PostgreSQL documentation on Mvcc.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 231, "num_statements": 4} {"question": "How many infectious disease cases were reported per month in 2021, in Texas?", "schema": "CREATE TABLE infectious_diseases (id INT, state VARCHAR(2), report_date DATE, disease_type VARCHAR(20)); INSERT INTO infectious_diseases (id, state, report_date, disease_type) VALUES (1, 'TX', '2021-01-15', 'COVID-19'); INSERT INTO infectious_diseases (id, state, report_date, disease_type) VALUES (2, 'CA', '2021-02-20', 'Influenza'); INSERT INTO infectious_diseases (id, state, report_date, disease_type) VALUES (3, 'TX', '2021-03-05', 'Measles');", "sql": "SELECT MONTH(report_date) as month, COUNT(*) as cases FROM infectious_diseases WHERE state = 'TX' AND YEAR(report_date) = 2021 GROUP BY month;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 142, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'float4' (example 18).", "schema": null, "sql": "INSERT INTO FLOAT4_TBL(f1) VALUES ('-10e-400');", "explanation": "DML from PostgreSQL core regression test for Float4.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 47, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What rank did the nation with athlete peter snell category:articles with hcards have?", "schema": "CREATE TABLE table_22355_11 (rank VARCHAR, athlete VARCHAR)", "sql": "SELECT COUNT(rank) FROM table_22355_11 WHERE athlete = 'Peter Snell Category:Articles with hCards';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 99, "num_statements": 1} {"question": "What is the average depth of all trenches in the Pacific Ocean?", "schema": "CREATE TABLE ocean_trenches (name VARCHAR(50), location VARCHAR(50), avg_depth FLOAT); INSERT INTO ocean_trenches", "sql": "SELECT AVG(avg_depth) FROM ocean_trenches WHERE location = 'Pacific Ocean';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 75, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the Away Team score of North Melbourne?", "schema": "CREATE TABLE table_name_24 (away_team VARCHAR)", "sql": "SELECT away_team AS score FROM table_name_24 WHERE away_team = 'north melbourne';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 81, "num_statements": 1} {"question": "What is the average claim amount by policy type and month?", "schema": "CREATE TABLE Claims (PolicyID INT, PolicyType VARCHAR(255), ClaimAmount DECIMAL(10, 2), ClaimDate DATE); INSERT INTO Claims VALUES (1, 'Auto', 500, '2022-01-05'), (2, 'Home', 1000, '2022-02-10'), (3, 'Auto', 750, '2022-03-15'), (4, 'Home', 1200, '2022-01-25'), (5, 'Auto', 300, '2022-02-01'), (6, 'Home', 1500, '2022-03-01');", "sql": "SELECT PolicyType, EXTRACT(MONTH FROM ClaimDate) AS Month, AVG(ClaimAmount) AS AvgClaimAmount FROM Claims GROUP BY PolicyType, Month;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 133, "num_statements": 1} {"question": "What was the total revenue for eco-friendly garments in India in Q2 2021?", "schema": "CREATE TABLE india_eco_friendly_garments (garment_type VARCHAR(255), sales_amount DECIMAL(10,2), quarter INT, year INT); INSERT INTO india_eco_friendly_garments (garment_type, sales_amount, quarter, year) VALUES ('Shirt', 700.00, 2, 2021), ('Pants', 800.00, 2, 2021);", "sql": "SELECT SUM(sales_amount) FROM india_eco_friendly_garments WHERE quarter = 2 AND year = 2021;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 92, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'pgcrypto' (example 8).", "schema": null, "sql": "select encode(decrypt_iv('\\x384a970695ce016a', '0123456', 'abcd', 'cast5'), 'escape');", "explanation": "Example query from the 'pgcrypto' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 86, "num_statements": 1} {"question": "What is the average revenue per night for hotels in Spain?", "schema": "CREATE TABLE hotels (id INT, name TEXT, country TEXT, is_eco_friendly BOOLEAN, daily_revenue INT); INSERT INTO hotels (id, name, country, is_eco_friendly, daily_revenue) VALUES (1, 'Barcelona Eco Hotel', 'Spain', true, 300), (2, 'Madrid Hotel', 'Spain', false, 250), (3, 'Ibiza Green Hotel', 'Spain', true, 400);", "sql": "SELECT AVG(daily_revenue) FROM hotels WHERE country = 'Spain';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 62, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'numeric' (example 250).", "schema": null, "sql": "INSERT INTO num_exp_mul VALUES (5,9,'-408725765384.257043660243220');", "explanation": "DML from PostgreSQL core regression test for Numeric.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 69, "num_statements": 1} {"question": "What is the risk score for each policyholder?", "schema": "See context", "sql": "SELECT * FROM policyholder_risk_scores;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 39, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the candidate for south carolina 4?", "schema": "CREATE TABLE table_1342256_40 (candidates VARCHAR, district VARCHAR)", "sql": "SELECT candidates FROM table_1342256_40 WHERE district = 'South Carolina 4';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 76, "num_statements": 1} {"question": "Identify the programs with no funding", "schema": "CREATE TABLE program_funding_2 (program_id INT, amount DECIMAL(10,2)); INSERT INTO program_funding_2 (program_id, amount) VALUES (1, 5000.00), (2, 7000.00), (4, 3000.00);", "sql": "SELECT p.name FROM programs p LEFT JOIN program_funding_2 f ON p.id = f.program_id WHERE f.program_id IS NULL;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 110, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which tournament had the result of a W in 2009?", "schema": "CREATE TABLE table_name_67 (tournament VARCHAR)", "sql": "SELECT tournament FROM table_name_67 WHERE 2009 = 'w';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 54, "num_statements": 1} {"question": "What's the total investment amount for 'renewable energy' and 'sustainable agriculture' strategies combined, by year?", "schema": "CREATE TABLE investments_strategies_amount (id INT, investment_year INT, strategy VARCHAR(30), investment_amount FLOAT); INSERT INTO investments_strategies_amount (id, investment_year, strategy, investment_amount) VALUES (1, 2019, 'renewable energy', 120000), (2, 2020, 'sustainable agriculture', 185000), (3, 2018, 'renewable energy', 175000);", "sql": "SELECT investment_year, SUM(investment_amount) FROM investments_strategies_amount WHERE strategy IN ('renewable energy', 'sustainable agriculture') GROUP BY investment_year;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 173, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What player has a date of 12-02-2003?", "schema": "CREATE TABLE table_name_98 (player VARCHAR, date VARCHAR)", "sql": "SELECT player FROM table_name_98 WHERE date = '12-02-2003';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 59, "num_statements": 1} {"question": "What is the average social impact score by region?", "schema": "CREATE TABLE social_impact_data (id INT, region VARCHAR(50), score INT); INSERT INTO social_impact_data (id, region, score) VALUES (1, 'Northeast', 80), (2, 'Southeast', 85), (3, 'Midwest', 75), (4, 'West', 90);", "sql": "SELECT region, AVG(score) as avg_score FROM social_impact_data GROUP BY region;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 79, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What score was given at the August 17, 2005 match?", "schema": "CREATE TABLE table_name_47 (score VARCHAR, date VARCHAR)", "sql": "SELECT score FROM table_name_47 WHERE date = 'august 17, 2005';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 63, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Where did the cable-stayed Badong Bridge open in 2005?", "schema": "CREATE TABLE table_name_18 (location VARCHAR, name VARCHAR, type VARCHAR, opened VARCHAR)", "sql": "SELECT location FROM table_name_18 WHERE type = 'cable-stayed' AND opened = 2005 AND name = 'badong bridge';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 108, "num_statements": 1} {"question": "Who are the most influential artists in the 'Influential_Artists' table?", "schema": "CREATE TABLE Influential_Artists (artist_id INT, artist_name VARCHAR(255), influence_score FLOAT);", "sql": "SELECT artist_name FROM Influential_Artists ORDER BY influence_score DESC LIMIT 1;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 82, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Please show each industry and the corresponding number of companies in that industry.", "schema": "CREATE TABLE Companies (Industry VARCHAR)", "sql": "SELECT Industry, COUNT(*) FROM Companies GROUP BY Industry;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 59, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the enrollment ratio for preschool in the region where enrollment ratio for tertiary is 29.55?", "schema": "CREATE TABLE table_25042332_22 (preschool__0_5_years_ VARCHAR, tertiary__18_24_years_ VARCHAR)", "sql": "SELECT preschool__0_5_years_ FROM table_25042332_22 WHERE tertiary__18_24_years_ = '29.55';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 91, "num_statements": 1} {"question": "What is the maximum number of members in a union?", "schema": "CREATE TABLE union_membership (id INT, union VARCHAR(20), member_count INT); INSERT INTO union_membership (id, union, member_count) VALUES (1, 'construction', 3500), (2, 'education', 8000), (3, 'manufacturing', 5000);", "sql": "SELECT MAX(member_count) FROM union_membership;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 47, "num_statements": 1} {"question": "PostgreSQL regression test 'stats_ext': Write the SELECT query (example 521).", "schema": null, "sql": "SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,20) <= ANY (ARRAY[1, NULL, 2, 3]) AND mod(b::int,10) IN (1, 2, NULL, 3)');", "explanation": "Regression test for Stats Ext in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,20) <= ANY (ARRAY[1, NULL, 2, 3]) AND mod(b::int,10) IN (1, 2, NULL, 3)')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": true, "sql_length": 146, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who built the conant creek pegram truss railroad bridge?", "schema": "CREATE TABLE table_name_71 (built VARCHAR, name VARCHAR)", "sql": "SELECT built FROM table_name_71 WHERE name = 'conant creek pegram truss railroad bridge';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 89, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the Director for the episode titled “some lapse of time”?", "schema": "CREATE TABLE table_name_85 (director VARCHAR, title VARCHAR)", "sql": "SELECT director FROM table_name_85 WHERE title = '“some lapse of time”';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 72, "num_statements": 1} {"question": "What is the total quantity of gold mined by each mine, partitioned by quarter and ordered by the most mined?", "schema": "CREATE TABLE mine (mine_id INT, mine_name VARCHAR(50), location VARCHAR(50), quantity_gold_mined INT); INSERT INTO mine (mine_id, mine_name, location, quantity_gold_mined) VALUES (1, 'Jade Mine', 'South Africa', 2000), (2, 'Emerald Mine', 'Russia', 3000), (3, 'Ruby Mine', 'Chile', 4000);", "sql": "SELECT mine_name, SUM(quantity_gold_mined) OVER (PARTITION BY DATE_TRUNC('quarter', date) ORDER BY SUM(quantity_gold_mined) DESC) as total_gold_mined FROM mine_gold_stats;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "query_window_function", "is_postgresql_specific": true, "sql_length": 171, "num_statements": 1} {"question": "PostgreSQL regression test 'partition_aggregate': Write the SELECT query (example 25).", "schema": null, "sql": "SELECT c, sum(a), avg(b), count(*) FROM pagg_tab GROUP BY 1 HAVING avg(d) < 15 ORDER BY 1, 2, 3;", "explanation": "Regression test for Partition Aggregate in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT c, sum(a), avg(b), count(*) FROM pagg_tab GROUP BY 1 HAVING avg(d) < 15 ORDER BY 1, 2, 3) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 96, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What country has a Youth (15-24) Literacy Rate Total of 99%, and a Youth Men of 98%?", "schema": "CREATE TABLE table_name_50 (country VARCHAR, youth__15_24__literacy_rate_total VARCHAR, youth_men VARCHAR)", "sql": "SELECT country FROM table_name_50 WHERE youth__15_24__literacy_rate_total = '99%' AND youth_men = '98%';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 104, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What team was the opponent on 09/20/1975?", "schema": "CREATE TABLE table_name_62 (opponent VARCHAR, date VARCHAR)", "sql": "SELECT opponent FROM table_name_62 WHERE date = '09/20/1975';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 61, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Show the name of storms which don't have affected region in record.", "schema": "CREATE TABLE affected_region (name VARCHAR, storm_id VARCHAR); CREATE TABLE storm (name VARCHAR, storm_id VARCHAR)", "sql": "SELECT name FROM storm WHERE NOT storm_id IN (SELECT storm_id FROM affected_region);", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 84, "num_statements": 1} {"question": "Show all records in the space_exploration table where the mission_status is 'Completed' or agency is 'ESA'", "schema": "CREATE TABLE space_exploration (id INT, mission_name VARCHAR(255), mission_status VARCHAR(255), agency VARCHAR(255), launch_date DATE);", "sql": "SELECT * FROM space_exploration WHERE mission_status = 'Completed' OR agency = 'ESA';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 85, "num_statements": 1} {"question": "Find the maximum and minimum number of employees for companies in the e-commerce sector.", "schema": "CREATE TABLE companies (id INT, name TEXT, industry TEXT, employees INT, founding_date DATE);", "sql": "SELECT MAX(employees), MIN(employees) FROM companies WHERE industry = 'E-commerce';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 83, "num_statements": 1} {"question": "Find the number of graduate students enrolled in each department, excluding those enrolled in the 'Computer Science' department.", "schema": "CREATE TABLE graduate_students (id INT, department VARCHAR(20), enrollment_status VARCHAR(10)); INSERT INTO graduate_students (id, department, enrollment_status) VALUES (1, 'Computer Science', 'Enrolled'), (2, 'Physics', 'Enrolled'), (3, 'Mathematics', 'Not Enrolled');", "sql": "SELECT department, COUNT(*) as enrollment_count FROM graduate_students WHERE department != 'Computer Science' GROUP BY department;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 130, "num_statements": 1} {"question": "PL/pgSQL test: Plpgsql Trap (example 7).", "schema": null, "sql": "create function trap_matching_test(int) returns int as $$\ndeclare x int;\n\tsx smallint;\n\ty int;\nbegin\n\tbegin\t-- start a subtransaction\n\t\tx := 100 / $1;\n\t\tsx := $1;\n\t\tselect into y data from match_source where id =\n\t\t\t(select id from match_source b where ten = $1);\n\texception\n\t\twhen data_exception then -- category match\n\t\t\traise notice 'caught data_exception';\n\t\t\tx := -1;\n\t\twhen NUMERIC_VALUE_OUT_OF_RANGE OR CARDINALITY_VIOLATION then\n\t\t\traise notice 'caught numeric_value_out_of_range or cardinality_violation';\n\t\t\tx := -2;\n\tend;\n\treturn x;\nend$$ language plpgsql;", "explanation": "PL/pgSQL example from PostgreSQL source test for Plpgsql Trap.", "validation_query": null, "source": "plpgsql_source", "difficulty": "intermediate", "category": "plpgsql_function", "is_postgresql_specific": true, "sql_length": 568, "num_statements": 13} {"question": "Generate PostgreSQL SQL for: What year was the Grand Prix Passing Shot Bordeaux that had Runners-up of Diego Nargiso?", "schema": "CREATE TABLE table_name_96 (year VARCHAR, tournament_name VARCHAR, runners_up VARCHAR)", "sql": "SELECT year FROM table_name_96 WHERE tournament_name = 'grand prix passing shot bordeaux' AND runners_up = 'diego nargiso';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 123, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the model that had years 2004-?", "schema": "CREATE TABLE table_name_67 (model VARCHAR, years VARCHAR)", "sql": "SELECT model FROM table_name_67 WHERE years = '2004-';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 54, "num_statements": 1} {"question": "What are the names and quantities of chemicals produced by factories located in California?", "schema": "CREATE TABLE factories (id INT, name TEXT, location TEXT); INSERT INTO factories (id, name, location) VALUES (1, 'Factory A', 'California'), (2, 'Factory B', 'Texas'); CREATE TABLE chemical_produced (factory_id INT, chemical_name TEXT, quantity INT); INSERT INTO chemical_produced (factory_id, chemical_name, quantity) VALUES (1, 'Chemical X', 100), (1, 'Chemical Y', 200), (2, 'Chemical Z', 300);", "sql": "SELECT chemical_name, quantity FROM chemical_produced CP JOIN factories F ON CP.factory_id = F.id WHERE F.location = 'California';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 130, "num_statements": 1} {"question": "Update 'emergency_response' table to set all response times above 60 minutes to 60 minutes", "schema": "CREATE TABLE emergency_response (id INT, incident_id INT, response_time INT, PRIMARY KEY(id));", "sql": "UPDATE emergency_response SET response_time = 60 WHERE response_time > 60;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 74, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the highest Year, when Opponent is #2 Syracuse?", "schema": "CREATE TABLE table_name_52 (year INTEGER, opponent VARCHAR)", "sql": "SELECT MAX(year) FROM table_name_52 WHERE opponent = '#2 syracuse';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 67, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the build date when total produced is 2?", "schema": "CREATE TABLE table_name_80 (build_date VARCHAR, total_produced VARCHAR)", "sql": "SELECT build_date FROM table_name_80 WHERE total_produced = 2;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 62, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: When John Senden was the runner-up, what was the To Par?", "schema": "CREATE TABLE table_name_76 (to_par VARCHAR, runner_s__up VARCHAR)", "sql": "SELECT to_par FROM table_name_76 WHERE runner_s__up = 'john senden';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 68, "num_statements": 1} {"question": "PostgreSQL regression test 'stats_ext': Write the SELECT query (example 454).", "schema": null, "sql": "SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a < 1 AND b < ''1''');", "explanation": "Regression test for Stats Ext in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a < 1 AND b < ''1''')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 88, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: For Saint-Paul parish, if it has an area of over 228.65 kilometers how many people live there?", "schema": "CREATE TABLE table_name_59 (population VARCHAR, official_name VARCHAR, area_km_2 VARCHAR)", "sql": "SELECT COUNT(population) FROM table_name_59 WHERE official_name = 'saint-paul' AND area_km_2 > 228.65;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 102, "num_statements": 1} {"question": "What is the minimum rating of sustainable beauty products sold in Asia?", "schema": "CREATE TABLE sustainability (id INT, product VARCHAR(255), sustainable BOOLEAN, rating FLOAT, region VARCHAR(255)); INSERT INTO sustainability (id, product, sustainable, rating, region) VALUES (1, 'Moisturizer', true, 4.5, 'Asia'), (2, 'Toner', false, 3.0, 'Asia'), (3, 'Cleanser', true, 4.0, 'Asia');", "sql": "SELECT MIN(rating) FROM sustainability WHERE sustainable = true AND region = 'Asia';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 84, "num_statements": 1} {"question": "What is the total funding received by the 'Artificial Intelligence' research grant?", "schema": "CREATE TABLE Grants (ID INT, Name VARCHAR(50), Category VARCHAR(50), Amount FLOAT); INSERT INTO Grants (ID, Name, Category, Amount) VALUES (1, 'AI Research', 'Artificial Intelligence', 500000), (2, 'ML Project', 'Machine Learning', 300000);", "sql": "SELECT SUM(Amount) FROM Grants WHERE Category = 'Artificial Intelligence';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 74, "num_statements": 1} {"question": "What are the mental health scores for students in 'Intro to Psychology'?", "schema": "CREATE TABLE mental_health (student_id INT, course_name TEXT, score INT); INSERT INTO mental_health (student_id, course_name, score) VALUES (123, 'Intro to Psychology', 75), (456, 'Intro to Psychology', 85), (789, 'Intro to Psychology', 95);", "sql": "SELECT student_id, score FROM mental_health WHERE course_name = 'Intro to Psychology';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 86, "num_statements": 1} {"question": "pgTAP test for Ownership (assertion 96).", "schema": null, "sql": "SELECT * FROM check_test(\n function_owner_is('public', 'somefunction', ARRAY['integer'], 'no one', 'mumble'),\n\tfalse,\n 'function_owner_is(sch, function, args[integer], non-user, desc)',\n 'mumble',\n ' have: ' || current_user || '\n want: no one'\n);", "explanation": "SQL assertion from pgTAP test suite for Ownership.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 272, "num_statements": 1} {"question": "pgTAP test for Pgtap--Unpackaged--0.91.0 (assertion 126).", "schema": null, "sql": "ALTER EXTENSION pgtap ADD FUNCTION col_type_is ( NAME, NAME, NAME, NAME, TEXT, TEXT );", "explanation": "SQL assertion from pgTAP test suite for Pgtap--Unpackaged--0.91.0.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 86, "num_statements": 1} {"question": "PostgreSQL regression test 'stats_import': Write the SELECT query (example 221).", "schema": null, "sql": "SELECT e.most_common_vals, e.most_common_val_nulls,\n e.most_common_freqs, e.most_common_base_freqs\nFROM pg_stats_ext AS e\nWHERE e.statistics_schemaname = 'stats_import' AND\n e.statistics_name = 'test_stat_mcv_exprs' AND\n e.inherited = false \\gx\n\n-- Incorrect extended stats kind, mcv not supported\nSELECT pg_catalog.pg_restore_extended_stats(\n 'schemaname', 'stats_import',\n 'relname', 'test',\n 'statistics_schemaname', 'stats_import',\n 'statistics_name', 'test_stat_dependencies',\n 'inherited', false,\n 'most_common_vals', '{{four,NULL},\n {one,\"(1,1.1,ONE,01-01-2001,\\\"{\\\"\\\"xkey\\\"\\\": \\\"\\\"xval\\\"\\\"}\\\")\"},\n {tre,\"(3,3.3,TRE,03-03-2003,)\"},\n {two,\"(2,2.2,TWO,02-02-2002,\\\"[true, 4, \\\"\\\"six\\\"\\\"]\\\")\"}}'::text[],\n 'most_common_freqs', '{0.25,0.25,0.25,0.25}'::double precision[],\n 'most_common_base_freqs', '{0.0625,0.0625,0.0625,0.0625}'::double precision[]);", "explanation": "Regression test for Stats Import in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT e.most_common_vals, e.most_common_val_nulls,\n e.most_common_freqs, e.most_common_base_freqs\nFROM pg_stats_ext AS e\nWHERE e.statistics_schemaname = 'stats_import' AND\n e.statistics_name = 'test_stat_mcv_exprs' AND\n e.inherited = false \\gx\n\n-- Incorrect extended stats kind, mcv not supported\nSELECT pg_catalog.pg_restore_extended_stats(\n 'schemaname', 'stats_import',\n 'relname', 'test',\n 'statistics_schemaname', 'stats_import',\n 'statistics_name', 'test_stat_dependencies',\n 'inherited', false,\n 'most_common_vals', '{{four,NULL},\n {one,\"(1,1.1,ONE,01-01-2001,\\\"{\\\"\\\"xkey\\\"\\\": \\\"\\\"xval\\\"\\\"}\\\")\"},\n {tre,\"(3,3.3,TRE,03-03-2003,)\"},\n {two,\"(2,2.2,TWO,02-02-2002,\\\"[true, 4, \\\"\\\"six\\\"\\\"]\\\")\"}}'::text[],\n 'most_common_freqs', '{0.25,0.25,0.25,0.25}'::double precision[],\n 'most_common_base_freqs', '{0.0625,0.0625,0.0625,0.0625}'::double precision[])) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": true, "sql_length": 945, "num_statements": 1} {"question": "What is the maximum billing amount for cases handled by female attorneys?", "schema": "CREATE TABLE AttorneyGender (AttorneyID INT, Gender VARCHAR(10)); INSERT INTO AttorneyGender (AttorneyID, Gender) VALUES (1, 'Female'), (2, 'Male'), (3, 'Female');", "sql": "SELECT MAX(BillingAmount) FROM AttorneyBilling JOIN AttorneyGender ON AttorneyBilling.AttorneyID = AttorneyGender.AttorneyID WHERE Gender = 'Female';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 149, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What Tournament on October 24, 1982 had Alycia Moulton as the Partner?", "schema": "CREATE TABLE table_name_23 (tournament VARCHAR, date VARCHAR, partner VARCHAR)", "sql": "SELECT tournament FROM table_name_23 WHERE date = 'october 24, 1982' AND partner = 'alycia moulton';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 100, "num_statements": 1} {"question": "Show the total revenue for each month in 2022.", "schema": "CREATE TABLE MonthlyRevenue (Month VARCHAR(10), Revenue INT); INSERT INTO MonthlyRevenue (Month, Revenue) VALUES ('January', 5000), ('February', 6000), ('March', 7000);", "sql": "SELECT Month, SUM(Revenue) FROM MonthlyRevenue WHERE Month IN ('January', 'February', 'March', 'April', 'May', 'June', 'July', 'August', 'September', 'October', 'November', 'December') AND YEAR(STR_TO_DATE(Month, '%B')) = 2022 GROUP BY Month;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 242, "num_statements": 1} {"question": "What is the average speed of vessels that transport hazardous materials?", "schema": "CREATE TABLE Vessels (VesselID INT, Name TEXT, Type TEXT, MaxSpeed FLOAT); CREATE TABLE Cargo (CargoID INT, VesselID INT, Material TEXT, Quantity INT); INSERT INTO Vessels VALUES (1, 'Tanker 1', 'Oil Tanker', 15.5); INSERT INTO Cargo VALUES (1, 1, 'Hazardous', NULL);", "sql": "SELECT AVG(Vessels.MaxSpeed) FROM Vessels INNER JOIN Cargo ON Vessels.VesselID = Cargo.VesselID WHERE Cargo.Material = 'Hazardous';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 131, "num_statements": 1} {"question": "Write the SQL definition from PostgreSQL contrib extension 'contrib' (item 3).", "schema": null, "sql": "--\n-- gist_page_items()\n--\nCREATE FUNCTION gist_page_items(IN page bytea,\n IN index_oid regclass,\n OUT itemoffset smallint,\n OUT ctid tid,\n OUT itemlen smallint,\n OUT dead boolean,\n OUT keys text)\nRETURNS SETOF record\nAS 'MODULE_PATHNAME', 'gist_page_items'\nLANGUAGE C STRICT PARALLEL SAFE;", "explanation": "SQL definition from the 'contrib' PostgreSQL contrib extension.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 308, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: In what county is the school of Merrillville?", "schema": "CREATE TABLE table_name_17 (county VARCHAR, school VARCHAR)", "sql": "SELECT county FROM table_name_17 WHERE school = 'merrillville';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 63, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'plpgsql' (example 655).", "schema": null, "sql": "create function arrayassign1() returns text[] language plpgsql as $$\ndeclare\n r record;\nbegin\n r := row(12, '{foo,bar,baz}')::rtype;\n r.ar[2] := 'replace';\n return r.ar;\nend$$;", "explanation": "DDL from PostgreSQL core regression test for Plpgsql.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "plpgsql_function", "is_postgresql_specific": true, "sql_length": 179, "num_statements": 5} {"question": "List biosensor technologies in descending order based on their maximum temperature", "schema": "CREATE SCHEMA if not exists biosensors;CREATE TABLE if not exists biosensors.temperature (id INT, biosensor_name VARCHAR(255), temperature DECIMAL(10,2)); INSERT INTO biosensors.temperature (id, biosensor_name, temperature) VALUES (1, 'BioTherm', 37.5), (2, 'BioSense', 38.2), (3, 'BioTemp', 37.8), (4, 'BioCool', 35.0);", "sql": "SELECT biosensor_name, MAX(temperature) max_temperature FROM biosensors.temperature GROUP BY biosensor_name ORDER BY max_temperature DESC;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 138, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which From club had a Transfer fee of £3.87m?", "schema": "CREATE TABLE table_name_40 (from_club VARCHAR, transfer_fee VARCHAR)", "sql": "SELECT from_club FROM table_name_40 WHERE transfer_fee = '£3.87m';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "PostgreSQL regression test 'enum': Write the SELECT query (example 68).", "schema": null, "sql": "SELECT * FROM enumtest WHERE col <= 'green' ORDER BY col;", "explanation": "Regression test for Enum in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT * FROM enumtest WHERE col <= 'green' ORDER BY col) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 57, "num_statements": 1} {"question": "Show a SQL definition from the zombodb project (_finalize, item 3).", "schema": null, "sql": "CREATE OR REPLACE FUNCTION zdb.get_highlight_analysis_info(index_name regclass, field text)\n RETURNS TABLE\n (\n type text,\n normalizer text,\n index_tokenizer text,\n search_tokenizer text\n )\n LANGUAGE sql\nAS\n$$\nWITH mapping AS (SELECT jsonb_extract_path(\n zdb.index_mapping(index_name),\n VARIADIC ARRAY [zdb.index_name(index_name), 'mappings', 'properties'] ||\n string_to_array(replace(field, '.', '.properties.'), '.')) AS mapping)\nSELECT mapping ->> 'type' AS type,\n mapping ->> 'normalizer' AS normalizer,\n mapping ->> 'analyzer' AS index_analyzer,\n mapping ->> 'search_analyzer' AS search_analyzer\nFROM mapping;\n$$;", "explanation": "SQL definition from the open-source zombodb PostgreSQL project.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "plpgsql_function", "is_postgresql_specific": true, "sql_length": 923, "num_statements": 2} {"question": "Identify the number of transactions and total transaction amount per day for the past week", "schema": "CREATE TABLE transactions (id INT, customer_id INT, transaction_date DATE, amount FLOAT); INSERT INTO transactions (id, customer_id, transaction_date, amount) VALUES (1, 1, '2022-01-01', 1000.00), (2, 2, '2022-01-02', 2000.00), (3, 1, '2022-01-03', 1500.00);", "sql": "SELECT DATE(transaction_date) AS transaction_date, COUNT(*) AS num_transactions, SUM(amount) AS total_amount FROM transactions WHERE transaction_date BETWEEN DATE_SUB(CURDATE(), INTERVAL 7 DAY) AND CURDATE() GROUP BY transaction_date;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 234, "num_statements": 1} {"question": "Insert a new record of circular economy initiative for the city of Tokyo in 2027.", "schema": "CREATE TABLE circular_economy(city VARCHAR(20), year INT, initiative VARCHAR(50));", "sql": "INSERT INTO circular_economy VALUES('Tokyo', 2027, 'Implementing a city-wide food waste reduction program');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 108, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many instances are there of party in the situation where Robert Bauman is the incumbent politician?", "schema": "CREATE TABLE table_1341690_20 (party VARCHAR, incumbent VARCHAR)", "sql": "SELECT COUNT(party) FROM table_1341690_20 WHERE incumbent = 'Robert Bauman';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 76, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'partition_info' (example 9).", "schema": null, "sql": "CREATE TABLE ptif_test01 PARTITION OF ptif_test0 FOR VALUES IN (1);", "explanation": "DDL from PostgreSQL core regression test for Partition Info.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_table", "is_postgresql_specific": false, "sql_length": 67, "num_statements": 1} {"question": "Find the number of players who have played games from only one genre.", "schema": "CREATE TABLE PlayerGames (PlayerID INT, GameID INT, GameGenre TEXT); INSERT INTO PlayerGames (PlayerID, GameID, GameGenre) VALUES (1, 1, 'Action'), (1, 2, 'RPG'), (2, 3, 'Strategy'), (2, 4, 'Simulation'), (3, 5, 'FPS'), (3, 6, 'FPS'), (4, 7, 'Action'), (4, 8, 'RPG'), (5, 9, 'Strategy'), (5, 10, 'Simulation'), (6, 11, 'Action'); CREATE TABLE Players (PlayerID INT); INSERT INTO Players (PlayerID) VALUES (1), (2), (3), (4), (5), (6);", "sql": "SELECT COUNT(DISTINCT PlayerID) FROM (SELECT PlayerID FROM PlayerGames GROUP BY PlayerID HAVING COUNT(DISTINCT GameGenre) = 1) AS SingleGenrePlayers;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 149, "num_statements": 1} {"question": "Find the total billing amount for cases handled by attorney 'Lee' in 2017.", "schema": "CREATE TABLE case_billing (case_id INT, attorney_id INT, billing_amount DECIMAL, case_date DATE); CREATE TABLE attorneys (attorney_id INT, attorney_last_name VARCHAR(50));", "sql": "SELECT SUM(billing_amount) FROM case_billing JOIN attorneys ON case_billing.attorney_id = attorneys.attorney_id WHERE attorneys.attorney_last_name = 'Lee' AND case_date BETWEEN '2017-01-01' AND '2017-12-31';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 207, "num_statements": 1} {"question": "Write the PL/pgSQL object from PostgreSQL regression test 'geometry' (example 9).", "schema": null, "sql": "-- \"is vertical\" function\nSELECT p1.f1\n FROM POINT_TBL p1\n WHERE isvertical(p1.f1, point '(5.1,34.5)');", "explanation": "PL/pgSQL object from PostgreSQL core test for Geometry.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 107, "num_statements": 1} {"question": "What is the average number of spectators in fighting game esports events?", "schema": "CREATE TABLE EsportsEvents (EventID INT, Game VARCHAR(20), Spectators INT); INSERT INTO EsportsEvents (EventID, Game, Spectators) VALUES (1, 'Fighting', 5000);", "sql": "SELECT AVG(Spectators) FROM EsportsEvents WHERE Game = 'Fighting';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "Insert a new smart city technology adoption project", "schema": "CREATE TABLE smart_city_projects (id INT, name VARCHAR(50), location VARCHAR(50), adoption_rate FLOAT);", "sql": "INSERT INTO smart_city_projects (id, name, location, adoption_rate) VALUES (4, 'Smart Lighting', 'City A', 0.65);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 113, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Find the names of courses that have either 3 credits or 1 credit but 4 hours.", "schema": "CREATE TABLE COURSE (CName VARCHAR, Credits VARCHAR, Hours VARCHAR)", "sql": "SELECT CName FROM COURSE WHERE Credits = 3 UNION SELECT CName FROM COURSE WHERE Credits = 1 AND Hours = 4;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 106, "num_statements": 1} {"question": "What is the total investment in the 'Equity' fund type for customers in the 'Europe' region?", "schema": "CREATE TABLE investments (id INT, customer_id INT, fund_type VARCHAR(50), investment_amount DECIMAL(10,2)); INSERT INTO investments (id, customer_id, fund_type, investment_amount) VALUES (1, 1, 'Bond', 10000.00); INSERT INTO investments (id, customer_id, fund_type, investment_amount) VALUES (2, 2, 'Equity', 15000.00); INSERT INTO investments (id, customer_id, fund_type, investment_amount) VALUES (3, 3, 'Bond', 20000.00); INSERT INTO investments (id, customer_id, fund_type, investment_amount) VALUES (4, 4, 'Equity', 25000.00);", "sql": "SELECT SUM(investment_amount) FROM investments WHERE fund_type = 'Equity' AND customer_id IN (SELECT id FROM customers WHERE region = 'Europe');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 144, "num_statements": 1} {"question": "Write the PL/pgSQL object from PostgreSQL regression test 'misc' (example 7).", "schema": null, "sql": "--\n-- BTREE shutting out non-functional updates\n--\n-- the following two tests seem to take a long time on some\n-- systems. This non-func update stuff needs to be examined\n-- more closely. \t\t\t- jolly (2/22/96)\n--\nSELECT two, stringu1, ten, string4\n INTO TABLE tmp\n FROM onek;", "explanation": "PL/pgSQL object from PostgreSQL core test for Misc.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 282, "num_statements": 1} {"question": "Write the PL/pgSQL object from PostgreSQL regression test 'tsearch' (example 376).", "schema": null, "sql": "-- Test inlining of immutable constant functions\n\n-- to_tsquery(text) is not immutable, so it won't be inlined\nexplain (costs off)\nselect * from test_tsquery, to_tsquery('new') q where txtsample @@ q;", "explanation": "PL/pgSQL object from PostgreSQL core test for Tsearch.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "admin_maintenance", "is_postgresql_specific": false, "sql_length": 200, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many points for the cooper car company after 1959?", "schema": "CREATE TABLE table_name_90 (points INTEGER, entrant VARCHAR, year VARCHAR)", "sql": "SELECT SUM(points) FROM table_name_90 WHERE entrant = 'cooper car company' AND year > 1959;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 91, "num_statements": 1} {"question": "Delete all ingredient source records for the product named \"Rose Hip Oil\" from the brand \"PureNature\".", "schema": "CREATE TABLE ingredient_sources (id INT PRIMARY KEY, product_id INT, ingredient TEXT, country TEXT, source TEXT); CREATE TABLE products (id INT PRIMARY KEY, name TEXT, brand TEXT); INSERT INTO products (id, name, brand) VALUES (1, 'Rose Hip Oil', 'PureNature'); INSERT INTO ingredient_sources (id, product_id, ingredient, country, source) VALUES (1, 1, 'Rose Hip Seed Oil', 'Chile', 'Organic Farm');", "sql": "DELETE FROM ingredient_sources WHERE product_id = (SELECT id FROM products WHERE name = 'Rose Hip Oil' AND brand = 'PureNature');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_delete", "is_postgresql_specific": false, "sql_length": 129, "num_statements": 1} {"question": "Find the average temperature and humidity for the crops in field 3 during the last week.", "schema": "CREATE TABLE field_sensors (field_id INT, sensor_type VARCHAR(20), value FLOAT, timestamp TIMESTAMP); INSERT INTO field_sensors (field_id, sensor_type, value, timestamp) VALUES (3, 'temperature', 28.5, '2023-02-15 10:00:00'), (3, 'humidity', 35.0, '2023-02-15 10:00:00');", "sql": "SELECT field_id, AVG(value) FROM field_sensors WHERE sensor_type IN ('temperature', 'humidity') AND timestamp >= NOW() - INTERVAL 7 DAY GROUP BY field_id;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 154, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was Eric Dickerson's longest run?", "schema": "CREATE TABLE table_11157122_5 (longest_run VARCHAR, player VARCHAR)", "sql": "SELECT COUNT(longest_run) FROM table_11157122_5 WHERE player = 'Eric Dickerson';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 80, "num_statements": 1} {"question": "What is the maximum account balance for customers in the Latin America region?", "schema": "CREATE TABLE customer_data (customer_id INT, account_balance DECIMAL(10, 2), region VARCHAR(20)); INSERT INTO customer_data (customer_id, account_balance, region) VALUES (1, 5000, 'Latin America'), (2, 7000, 'North America'), (3, 6000, 'Latin America');", "sql": "SELECT MAX(account_balance) FROM customer_data WHERE region = 'Latin America';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 78, "num_statements": 1} {"question": "What is the total number of students enrolled in each district, and what is the minimum mental health score for students in each district?", "schema": "CREATE TABLE districts (district_id INT, district_name TEXT); CREATE TABLE students (student_id INT, district_id INT, mental_health_score INT); INSERT INTO districts VALUES (1, 'District A'), (2, 'District B'); INSERT INTO students VALUES (1, 1, 60), (2, 1, 75), (3, 2, 45), (4, 2, 30);", "sql": "SELECT d.district_name, COUNT(s.student_id) as num_students, MIN(s.mental_health_score) as min_mental_health_score FROM students s JOIN districts d ON s.district_id = d.district_id GROUP BY s.district_id;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 204, "num_statements": 1} {"question": "What is the minimum claim amount for policyholders living in the 'south' region with 'car insurance' policies?", "schema": "CREATE TABLE policyholders (id INT, policy_type VARCHAR(20), region VARCHAR(10), claim_amount INT); INSERT INTO policyholders (id, policy_type, region, claim_amount) VALUES (1, 'car insurance', 'south', 5000), (2, 'home insurance', 'north', 3000), (3, 'car insurance', 'south', 1000);", "sql": "SELECT MIN(claim_amount) FROM policyholders WHERE policy_type = 'car insurance' AND region = 'south';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 101, "num_statements": 1} {"question": "Which maintenance types were performed on each route and how many times?", "schema": "CREATE TABLE RouteMaintenance (VehicleID INT, RouteID INT, MaintenanceType VARCHAR(50)); INSERT INTO RouteMaintenance (VehicleID, RouteID, MaintenanceType) VALUES (101, 1, 'Oil Change'); INSERT INTO RouteMaintenance (VehicleID, RouteID, MaintenanceType) VALUES (102, 2, 'Tire Rotation');", "sql": "SELECT RouteID, MaintenanceType, COUNT(*) AS MaintenanceCount FROM RouteMaintenance GROUP BY RouteID, MaintenanceType;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 118, "num_statements": 1} {"question": "What was the average donation amount per donor by region for 2022?", "schema": "CREATE TABLE Donors (donor_id INT, donation_amount DECIMAL(10,2), donor_region VARCHAR(255), donation_date DATE); INSERT INTO Donors (donor_id, donation_amount, donor_region, donation_date) VALUES (1, 500, 'North', '2022-01-01'), (2, 350, 'South', '2022-02-01'), (3, 700, 'East', '2022-03-01'), (4, 280, 'West', '2022-04-01'), (5, 600, 'North', '2022-05-01');", "sql": "SELECT donor_region, AVG(donation_amount) as avg_donation FROM Donors WHERE donation_date BETWEEN '2022-01-01' AND '2022-12-31' GROUP BY donor_region;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 150, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who won Womens Singles in the year that Ma Lin won Mens Singles?", "schema": "CREATE TABLE table_28138035_15 (womens_singles VARCHAR, mens_singles VARCHAR)", "sql": "SELECT womens_singles FROM table_28138035_15 WHERE mens_singles = 'Ma Lin';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 75, "num_statements": 1} {"question": "pgTAP test for Unique (assertion 6).", "schema": null, "sql": "/****************************************************************************/\n-- Test has_unique().\n\nSELECT * FROM check_test(\n has_unique( 'public', 'sometab', 'public.sometab should have a unique constraint' ),\n true,\n 'has_unique( schema, table, description )',\n 'public.sometab should have a unique constraint',\n ''\n);", "explanation": "SQL assertion from pgTAP test suite for Unique.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 338, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What were the notes for the time of 6:34.51?", "schema": "CREATE TABLE table_name_56 (notes VARCHAR, time VARCHAR)", "sql": "SELECT notes FROM table_name_56 WHERE time = '6:34.51';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 55, "num_statements": 1} {"question": "PostgreSQL regression test 'numeric': Write the SELECT query (example 886).", "schema": null, "sql": "select (-1.0) ^ 2147483647;", "explanation": "Regression test for Numeric in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select (-1.0) ^ 2147483647) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 27, "num_statements": 1} {"question": "What are the names and research interests of faculty members who have received grants from the 'National Science Foundation'?", "schema": "CREATE TABLE Faculty (FacultyID int, Name varchar(50), ResearchInterest varchar(50)); INSERT INTO Faculty (FacultyID, Name, ResearchInterest) VALUES (1, 'John Smith', 'Machine Learning'); INSERT INTO Faculty (FacultyID, Name, ResearchInterest) VALUES (2, 'Jane Doe', 'Data Science'); CREATE TABLE Grants (GrantID int, Grantor varchar(50), FacultyID int); INSERT INTO Grants (GrantID, Grantor, FacultyID) VALUES (1, 'National Science Foundation', 1); INSERT INTO Grants (GrantID, Grantor, FacultyID) VALUES (2, 'Microsoft Research', 2);", "sql": "SELECT Faculty.Name, Faculty.ResearchInterest FROM Faculty INNER JOIN Grants ON Faculty.FacultyID = Grants.FacultyID WHERE Grants.Grantor = 'National Science Foundation';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 170, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the ICAO in India with IATA TRZ?", "schema": "CREATE TABLE table_name_90 (icao VARCHAR, country VARCHAR, iata VARCHAR)", "sql": "SELECT icao FROM table_name_90 WHERE country = 'india' AND iata = 'trz';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 72, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'rowsecurity' (example 386).", "schema": null, "sql": "CREATE POLICY p2 ON document FOR INSERT WITH CHECK (dauthor = current_user);", "explanation": "DDL from PostgreSQL core regression test for Rowsecurity.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 76, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which Team 2 has a Team 1 of vardar?", "schema": "CREATE TABLE table_name_76 (team_2 VARCHAR, team_1 VARCHAR)", "sql": "SELECT team_2 FROM table_name_76 WHERE team_1 = 'vardar';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 57, "num_statements": 1} {"question": "PostgreSQL regression test 'misc': Write the SELECT query (example 63).", "schema": null, "sql": "SELECT name(equipment(ROW('skywalking', 'mer')));", "explanation": "Regression test for Misc in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT name(equipment(ROW('skywalking', 'mer')))) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 49, "num_statements": 1} {"question": "PostgreSQL regression test 'float8': Write the SELECT query (example 162).", "schema": null, "sql": "SELECT '-2147483648.6'::float8::int4;", "explanation": "Regression test for Float8 in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT '-2147483648.6'::float8::int4) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 37, "num_statements": 1} {"question": "Insert new records into 'OilRigs' table for the following data: (RigID, Location, Status, Type) - ('Rig02', 'Barents Sea', 'Inactive', 'Offshore'), ('Rig03', 'North Sea', 'Operational', 'Onshore')", "schema": "CREATE TABLE OilRigs (RigID VARCHAR(10), Location VARCHAR(20), Status VARCHAR(20), Type VARCHAR(10));", "sql": "INSERT INTO OilRigs (RigID, Location, Status, Type) VALUES ('Rig02', 'Barents Sea', 'Inactive', 'Offshore'), ('Rig03', 'North Sea', 'Operational', 'Onshore');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 158, "num_statements": 1} {"question": "Find the number of properties in the historic_buildings table that are 'landmarks'.", "schema": "CREATE TABLE historic_buildings (property_id INT, is_landmark BOOLEAN); INSERT INTO historic_buildings VALUES (1, true), (2, false), (3, false)", "sql": "SELECT COUNT(*) FROM historic_buildings WHERE is_landmark = true;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 65, "num_statements": 1} {"question": "List the names of all soccer players who have scored a hat-trick (3 goals) in a single match, along with the team they played for and the date of the match.", "schema": "CREATE TABLE soccer_games (id INT, date DATE, home_team VARCHAR(50), away_team VARCHAR(50), goals_home INT, goals_away INT); CREATE TABLE soccer_players_goals (id INT, game_id INT, player_id INT, goals INT); CREATE TABLE soccer_players (id INT, name VARCHAR(100), team VARCHAR(50));", "sql": "SELECT p.name, g.home_team, g.date FROM soccer_games g JOIN soccer_players_goals pg ON g.id = pg.game_id JOIN soccer_players p ON pg.player_id = p.id WHERE pg.goals >= 3 GROUP BY p.name, g.date, g.home_team ORDER BY g.date; SELECT p.name, g.away_team, g.date FROM soccer_games g JOIN soccer_players_goals pg ON g.id = pg.game_id JOIN soccer_players p ON pg.player_id = p.id WHERE pg.goals >= 3 GROUP BY p.name, g.date, g.away_team ORDER BY g.date;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 447, "num_statements": 2} {"question": "Generate PostgreSQL SQL for: what game had an attendance of 21,629", "schema": "CREATE TABLE table_name_70 (record VARCHAR, attendance VARCHAR)", "sql": "SELECT record FROM table_name_70 WHERE attendance = '21,629';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 61, "num_statements": 1} {"question": "What is the total number of hours spent on training programs in the last 6 months?", "schema": "CREATE TABLE TrainingSessions (SessionID INT, EmployeeID INT, SessionDate DATE, SessionHours DECIMAL(3,2)); INSERT INTO TrainingSessions (SessionID, EmployeeID, SessionDate, SessionHours) VALUES (1, 1, '2022-01-01', 2.00), (2, 1, '2022-02-01', 3.00);", "sql": "SELECT SUM(SessionHours) FROM TrainingSessions WHERE SessionDate >= DATEADD(month, -6, GETDATE());", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 98, "num_statements": 1} {"question": "What is the average number of followers for users who engaged with at least 10 advertisements in the past month?", "schema": "CREATE TABLE user_ads (user_id INT, ad_date DATE); CREATE TABLE users (id INT, followers INT);", "sql": "SELECT AVG(f.followers) FROM users f JOIN (SELECT user_id FROM user_ads WHERE ad_date >= CURRENT_DATE - INTERVAL '1 month' GROUP BY user_id HAVING COUNT(*) >= 10) t ON f.id = t.user_id;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 185, "num_statements": 1} {"question": "PostgreSQL regression test 'stats_ext': Write the SELECT query (example 373).", "schema": null, "sql": "SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) = ANY (ARRAY[2, 102]) AND upper(b) = ANY (ARRAY[''1'', ''2''])');", "explanation": "Regression test for Stats Ext in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) = ANY (ARRAY[2, 102]) AND upper(b) = ANY (ARRAY[''1'', ''2''])')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": true, "sql_length": 153, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the highest CPC blend Kazakhstan number when Barrow Island Australia is smaller than 12?", "schema": "CREATE TABLE table_name_48 (cpc_blend_kazakhstan INTEGER, barrow_island_australia INTEGER)", "sql": "SELECT MAX(cpc_blend_kazakhstan) FROM table_name_48 WHERE barrow_island_australia < 12;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 87, "num_statements": 1} {"question": "What is the total number of hours spent on open pedagogy initiatives by each teacher?", "schema": "CREATE TABLE teachers (id INT PRIMARY KEY, name VARCHAR(255)); CREATE TABLE open_pedagogy_initiatives (id INT PRIMARY KEY, teacher_id INT, hours_spent INT);", "sql": "SELECT t.name, SUM(opi.hours_spent) FROM open_pedagogy_initiatives opi JOIN teachers t ON opi.teacher_id = t.id GROUP BY opi.teacher_id;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 136, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: WHAT ROUND WAS FORWARD ANDRE PETERSSON SELECTED?", "schema": "CREATE TABLE table_11803648_17 (round INTEGER, position VARCHAR)", "sql": "SELECT MIN(round) FROM table_11803648_17 WHERE position = 'Forward';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 68, "num_statements": 1} {"question": "What is the total number of wind energy projects in Canada and Germany in the last 10 years?", "schema": "CREATE TABLE wind_energy_projects (id INT, country VARCHAR(255), year INT, completed BOOLEAN); INSERT INTO wind_energy_projects (id, country, year, completed) VALUES (1, 'Canada', 2021, true), (2, 'Canada', 2019, true), (3, 'Canada', 2020, true), (4, 'Germany', 2018, true), (5, 'Germany', 2017, true), (6, 'Germany', 2016, true), (7, 'Canada', 2015, true), (8, 'Germany', 2015, true);", "sql": "SELECT COUNT(*) FROM wind_energy_projects WHERE country IN ('Canada', 'Germany') AND year >= (SELECT YEAR(CURRENT_DATE()) - 10);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 128, "num_statements": 1} {"question": "What is the number of animals released back into the wild for each species?", "schema": "CREATE TABLE release_centers (center_id INT, center_name VARCHAR(50));CREATE TABLE animal_releases (release_id INT, animal_id INT, species_id INT, center_id INT, release_date DATE); INSERT INTO release_centers (center_id, center_name) VALUES (1, 'Release Center A'), (2, 'Release Center B'); INSERT INTO animal_releases (release_id, animal_id, species_id, center_id, release_date) VALUES (1001, 101, 1, 1, '2021-01-01'), (1002, 102, 2, 1, '2021-03-01'), (1003, 103, 3, 2, '2021-05-01');", "sql": "SELECT s.species_name, COUNT(a.animal_id) AS total_released FROM animal_releases a JOIN release_centers rc ON a.center_id = rc.center_id JOIN animal_species s ON a.species_id = s.species_id GROUP BY s.species_name;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 214, "num_statements": 1} {"question": "PostgreSQL regression test 'int8': Write the SELECT query (example 146).", "schema": null, "sql": "SELECT int8 '0o273';", "explanation": "Regression test for Int8 in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT int8 '0o273') AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 20, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which United States player has a place of T3?", "schema": "CREATE TABLE table_name_3 (player VARCHAR, country VARCHAR, place VARCHAR)", "sql": "SELECT player FROM table_name_3 WHERE country = 'united states' AND place = 't3';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 81, "num_statements": 1} {"question": "PostgreSQL regression test 'macaddr': Write the SELECT query (example 14).", "schema": null, "sql": "SELECT * FROM macaddr_data;", "explanation": "Regression test for Macaddr in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT * FROM macaddr_data) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 27, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many patients stay in room 112?", "schema": "CREATE TABLE stay (patient VARCHAR, room VARCHAR)", "sql": "SELECT COUNT(patient) FROM stay WHERE room = 112;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 49, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the pick number of Jesse Alexander?", "schema": "CREATE TABLE table_name_39 (pick__number INTEGER, player VARCHAR)", "sql": "SELECT MIN(pick__number) FROM table_name_39 WHERE player = 'jesse alexander';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 77, "num_statements": 1} {"question": "List the names of all cities that have never had a mayor from a historically underrepresented community?", "schema": "CREATE TABLE city (id INT, name VARCHAR(255)); INSERT INTO city (id, name) VALUES (1, 'New York'), (2, 'Los Angeles'), (3, 'Chicago'), (4, 'Houston'), (5, 'Phoenix'); CREATE TABLE mayor (id INT, city_id INT, name VARCHAR(255), community VARCHAR(255)); INSERT INTO mayor (id, city_id, name, community) VALUES (1, 1, 'John Smith', 'White'), (2, 1, 'James Johnson', 'African American'), (3, 2, 'Maria Rodriguez', 'Hispanic'), (4, 3, 'William Lee', 'Asian'), (5, 4, 'Robert Brown', 'White'), (6, 5, 'David Garcia', 'Hispanic');", "sql": "SELECT c.name FROM city c WHERE c.id NOT IN (SELECT m.city_id FROM mayor m WHERE m.community != 'White');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 105, "num_statements": 1} {"question": "How many vessels are there in each type in the 'fleet_management' table?", "schema": "CREATE TABLE fleet_management (id INT, name VARCHAR(50), type VARCHAR(50), capacity INT);", "sql": "SELECT type, COUNT(*) FROM fleet_management GROUP BY type;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 58, "num_statements": 1} {"question": "What is the average ESG score for each investment strategy?", "schema": "CREATE TABLE InvestmentStrategies (StrategyID INT, StrategyName VARCHAR(20), ESGScore INT); INSERT INTO InvestmentStrategies (StrategyID, StrategyName, ESGScore) VALUES (1, 'Impact Investing', 80), (2, 'Green Energy', 90), (3, 'Social Entrepreneurship', 70), (4, 'Microfinance', 85);", "sql": "SELECT StrategyName, AVG(ESGScore) as AverageESG FROM InvestmentStrategies GROUP BY StrategyName;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 97, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'btree_gin' (example 7).", "schema": null, "sql": "SELECT * FROM test_text WHERE i='abc' ORDER BY i;", "explanation": "Example query from the 'btree_gin' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 49, "num_statements": 1} {"question": "Who are the employees that have been working in the company for more than 2 years and earn less than $60,000?", "schema": "CREATE TABLE Employees (EmployeeID INT, Name VARCHAR(50), Department VARCHAR(50), Position VARCHAR(50), Salary FLOAT, HireDate DATE); INSERT INTO Employees (EmployeeID, Name, Department, Position, Salary, HireDate) VALUES (1, 'John Doe', 'IT', 'Developer', 75000.00, '2020-02-14'), (2, 'Jane Smith', 'IT', 'Tester', 60000.00, '2022-05-11'), (3, 'Alice Johnson', 'Marketing', 'Marketing Specialist', 60000.00, '2019-08-01'), (4, 'Bob Brown', 'HR', 'HR Specialist', 65000.00, '2021-11-15');", "sql": "SELECT * FROM Employees WHERE DATEDIFF(CURDATE(), HireDate) > 730 AND Salary < 60000;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 85, "num_statements": 1} {"question": "Delete all ingredients associated with the food product with id 1", "schema": "CREATE TABLE ingredients (id INT PRIMARY KEY, product_id INT, name TEXT, quantity REAL);", "sql": "DELETE FROM ingredients WHERE product_id = 1;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_delete", "is_postgresql_specific": false, "sql_length": 45, "num_statements": 1} {"question": "Which causes have received donations from donors in India?", "schema": "CREATE TABLE donors (donor_id INT, donor_name TEXT, country TEXT); CREATE TABLE donations (donation_id INT, donor_id INT, cause_id INT, donation_amount DECIMAL); INSERT INTO donors (donor_id, donor_name, country) VALUES (1, 'Aisha Patel', 'India'), (2, 'Hiroshi Tanaka', 'Japan'), (3, 'Clara Rodriguez', 'Brazil'); INSERT INTO donations (donation_id, donor_id, cause_id, donation_amount) VALUES (1, 1, 1, 100.00), (2, 1, 1, 200.00), (3, 2, 2, 300.00), (4, 3, 3, 50.00);", "sql": "SELECT causes.cause_name FROM causes INNER JOIN donations ON causes.cause_id = donations.cause_id INNER JOIN donors ON donations.donor_id = donors.donor_id WHERE donors.country = 'India';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 187, "num_statements": 1} {"question": "What is the total number of artworks in the museum that have never been checked out?", "schema": "CREATE TABLE artworks(artwork_id INT, title VARCHAR(50), is_checked_out INT); INSERT INTO artworks (artwork_id, title, is_checked_out) VALUES (1, 'Mona Lisa', 1), (2, 'Starry Night', 0), (3, 'The Persistence of Memory', 0);", "sql": "SELECT COUNT(artwork_id) FROM artworks WHERE is_checked_out = 0;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 64, "num_statements": 1} {"question": "pgTAP test for Pgtap--Unpackaged--0.91.0 (assertion 240).", "schema": null, "sql": "ALTER EXTENSION pgtap ADD FUNCTION hasnt_function( NAME, NAME, NAME[] );", "explanation": "SQL assertion from pgTAP test suite for Pgtap--Unpackaged--0.91.0.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 72, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many years did caroline lubrez win?", "schema": "CREATE TABLE table_name_19 (year INTEGER, winner VARCHAR)", "sql": "SELECT SUM(year) FROM table_name_19 WHERE winner = 'caroline lubrez';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 69, "num_statements": 1} {"question": "Which exhibition categories had the most and least visitors?", "schema": "CREATE TABLE visitors_exhibitions (visitor_id INT, exhibition_id INT, exhibition_category VARCHAR(10)); INSERT INTO visitors_exhibitions (visitor_id, exhibition_id, exhibition_category) VALUES (1, 1, 'Art'), (2, 2, 'Science');", "sql": "SELECT exhibition_category, COUNT(visitor_id) AS num_visitors FROM visitors_exhibitions GROUP BY exhibition_category ORDER BY num_visitors DESC, exhibition_category;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 165, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Class of 125cc, and a Year smaller than 1966 had what lowest wins?", "schema": "CREATE TABLE table_name_95 (wins INTEGER, class VARCHAR, year VARCHAR)", "sql": "SELECT MIN(wins) FROM table_name_95 WHERE class = '125cc' AND year < 1966;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 74, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the highest points of the club with less than 9 draws, 11 wins, and more than 40 goals?", "schema": "CREATE TABLE table_name_28 (points INTEGER, goals_for VARCHAR, draws VARCHAR, wins VARCHAR)", "sql": "SELECT MAX(points) FROM table_name_28 WHERE draws < 9 AND wins = 11 AND goals_for > 40;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 87, "num_statements": 1} {"question": "What is the average annual precipitation in the 'RuralInfrastructure' table?", "schema": "CREATE TABLE RuralInfrastructure (location VARCHAR(50), year INT, precipitation FLOAT);", "sql": "SELECT AVG(precipitation) FROM (SELECT precipitation, ROW_NUMBER() OVER(PARTITION BY location ORDER BY year) as rn FROM RuralInfrastructure) WHERE rn = 1;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "query_window_function", "is_postgresql_specific": true, "sql_length": 154, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the lowest octal with a 30 hexadecimal and less than 0 glyphs?", "schema": "CREATE TABLE table_name_82 (octal INTEGER, hexadecimal VARCHAR, glyph VARCHAR)", "sql": "SELECT MIN(octal) FROM table_name_82 WHERE hexadecimal = 30 AND glyph < 0;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 74, "num_statements": 1} {"question": "PostgreSQL regression test 'multirangetypes': Write the SELECT query (example 487).", "schema": null, "sql": "select range_agg(nmr) from (values ('{[1,2], [2,3]}'::nummultirange)) t(nmr);", "explanation": "Regression test for Multirangetypes in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select range_agg(nmr) from (values ('{[1,2], [2,3]}'::nummultirange)) t(nmr)) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 77, "num_statements": 1} {"question": "Delete all records from the 'UnderwaterVolcanoes' table where the 'VolcanoName' starts with 'K'", "schema": "CREATE TABLE UnderwaterVolcanoes (VolcanoID INT, VolcanoName VARCHAR(255), Location VARCHAR(255), LastEruption DATE);", "sql": "DELETE FROM UnderwaterVolcanoes WHERE VolcanoName LIKE 'K%';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_delete", "is_postgresql_specific": false, "sql_length": 60, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who was the home team when swansea city was the away team?", "schema": "CREATE TABLE table_name_47 (home_team VARCHAR, away_team VARCHAR)", "sql": "SELECT home_team FROM table_name_47 WHERE away_team = 'swansea city';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 69, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many losses were there with 208 points?", "schema": "CREATE TABLE table_name_26 (lost VARCHAR, points_for VARCHAR)", "sql": "SELECT lost FROM table_name_26 WHERE points_for = '208';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 56, "num_statements": 1} {"question": "What is the total number of construction workers in the state of New York, grouped by occupation and gender?", "schema": "CREATE TABLE construction_workers (worker_id INT, occupation VARCHAR(50), state VARCHAR(50), gender VARCHAR(50), salary INT); INSERT INTO construction_workers (worker_id, occupation, state, gender, salary) VALUES (1, 'Carpenter', 'New York', 'Female', 60000); INSERT INTO construction_workers (worker_id, occupation, state, gender, salary) VALUES (2, 'Electrician', 'New York', 'Male', 70000);", "sql": "SELECT occupation, gender, COUNT(*) FROM construction_workers WHERE state = 'New York' GROUP BY occupation, gender;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 115, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Tell me the surface for november 28, 2010", "schema": "CREATE TABLE table_name_34 (surface VARCHAR, date VARCHAR)", "sql": "SELECT surface FROM table_name_34 WHERE date = 'november 28, 2010';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 67, "num_statements": 1} {"question": "What is the total number of research grants received by female faculty members in the Chemistry department?", "schema": "CREATE TABLE Faculty(Id INT, Name VARCHAR(100), Department VARCHAR(50), Gender VARCHAR(10), GrantAmount DECIMAL(10,2)); INSERT INTO Faculty(Id, Name, Department, Gender, GrantAmount) VALUES (1, 'Quinn', 'Chemistry', 'Female', 40000.00), (2, 'Rory', 'Chemistry', 'Female', 50000.00);", "sql": "SELECT SUM(GrantAmount) FROM Faculty WHERE Department = 'Chemistry' AND Gender = 'Female';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 90, "num_statements": 1} {"question": "How many conservation projects were completed in the Arctic region between 2016 and 2021?", "schema": "CREATE TABLE conservation_efforts (id INT, project_name VARCHAR(255), location VARCHAR(255), start_date DATE, end_date DATE); INSERT INTO conservation_efforts (id, project_name, location, start_date, end_date) VALUES (1, 'Coral Reef Restoration', 'Florida Keys', '2010-01-01', '2022-12-31'); INSERT INTO conservation_efforts (id, project_name, location, start_date, end_date) VALUES (2, 'Turtle Nesting Protection', 'Costa Rica', '2015-06-01', '2023-05-31'); INSERT INTO conservation_efforts (id, project_name, location, start_date, end_date) VALUES (3, 'Polar Bear Protection', 'Arctic', '2016-01-01', '2021-12-31');", "sql": "SELECT COUNT(*) as total_projects FROM conservation_efforts WHERE location = 'Arctic' AND YEAR(start_date) BETWEEN 2016 AND 2021 AND YEAR(end_date) BETWEEN 2016 AND 2021;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 170, "num_statements": 1} {"question": "What is the distribution of spacecraft manufacturing costs by country?", "schema": "CREATE TABLE SpacecraftManufacturing (id INT, year INT, cost FLOAT, country TEXT);", "sql": "SELECT country, AVG(cost) as avg_cost, STDDEV(cost) as stddev_cost FROM SpacecraftManufacturing GROUP BY country;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 113, "num_statements": 1} {"question": "PostgreSQL Typeconv: show example 11.", "schema": null, "sql": "CREATE DOMAIN mytext AS text CHECK(...); CREATE FUNCTION mytext_eq_text (mytext, text) RETURNS boolean AS ...; CREATE OPERATOR = (procedure=mytext_eq_text, leftarg=mytext, rightarg=text); CREATE TABLE mytable (val mytext); SELECT * FROM mytable WHERE val = 'foo';", "explanation": "Example from PostgreSQL documentation on Typeconv.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "ddl_table", "is_postgresql_specific": true, "sql_length": 263, "num_statements": 5} {"question": "Generate PostgreSQL SQL for: Which Drawn has Points for of 782?", "schema": "CREATE TABLE table_name_2 (drawn VARCHAR, points_for VARCHAR)", "sql": "SELECT drawn FROM table_name_2 WHERE points_for = '782';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 56, "num_statements": 1} {"question": "What is the total quantity of Tencel used by brands in 2022?", "schema": "CREATE TABLE tencel_usage (brand VARCHAR(50), quantity INT, year INT); INSERT INTO tencel_usage (brand, quantity, year) VALUES ('BrandJ', 12000, 2022), ('BrandK', 18000, 2022), ('BrandL', 9000, 2022);", "sql": "SELECT SUM(quantity) FROM tencel_usage WHERE year = 2022;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 57, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: For Laps smaller than 6, what does the Grid add up to?", "schema": "CREATE TABLE table_name_5 (grid INTEGER, laps INTEGER)", "sql": "SELECT SUM(grid) FROM table_name_5 WHERE laps < 6;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 50, "num_statements": 1} {"question": "Write the SQL definition from PostgreSQL contrib extension 'contrib' (item 31).", "schema": null, "sql": "CREATE FUNCTION dblink_build_sql_insert (text, int2vector, int, _text, _text)\nRETURNS text\nAS 'MODULE_PATHNAME','dblink_build_sql_insert'\nLANGUAGE C STRICT PARALLEL RESTRICTED;", "explanation": "SQL definition from the 'contrib' PostgreSQL contrib extension.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "other", "is_postgresql_specific": false, "sql_length": 176, "num_statements": 1} {"question": "What is the total amount of climate finance invested in renewable energy projects in the Latin America and Caribbean region in 2022?", "schema": "CREATE TABLE climate_finance (country VARCHAR(255), sector VARCHAR(255), investment_amount NUMERIC, region VARCHAR(255), year INT);", "sql": "SELECT SUM(investment_amount) FROM climate_finance WHERE sector = 'renewable energy' AND region = 'Latin America and Caribbean' AND year = 2022;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 144, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'replica_identity' (example 3).", "schema": null, "sql": "CREATE TABLE test_replica_identity_t3 (id serial constraint pk primary key deferrable);", "explanation": "DDL from PostgreSQL core regression test for Replica Identity.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_table", "is_postgresql_specific": true, "sql_length": 87, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which Position has Drawn larger than 1, and a Played smaller than 14?", "schema": "CREATE TABLE table_name_34 (position INTEGER, drawn VARCHAR, played VARCHAR)", "sql": "SELECT MAX(position) FROM table_name_34 WHERE drawn > 1 AND played < 14;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 72, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Where did Steve Gotsche win?", "schema": "CREATE TABLE table_name_26 (venue VARCHAR, champion VARCHAR)", "sql": "SELECT venue FROM table_name_26 WHERE champion = 'steve gotsche';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 65, "num_statements": 1} {"question": "Which building materials were used in ProjectId 2 with a quantity greater than 350?", "schema": "CREATE TABLE BuildingMaterials (Id INT, ProjectId INT, Material VARCHAR(50), Quantity INT, Cost DECIMAL(10,2)); INSERT INTO BuildingMaterials (Id, ProjectId, Material, Quantity, Cost) VALUES (1, 1, 'Concrete', 500, 4500.00); INSERT INTO BuildingMaterials (Id, ProjectId, Material, Quantity, Cost) VALUES (2, 2, 'Steel', 300, 7000.00);", "sql": "SELECT * FROM BuildingMaterials WHERE ProjectId = 2 AND Quantity > 350;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 71, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which Record has a Visitor of pittsburgh, and a Score of 4–0?", "schema": "CREATE TABLE table_name_23 (record VARCHAR, visitor VARCHAR, score VARCHAR)", "sql": "SELECT record FROM table_name_23 WHERE visitor = 'pittsburgh' AND score = '4–0';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 80, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the 18-49 rating for weekly rank of 30", "schema": "CREATE TABLE table_name_1 (weekly_rank___number_ VARCHAR)", "sql": "SELECT 18 AS _49__rating_share_ FROM table_name_1 WHERE weekly_rank___number_ = '30';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 85, "num_statements": 1} {"question": "What is the minimum salary of employees who have not received any ethical AI training?", "schema": "CREATE TABLE trainings(id INT, employee_id INT, hours INT); INSERT INTO trainings(id, employee_id, hours) VALUES (1, 1, 5); INSERT INTO trainings(id, employee_id, hours) VALUES (2, 2, 10); INSERT INTO trainings(id, employee_id, hours) VALUES (3, 3, 15); CREATE TABLE employees(id INT, name TEXT, salary FLOAT, training_id INT); INSERT INTO employees(id, name, salary, training_id) VALUES (1, 'John', 80000.0, 1); INSERT INTO employees(id, name, salary, training_id) VALUES (2, 'Jane', 85000.0, 2); INSERT INTO employees(id, name, salary, training_id) VALUES (3, 'Jim', 90000.0, NULL);", "sql": "SELECT MIN(salary) FROM employees WHERE training_id IS NULL;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 60, "num_statements": 1} {"question": "Find the names of all government agencies that have a budget lower than the average budget for all agencies in the 'Federal' schema.", "schema": "CREATE TABLE Federal.agencies (name VARCHAR(50), budget INT); INSERT INTO Federal.agencies (name, budget) VALUES ('Department of Defense', 7000000000), ('Department of Education', 70000000), ('Department of Health and Human Services', 1200000000), ('Department of State', 50000000), ('Department of Transportation', 80000000);", "sql": "SELECT name FROM Federal.agencies WHERE budget < (SELECT AVG(budget) FROM Federal.agencies);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 92, "num_statements": 1} {"question": "PL/pgSQL test: Plpython Composite (example 73).", "schema": null, "sql": "-- check what happens if the output record descriptor changes\nCREATE FUNCTION return_record(t text) RETURNS record AS $$\nreturn {'t': t, 'val': 10}\n$$ LANGUAGE plpython3u;", "explanation": "PL/pgSQL example from PostgreSQL source test for Plpython Composite.", "validation_query": null, "source": "plpgsql_source", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 171, "num_statements": 1} {"question": "Identify the number of legal aid clinics in rural areas with a focus on immigration law", "schema": "CREATE TABLE legal_aid_clinics (clinic_id INT, area_type VARCHAR(255), focus_area VARCHAR(255)); INSERT INTO legal_aid_clinics (clinic_id, area_type, focus_area) VALUES (1, 'Urban', 'Civil'), (2, 'Rural', 'Immigration'), (3, 'Urban', 'Criminal'), (4, 'Suburban', 'Family'), (5, 'Rural', 'Immigration'), (6, 'Urban', 'Employment');", "sql": "SELECT COUNT(*) FROM legal_aid_clinics WHERE area_type = 'Rural' AND focus_area = 'Immigration';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 96, "num_statements": 1} {"question": "What is the total snowfall per year for the past 5 years?", "schema": "CREATE TABLE SnowfallData (id INT, year INT, month INT, snowfall FLOAT); INSERT INTO SnowfallData (id, year, month, snowfall) VALUES (1, 2017, 1, 15.2), (2, 2017, 2, 13.5), (3, 2017, 3, 16.3);", "sql": "SELECT year, SUM(snowfall) FROM SnowfallData WHERE year IN (2017, 2018, 2019, 2020, 2021) GROUP BY year;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 104, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the venue for the friendly competition at Dreisamstadion, Freiburg?", "schema": "CREATE TABLE table_name_52 (score VARCHAR, competition VARCHAR, venue VARCHAR)", "sql": "SELECT score FROM table_name_52 WHERE competition = 'friendly' AND venue = 'dreisamstadion, freiburg';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 102, "num_statements": 1} {"question": "PostgreSQL regression test 'arrays': Write the SELECT query (example 357).", "schema": null, "sql": "select v, v is null as \"is null\" from string_to_table(NULL, '|') g(v);", "explanation": "Regression test for Arrays in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select v, v is null as \"is null\" from string_to_table(NULL, '|') g(v)) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 70, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the average interview score from Kentucky?", "schema": "CREATE TABLE table_name_27 (interview INTEGER, state VARCHAR)", "sql": "SELECT AVG(interview) FROM table_name_27 WHERE state = 'kentucky';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "Insert a new record of a new budget allocation for the 'Health' department in the 'BudgetAllocation' table", "schema": "CREATE TABLE BudgetAllocation (department VARCHAR(20), budget INT);", "sql": "INSERT INTO BudgetAllocation (department, budget) VALUES ('Health', 800000);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 76, "num_statements": 1} {"question": "Write the PL/pgSQL object from PostgreSQL regression test 'triggers' (example 244).", "schema": null, "sql": "-- Test that the WHEN clause is set properly to partitions\ncreate table parted_trigger (a int, b text) partition by range (a);", "explanation": "PL/pgSQL object from PostgreSQL core test for Triggers.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "ddl_table", "is_postgresql_specific": true, "sql_length": 126, "num_statements": 1} {"question": "What is the minimum number of workers on a single project in the state of New York?", "schema": "CREATE TABLE Projects (project_id INT, state VARCHAR(255), num_workers INT); INSERT INTO Projects (project_id, state, num_workers) VALUES (1, 'New York', 10), (2, 'New York', 5);", "sql": "SELECT MIN(num_workers) FROM Projects WHERE state = 'New York';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 63, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who had the most rebounds in the game against New York?", "schema": "CREATE TABLE table_name_74 (high_rebounds VARCHAR, opponent VARCHAR)", "sql": "SELECT high_rebounds FROM table_name_74 WHERE opponent = 'new york';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 68, "num_statements": 1} {"question": "How many patients participated in clinical trial 'Trial-A' for drug 'ABC-456'?", "schema": "CREATE TABLE clinical_trials (trial_name TEXT, drug_name TEXT, patient_count INT); INSERT INTO clinical_trials (trial_name, drug_name, patient_count) VALUES ('Trial-A', 'ABC-456', 200), ('Trial-B', 'DEF-789', 300);", "sql": "SELECT patient_count FROM clinical_trials WHERE trial_name = 'Trial-A' AND drug_name = 'ABC-456';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 97, "num_statements": 1} {"question": "Find policyholders who have not filed a claim in the past 12 months", "schema": "CREATE TABLE policyholders (policyholder_id INT, policyholder_name TEXT, policyholder_dob DATE); CREATE TABLE claims_info (claim_id INT, policyholder_id INT, claim_date DATE); INSERT INTO policyholders VALUES (1, 'John Doe', '1980-01-01'); INSERT INTO policyholders VALUES (2, 'Jane Smith', '1990-02-02'); INSERT INTO claims_info VALUES (1, 1, '2020-01-01'); INSERT INTO claims_info VALUES (2, 1, '2020-06-01'); INSERT INTO claims_info VALUES (3, 2, '2019-02-01');", "sql": "SELECT policyholder_id, policyholder_name FROM policyholders LEFT JOIN claims_info USING (policyholder_id) WHERE claim_date IS NULL OR claim_date < (CURRENT_DATE - INTERVAL '12 months');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 186, "num_statements": 1} {"question": "Show all network investments made in the year 2020", "schema": "CREATE SCHEMA telecom; CREATE TABLE network_investments (year INT, amount FLOAT); INSERT INTO telecom.network_investments (year, amount) VALUES (2018, 1000000), (2019, 1500000), (2020, 2000000), (2021, 2500000);", "sql": "SELECT * FROM telecom.network_investments WHERE year = 2020;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 60, "num_statements": 1} {"question": "Year-over-year percentage change in energy consumption from 2020 to 2025?", "schema": "CREATE TABLE energy_consumption_yearly (year INT, consumption FLOAT); INSERT INTO energy_consumption_yearly (year, consumption) VALUES (2020, 50000.0), (2021, 55000.1), (2022, 60000.2), (2023, 65000.3), (2024, 70000.4), (2025, 75000.5);", "sql": "SELECT ec1.year + INTERVAL '1 year' AS year, (ec2.consumption - ec1.consumption) / ec1.consumption * 100.0 AS percentage_change FROM energy_consumption_yearly ec1 JOIN energy_consumption_yearly ec2 ON ec1.year + 1 = ec2.year;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 225, "num_statements": 1} {"question": "What is the total budget for programs with a budget over $5000?", "schema": "CREATE TABLE Programs (id INT, program TEXT, budget DECIMAL(10,2)); INSERT INTO Programs (id, program, budget) VALUES (1, 'Feeding the Hungry', 5000.00), (2, 'Clothing Drive', 3000.00), (3, 'Education', 7000.00);", "sql": "SELECT SUM(budget) FROM Programs WHERE budget > 5000;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 53, "num_statements": 1} {"question": "What is the average recycling rate for glass in the city of London for the years 2018 and 2019?", "schema": "CREATE TABLE recycling_rates (city VARCHAR(255), year INT, material_type VARCHAR(255), recycling_rate DECIMAL(5,2)); INSERT INTO recycling_rates (city, year, material_type, recycling_rate) VALUES ('London', 2018, 'Glass', 0.25), ('London', 2018, 'Plastic', 0.35), ('London', 2019, 'Glass', 0.30), ('London', 2019, 'Plastic', 0.40);", "sql": "SELECT AVG(recycling_rate) FROM recycling_rates WHERE city = 'London' AND material_type = 'Glass' AND year IN (2018, 2019);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 123, "num_statements": 1} {"question": "PostgreSQL regression test 'arrays': Write the SELECT query (example 485).", "schema": null, "sql": "SELECT array_sort('{1.1,3.3,5.5,2.2,null,4.4,6.6}'::float8[], false);", "explanation": "Regression test for Arrays in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT array_sort('{1.1,3.3,5.5,2.2,null,4.4,6.6}'::float8[], false)) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 69, "num_statements": 1} {"question": "What is the percentage of consumers who prefer cruelty-free products in the database?", "schema": "CREATE TABLE Consumer_Preference_CF (id INT, consumer_id INT, cruelty_free BOOLEAN); INSERT INTO Consumer_Preference_CF (id, consumer_id, cruelty_free) VALUES (1, 1001, true), (2, 1002, true), (3, 1003, false), (4, 1004, true), (5, 1005, false);", "sql": "SELECT (COUNT(DISTINCT consumer_id) FILTER (WHERE cruelty_free = true)) * 100.0 / COUNT(DISTINCT consumer_id) as percentage FROM Consumer_Preference_CF;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": true, "sql_length": 152, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the sum of the final for finland, who placed greater than 2 and had an all around larger than 18.9?", "schema": "CREATE TABLE table_name_26 (final INTEGER, all_around VARCHAR, place VARCHAR, nation VARCHAR)", "sql": "SELECT SUM(final) FROM table_name_26 WHERE place > 2 AND nation = 'finland' AND all_around > 18.9;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 98, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the total number of Silver medals for the Nation with less than 1 Bronze?", "schema": "CREATE TABLE table_name_84 (silver VARCHAR, bronze INTEGER)", "sql": "SELECT COUNT(silver) FROM table_name_84 WHERE bronze < 1;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 57, "num_statements": 1} {"question": "Update the name of the founder of Startup X to 'Founder Y' in the diversity metrics table", "schema": "CREATE TABLE diversity_metrics(id INT, company_name VARCHAR(50), founder_name VARCHAR(50), gender VARCHAR(10), age INT); INSERT INTO diversity_metrics VALUES (1, 'Startup X', 'Founder A', 'Male', 40); INSERT INTO diversity_metrics VALUES (2, 'Startup Y', 'Founder B', 'Female', 35); INSERT INTO diversity_metrics VALUES (3, 'Startup Z', 'Founder C', 'Non-binary', 32);", "sql": "UPDATE diversity_metrics SET founder_name = 'Founder Y' WHERE company_name = 'Startup X';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 89, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What year was jerry marion drafted?", "schema": "CREATE TABLE table_13758243_1 (draft_year INTEGER, player VARCHAR)", "sql": "SELECT MAX(draft_year) FROM table_13758243_1 WHERE player = 'Jerry Marion';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 75, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the winning % when there were 322 goals?", "schema": "CREATE TABLE table_2259285_1 (winning_pct__percentage VARCHAR, goals_for VARCHAR)", "sql": "SELECT winning_pct__percentage FROM table_2259285_1 WHERE goals_for = 322;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 74, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the week 5 after Dani Dior as week 1?", "schema": "CREATE TABLE table_name_36 (week_5 VARCHAR, week_1 VARCHAR)", "sql": "SELECT week_5 FROM table_name_36 WHERE week_1 = 'dani dior';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 60, "num_statements": 1} {"question": "What is the average number of support programs implemented per month for the deaf and hard of hearing community in the Atlantic region in 2022?", "schema": "CREATE TABLE SupportPrograms (ProgramID INT, DisabilityType VARCHAR(50), Region VARCHAR(50), ImplementationMonth INT, ImplementationYear INT); INSERT INTO SupportPrograms (ProgramID, DisabilityType, Region, ImplementationMonth, ImplementationYear) VALUES (1, 'Sign Language Interpretation', 'Atlantic', 1, 2022), (2, 'Hearing Aids', 'Atlantic', 2, 2022), (3, 'Assistive Listening Devices', 'Atlantic', 3, 2022);", "sql": "SELECT AVG(COUNT(ProgramID)) FROM SupportPrograms WHERE DisabilityType = 'Deaf' OR DisabilityType = 'Hard of Hearing' AND Region = 'Atlantic' GROUP BY ImplementationYear, ImplementationMonth;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 191, "num_statements": 1} {"question": "PostgreSQL regression test 'multirangetypes': Write the SELECT query (example 22).", "schema": null, "sql": "select '{[,z]}'::textmultirange;", "explanation": "Regression test for Multirangetypes in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select '{[,z]}'::textmultirange) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 32, "num_statements": 1} {"question": "PostgreSQL regression test 'aggregates': Write the SELECT query (example 384).", "schema": null, "sql": "select dense_rank(3) within group (order by x)\nfrom (values (1),(1),(2),(2),(3),(3),(4)) v(x);", "explanation": "Regression test for Aggregates in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select dense_rank(3) within group (order by x)\nfrom (values (1),(1),(2),(2),(3),(3),(4)) v(x)) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "advanced", "category": "query_select", "is_postgresql_specific": true, "sql_length": 94, "num_statements": 1} {"question": "What is the total budget allocated to program B and program C?", "schema": "CREATE TABLE budgets_program (budget_id INT, program TEXT, allocated_amount DECIMAL); INSERT INTO budgets_program (budget_id, program, allocated_amount) VALUES (1, 'Program B', 10000.00), (2, 'Program C', 7500.00), (3, 'Program B', 2500.00), (4, 'Program C', 1500.00);", "sql": "SELECT SUM(allocated_amount) FROM budgets_program WHERE program IN ('Program B', 'Program C');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 94, "num_statements": 1} {"question": "What is the minimum cost of accommodations provided to students with mobility impairments in the past year?", "schema": "CREATE TABLE accommodations (id INT, student_id INT, type TEXT, cost INT, date DATE); INSERT INTO accommodations (id, student_id, type, cost, date) VALUES (1, 1, 'wheelchair', 500, '2022-01-01'); INSERT INTO accommodations (id, student_id, type, cost, date) VALUES (2, 2, 'note taker', 500, '2022-02-01');", "sql": "SELECT MIN(cost) FROM accommodations WHERE type = 'wheelchair' AND date >= DATE_SUB(NOW(), INTERVAL 1 YEAR);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 108, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What publishers were involved with product number SCUS-97265?", "schema": "CREATE TABLE table_10875694_11 (publisher VARCHAR, product_no VARCHAR)", "sql": "SELECT publisher FROM table_10875694_11 WHERE product_no = 'SCUS-97265';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 72, "num_statements": 1} {"question": "Find the total number of peacekeeping operations by region", "schema": "CREATE TABLE peacekeeping_operations (id INT, operation_name VARCHAR(255), region VARCHAR(255)); INSERT INTO peacekeeping_operations (id, operation_name, region) VALUES (1, 'Operation One', 'Africa'), (2, 'Operation Two', 'Europe'), (3, 'Operation Three', 'Africa');", "sql": "SELECT region, COUNT(*) as total_operations FROM peacekeeping_operations GROUP BY region;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 89, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'generated_stored' (example 35).", "schema": null, "sql": "INSERT INTO gtestx VALUES (11, 1), (22, 2), (33, 3);", "explanation": "DML from PostgreSQL core regression test for Generated Stored.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 52, "num_statements": 1} {"question": "Get the total CO2 emissions per quarter for the facilities located in California.", "schema": "CREATE TABLE facility_location (facility_id INT, state VARCHAR(50), quarter INT, year INT, co2_emissions FLOAT); INSERT INTO facility_location (facility_id, state, quarter, year, co2_emissions) VALUES (1, 'California', 1, 2021, 1500), (1, 'California', 2, 2021, 1600), (1, 'California', 3, 2021, 1700), (1, 'California', 4, 2021, 1800), (2, 'California', 1, 2021, 1200), (2, 'California', 2, 2021, 1300), (2, 'California', 3, 2021, 1400), (2, 'California', 4, 2021, 1500);", "sql": "SELECT state, quarter, year, SUM(co2_emissions) as total_co2_emissions FROM facility_location WHERE state = 'California' GROUP BY state, quarter, year;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 151, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the author for 6y/ae", "schema": "CREATE TABLE table_1620397_2 (author VARCHAR, series_sorted VARCHAR)", "sql": "SELECT author FROM table_1620397_2 WHERE series_sorted = '6Y/AE';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 65, "num_statements": 1} {"question": "What is the maximum amount of funds raised by the 'Red Cross' organization in the year 2020?", "schema": "CREATE TABLE funds(id INT, organization TEXT, amount FLOAT, year INT); INSERT INTO funds(id, organization, amount, year) VALUES (1, 'Red Cross', 750000.00, 2020), (2, 'UNICEF', 800000.00, 2020), (3, 'World Vision', 600000.00, 2019);", "sql": "SELECT MAX(amount) FROM funds WHERE organization = 'Red Cross' AND year = 2020;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 79, "num_statements": 1} {"question": "What is the total budget for military innovation by each department, only for departments that have spent more than $10 million?", "schema": "CREATE TABLE MilitaryInnovation (id INT, department VARCHAR(50), budget INT);", "sql": "SELECT department, SUM(budget) FROM MilitaryInnovation GROUP BY department HAVING SUM(budget) > 10000000;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 105, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'triggers' (example 368).", "schema": null, "sql": "create trigger intercept_insert_child3\n before insert on child3\n for each row execute procedure intercept_insert();", "explanation": "DDL from PostgreSQL core regression test for Triggers.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "dml_insert", "is_postgresql_specific": true, "sql_length": 117, "num_statements": 1} {"question": "PostgreSQL regression test 'tstypes': Write the SELECT query (example 134).", "schema": null, "sql": "SELECT ts_rank_cd(' a:1 sa:3C sab:2c d g'::tsvector, 'a | sa:*');", "explanation": "Regression test for Tstypes in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT ts_rank_cd(' a:1 sa:3C sab:2c d g'::tsvector, 'a | sa:*')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "advanced", "category": "query_select", "is_postgresql_specific": true, "sql_length": 65, "num_statements": 1} {"question": "PostgreSQL regression test 'jsonpath': Write the SELECT query (example 149).", "schema": null, "sql": "select '$ ? (@.a < 0.1e+1)'::jsonpath;", "explanation": "Regression test for Jsonpath in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select '$ ? (@.a < 0.1e+1)'::jsonpath) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 38, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'test_decoding' (example 24).", "schema": null, "sql": "INSERT INTO nobarf(data) VALUES('3');", "explanation": "Example query from the 'test_decoding' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 37, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the result for 1980s", "schema": "CREATE TABLE table_19508635_1 (result VARCHAR, theme VARCHAR)", "sql": "SELECT result FROM table_19508635_1 WHERE theme = '1980s';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 58, "num_statements": 1} {"question": "What is the average population of 'clinics' in the 'health_facilities' table?", "schema": "CREATE TABLE health_facilities (facility_id INT, name VARCHAR(50), type VARCHAR(50), population INT, city VARCHAR(50), state VARCHAR(50));", "sql": "SELECT AVG(population) FROM health_facilities WHERE type = 'clinic';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 68, "num_statements": 1} {"question": "What is the distribution of tuberculosis cases by race and ethnicity?", "schema": "CREATE TABLE tb_cases (case_id INT, race_ethnicity_id INT, cases_count INT); CREATE TABLE race_ethnicity (race_ethnicity_id INT, race VARCHAR(50), ethnicity VARCHAR(50));", "sql": "SELECT re.race, re.ethnicity, SUM(tc.cases_count) AS total_cases FROM tb_cases tc JOIN race_ethnicity re ON tc.race_ethnicity_id = re.race_ethnicity_id GROUP BY re.race_ethnicity_id;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 182, "num_statements": 1} {"question": "pgTAP test for Pgtap--Unpackaged--0.91.0 (assertion 104).", "schema": null, "sql": "ALTER EXTENSION pgtap ADD FUNCTION _cexists ( NAME, NAME, NAME );", "explanation": "SQL assertion from pgTAP test suite for Pgtap--Unpackaged--0.91.0.", "validation_query": null, "source": "pgtap_tests", "difficulty": "intermediate", "category": "other", "is_postgresql_specific": false, "sql_length": 65, "num_statements": 1} {"question": "Find countries with high consumer awareness", "schema": "CREATE TABLE consumer_awareness (id INT PRIMARY KEY, country VARCHAR(50), awareness DECIMAL(3,2)); INSERT INTO consumer_awareness (id, country, awareness) VALUES (1, 'Germany', 0.85), (2, 'Italy', 0.70), (3, 'France', 0.80);", "sql": "SELECT country FROM consumer_awareness WHERE awareness >= 0.8;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 62, "num_statements": 1} {"question": "Which country in Asia has the highest number of eco-friendly hotels?", "schema": "CREATE TABLE hotels (hotel_id INT, region VARCHAR(50), rating VARCHAR(10), is_eco_friendly BOOLEAN); INSERT INTO hotels (hotel_id, region, rating, is_eco_friendly) VALUES (1, 'Europe', 'Luxury', false), (2, 'Asia', 'Standard', true), (3, 'America', 'Eco-Friendly', true), (4, 'Asia', 'Standard', true);", "sql": "SELECT region, COUNT(*) as num_hotels FROM hotels WHERE is_eco_friendly = true GROUP BY region ORDER BY num_hotels DESC LIMIT 1;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 128, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the L2 cache with a 13.5x multi 1?", "schema": "CREATE TABLE table_name_87 (l2_cache VARCHAR, multi_1 VARCHAR)", "sql": "SELECT l2_cache FROM table_name_87 WHERE multi_1 = '13.5x';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 59, "num_statements": 1} {"question": "What is the name, height, and number of stories for all buildings in the city of New York with more than 50 floors?", "schema": "CREATE TABLE Buildings (id INT, name VARCHAR(100), height FLOAT, num_stories INT, city VARCHAR(50)); INSERT INTO Buildings (id, name, height, num_stories, city) VALUES (1, 'Empire State Building', 381, 102, 'New York');", "sql": "SELECT name, height, num_stories FROM Buildings WHERE city = 'New York' AND num_stories > 50;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 93, "num_statements": 1} {"question": "Delete the records of ingredients that were sourced in China in 2021.", "schema": "CREATE TABLE ingredients (ingredient_id INT, name TEXT, sourcing_country TEXT, source_date DATE); INSERT INTO ingredients (ingredient_id, name, sourcing_country, source_date) VALUES (1, 'Water', 'China', '2021-01-01'), (2, 'Glycerin', 'France', '2021-02-15'), (3, 'Retinol', 'USA', '2020-12-10');", "sql": "DELETE FROM ingredients WHERE sourcing_country = 'China' AND source_date >= '2021-01-01' AND source_date <= '2021-12-31';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_delete", "is_postgresql_specific": false, "sql_length": 121, "num_statements": 1} {"question": "What is the minimum depth of all sites in the Pacific ocean?", "schema": "CREATE TABLE site (site_id INT, name TEXT, depth FLOAT); INSERT INTO site (site_id, name, depth) VALUES (1, 'Pacific', 123.45);", "sql": "SELECT MIN(depth) FROM site WHERE name = 'Pacific';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 51, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Show the authors of submissions and the acceptance results of their submissions.", "schema": "CREATE TABLE acceptance (Result VARCHAR, Submission_ID VARCHAR); CREATE TABLE submission (Author VARCHAR, Submission_ID VARCHAR)", "sql": "SELECT T2.Author, T1.Result FROM acceptance AS T1 JOIN submission AS T2 ON T1.Submission_ID = T2.Submission_ID;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 111, "num_statements": 1} {"question": "pgTAP test for Pgtap--Unpackaged--0.91.0 (assertion 676).", "schema": null, "sql": "ALTER EXTENSION pgtap ADD FUNCTION enums_are ( NAME[], TEXT );", "explanation": "SQL assertion from pgTAP test suite for Pgtap--Unpackaged--0.91.0.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 62, "num_statements": 1} {"question": "Insert a new record into the 'professional_development' table", "schema": "CREATE TABLE professional_development (teacher_id INT, course_title VARCHAR(100), date_completed DATE);", "sql": "INSERT INTO professional_development (teacher_id, course_title, date_completed) VALUES (205, 'Trauma-Informed Teaching', '2022-08-01');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 135, "num_statements": 1} {"question": "Insert new volunteer records for the first quarter of 2023.", "schema": "CREATE TABLE Volunteers (VolunteerID int, SignUpDate date); INSERT INTO Volunteers (VolunteerID, SignUpDate) VALUES (1, '2022-01-03'), (2, '2022-02-14');", "sql": "INSERT INTO Volunteers (VolunteerID, SignUpDate) VALUES (3, '2023-01-01'), (4, '2023-02-15'), (5, '2023-03-30');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 112, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: what is round2 when round5 is more than 35, round3 is less than 51, the rank is smaller than 2 and the team is netherlands?", "schema": "CREATE TABLE table_name_54 (round2 VARCHAR, rank VARCHAR, team VARCHAR, round5 VARCHAR, round3 VARCHAR)", "sql": "SELECT COUNT(round2) FROM table_name_54 WHERE round5 > 35 AND round3 < 51 AND team = 'netherlands' AND rank < 2;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 112, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the lowest total goals when position is df, and FA Cup Goals is smaller than 0?", "schema": "CREATE TABLE table_name_89 (total_goals INTEGER, position VARCHAR, fa_cup_goals VARCHAR)", "sql": "SELECT MIN(total_goals) FROM table_name_89 WHERE position = 'df' AND fa_cup_goals < 0;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 86, "num_statements": 1} {"question": "What is the average historical context age for artifacts from the 'Egyptian Digs' site?", "schema": "CREATE TABLE If Not Exists historical_contexts (context_id INT, context_age INT, site_id INT); INSERT INTO historical_contexts (context_id, context_age, site_id) VALUES (1, 3500, 1), (2, 2500, 1), (3, 5000, 3), (4, 1500, 2), (5, 4000, 3);", "sql": "SELECT AVG(context_age) FROM historical_contexts JOIN excavation_sites ON historical_contexts.site_id = excavation_sites.site_id WHERE excavation_sites.site_name = 'Egyptian Digs';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 180, "num_statements": 1} {"question": "Display the number of electric bicycles and electric scooters in the micro_mobility view.", "schema": "CREATE VIEW micro_mobility AS SELECT 'ebike' AS vehicle_type, COUNT(*) AS quantity UNION ALL SELECT 'escooter', COUNT(*);", "sql": "SELECT vehicle_type, quantity FROM micro_mobility;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 50, "num_statements": 1} {"question": "Find the number of disability advocacy events held in each city and the total cost for each city.", "schema": "CREATE TABLE disability_advocacy_events (event_id INT, city VARCHAR(255), date DATE, cost INT); INSERT INTO disability_advocacy_events (event_id, city, date, cost) VALUES (1, 'New York', '2021-03-22', 5000); INSERT INTO disability_advocacy_events (event_id, city, date, cost) VALUES (2, 'Los Angeles', '2021-04-01', 7000);", "sql": "SELECT city, COUNT(*) as num_events, SUM(cost) as total_cost FROM disability_advocacy_events GROUP BY city;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 107, "num_statements": 1} {"question": "PostgreSQL regression test 'timetz': Write the SELECT query (example 31).", "schema": null, "sql": "SELECT EXTRACT(SECOND FROM TIME WITH TIME ZONE '2020-05-26 13:30:25.575401-04');", "explanation": "Regression test for Timetz in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT EXTRACT(SECOND FROM TIME WITH TIME ZONE '2020-05-26 13:30:25.575401-04')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 85, "num_statements": 1} {"question": "Who are the patients that received therapy in 2022?", "schema": "CREATE TABLE patients (id INT, name TEXT, age INT, treatment TEXT, treatment_year INT); INSERT INTO patients (id, name, age, treatment, treatment_year) VALUES (1, 'John Doe', 35, 'CBT', 2022), (2, 'Jane Smith', 40, 'DBT', 2021);", "sql": "SELECT name FROM patients WHERE treatment LIKE '%CBT%' OR treatment LIKE '%DBT%' AND treatment_year = 2022;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 107, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the Record of Visitor of toronto st. pats with a Score of 5–4 and a Home of ottawa senators? Question 5", "schema": "CREATE TABLE table_name_48 (record VARCHAR, home VARCHAR, visitor VARCHAR, score VARCHAR)", "sql": "SELECT record FROM table_name_48 WHERE visitor = 'toronto st. pats' AND score = '5–4' AND home = 'ottawa senators';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 115, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many episodes in the season were directed by Jeremy Podeswa?", "schema": "CREATE TABLE table_2182654_3 (no_in_season VARCHAR, directed_by VARCHAR)", "sql": "SELECT COUNT(no_in_season) FROM table_2182654_3 WHERE directed_by = 'Jeremy Podeswa';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 85, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What's the 1st leg result in the round where team #1 is Iraklis?", "schema": "CREATE TABLE table_19130829_4 (team__number1 VARCHAR)", "sql": "SELECT 1 AS st_leg FROM table_19130829_4 WHERE team__number1 = 'Iraklis';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 73, "num_statements": 1} {"question": "PostgreSQL regression test 'sqljson': Write the SELECT query (example 46).", "schema": null, "sql": "SELECT pg_typeof(JSON_SERIALIZE(NULL));", "explanation": "Regression test for Sqljson in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT pg_typeof(JSON_SERIALIZE(NULL))) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 39, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is Province, when 2006 is less than 153748, when Date of Official Foundation of Municipality is after 1958, and when City is \"Pakdasht\"?", "schema": "CREATE TABLE table_name_46 (province VARCHAR, city VARCHAR, date_of_official_foundation_of_municipality VARCHAR)", "sql": "SELECT province FROM table_name_46 WHERE 2006 < 153748 AND date_of_official_foundation_of_municipality > 1958 AND city = 'pakdasht';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 132, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is Shooter, when Total is \"21\"?", "schema": "CREATE TABLE table_name_51 (shooter VARCHAR, total VARCHAR)", "sql": "SELECT shooter FROM table_name_51 WHERE total = '21';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 53, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many times does Switzerland have under 7 golds and less than 3 silvers?", "schema": "CREATE TABLE table_name_17 (total VARCHAR, silver VARCHAR, country VARCHAR, gold VARCHAR)", "sql": "SELECT COUNT(total) FROM table_name_17 WHERE country = 'switzerland' AND gold < 7 AND silver < 3;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 97, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'stats_ext' (example 3).", "schema": null, "sql": "CREATE STATISTICS tst;", "explanation": "DDL from PostgreSQL core regression test for Stats Ext.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 22, "num_statements": 1} {"question": "What is the maximum investment in climate adaptation projects in the Middle East and North Africa in 2018?", "schema": "CREATE TABLE climate_adaptation_projects (project_id INT, location VARCHAR(50), investment_amount FLOAT, investment_year INT); INSERT INTO climate_adaptation_projects (project_id, location, investment_amount, investment_year) VALUES (1, 'Egypt', 5000000, 2018), (2, 'Morocco', 4000000, 2018), (3, 'Jordan', 3000000, 2018), (4, 'Iraq', 6000000, 2018), (5, 'Tunisia', 2000000, 2018);", "sql": "SELECT MAX(investment_amount) FROM climate_adaptation_projects WHERE location LIKE 'Middle East%' AND investment_year = 2018;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 125, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who wrote the movie positioned at 8 on the list?", "schema": "CREATE TABLE table_2602958_5 (writer_s_ VARCHAR, _number VARCHAR)", "sql": "SELECT writer_s_ FROM table_2602958_5 WHERE _number = 8;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 56, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'updatable_views' (example 115).", "schema": null, "sql": "INSERT INTO rw_view2 VALUES (3, 'Row 3') RETURNING old.*, new.*;", "explanation": "DML from PostgreSQL core regression test for Updatable Views.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": true, "sql_length": 64, "num_statements": 1} {"question": "What is the maximum number of military personnel in each branch?", "schema": "CREATE TABLE Military_Personnel (ID INT, Branch VARCHAR(50), Personnel INT); INSERT INTO Military_Personnel (ID, Branch, Personnel) VALUES (1, 'Army', 500000), (2, 'Navy', 400000), (3, 'Air_Force', 350000);", "sql": "SELECT Branch, MAX(Personnel) FROM Military_Personnel GROUP BY Branch;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 70, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the original air date of the episode directed by Ian Barry and written by Philip Dalkin?", "schema": "CREATE TABLE table_18712423_3 (original_air_date VARCHAR, directed_by VARCHAR, written_by VARCHAR)", "sql": "SELECT original_air_date FROM table_18712423_3 WHERE directed_by = 'Ian Barry' AND written_by = 'Philip Dalkin';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 112, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: In what Season was Colin Miller (TAS) the Player?", "schema": "CREATE TABLE table_name_34 (season VARCHAR, player VARCHAR)", "sql": "SELECT season FROM table_name_34 WHERE player = 'colin miller (tas)';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 69, "num_statements": 1} {"question": "What is the most common treatment type for patients with 'PTSD' in 'clinic_TX'?", "schema": "CREATE TABLE clinic_TX (patient_id INT, name VARCHAR(50), primary_diagnosis VARCHAR(50), treatment_type VARCHAR(50)); INSERT INTO clinic_TX (patient_id, name, primary_diagnosis, treatment_type) VALUES (1, 'John Doe', 'PTSD', 'EMDR'), (2, 'Jane Smith', 'PTSD', 'CBT'), (3, 'Alice Johnson', 'PTSD', 'EMDR');", "sql": "SELECT treatment_type, COUNT(*) as count FROM clinic_TX WHERE primary_diagnosis = 'PTSD' GROUP BY treatment_type ORDER BY count DESC LIMIT 1;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 141, "num_statements": 1} {"question": "How many cases were heard in each state last year?", "schema": "CREATE TABLE cases_by_state (state VARCHAR(20), year INT, num_cases INT); INSERT INTO cases_by_state (state, year, num_cases) VALUES ('California', 2021, 1200), ('New York', 2021, 2500), ('Texas', 2021, 1800);", "sql": "SELECT state, SUM(num_cases) as total_cases FROM cases_by_state WHERE year = 2021 GROUP BY state;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 97, "num_statements": 1} {"question": "Identify the farms with the highest and lowest water temperatures for Salmon.", "schema": "CREATE TABLE FarmTemperature (FarmID INT, Species VARCHAR(255), WaterTemp FLOAT); INSERT INTO FarmTemperature (FarmID, Species, WaterTemp) VALUES (1, 'Salmon', 12.3), (2, 'Salmon', 13.1), (3, 'Salmon', 11.9), (4, 'Salmon', 12.8);", "sql": "SELECT FarmID, WaterTemp FROM FarmTemperature WHERE Species = 'Salmon' AND WaterTemp IN (SELECT MAX(WaterTemp), MIN(WaterTemp) FROM FarmTemperature WHERE Species = 'Salmon');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 174, "num_statements": 1} {"question": "List all cultural competency policies in California.", "schema": "CREATE TABLE CulturalCompetency (id INT, policy_name TEXT, state TEXT); INSERT INTO CulturalCompetency (id, policy_name, state) VALUES (1, 'Diversity Act 2020', 'California'); INSERT INTO CulturalCompetency (id, policy_name, state) VALUES (2, 'Inclusion Act 2018', 'California');", "sql": "SELECT * FROM CulturalCompetency WHERE state = 'California';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 60, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the highest round that has a draftee from Washington State University?", "schema": "CREATE TABLE table_name_60 (round INTEGER, school VARCHAR)", "sql": "SELECT MAX(round) FROM table_name_60 WHERE school = 'washington state university';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 82, "num_statements": 1} {"question": "What is the total volume of wastewater treated in Jakarta, Indonesia in 2019 and 2020?", "schema": "CREATE TABLE jakarta_wastewater (year INT, treatment_volume INT); INSERT INTO jakarta_wastewater (year, treatment_volume) VALUES (2019, 500000), (2020, 550000);", "sql": "SELECT jakarta_wastewater.year, SUM(jakarta_wastewater.treatment_volume) as total_treatment_volume FROM jakarta_wastewater WHERE jakarta_wastewater.year IN (2019, 2020) GROUP BY jakarta_wastewater.year;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 202, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who are the cast members of episode 3-04?", "schema": "CREATE TABLE table_2570269_3 (cast VARCHAR, episode__number VARCHAR)", "sql": "SELECT cast FROM table_2570269_3 WHERE episode__number = '3-04';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 64, "num_statements": 1} {"question": "Find the number of hotels in each country from the 'hotels' table", "schema": "CREATE TABLE hotels (hotel_id INT, hotel_name VARCHAR(50), country VARCHAR(50));", "sql": "SELECT country, COUNT(*) as hotel_count FROM hotels GROUP BY country;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 69, "num_statements": 1} {"question": "Update the military spending amount for a specific country in the \"military_spending\" table", "schema": "CREATE TABLE military_spending (country VARCHAR(255), year INT, amount FLOAT); INSERT INTO military_spending (country, year, amount) VALUES ('India', 2017, 51.1);", "sql": "UPDATE military_spending SET amount = 55.3 WHERE country = 'India';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 67, "num_statements": 1} {"question": "PostgreSQL regression test 'window': Write the SELECT query (example 197).", "schema": null, "sql": "SELECT sum(unique1) over (order by four groups between current row and unbounded following),\n\tunique1, four\nFROM tenk1 WHERE unique1 < 10;", "explanation": "Regression test for Window in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT sum(unique1) over (order by four groups between current row and unbounded following),\n\tunique1, four\nFROM tenk1 WHERE unique1 < 10) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "intermediate", "category": "query_window_function", "is_postgresql_specific": true, "sql_length": 138, "num_statements": 1} {"question": "How many reverse logistics orders were processed in 'EU' region in October 2021?", "schema": "CREATE TABLE reverse_logistics (id INT, region VARCHAR(5), order_date DATE, processed INT); INSERT INTO reverse_logistics VALUES (1, 'EU', '2021-10-01', 20), (2, 'NA', '2021-10-03', 15), (3, 'ASIA', '2021-10-05', 30);", "sql": "SELECT SUM(processed) FROM reverse_logistics WHERE region = 'EU' AND order_date BETWEEN '2021-10-01' AND '2021-10-31';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 118, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the home teams score at Arden Street Oval?", "schema": "CREATE TABLE table_name_50 (home_team VARCHAR, venue VARCHAR)", "sql": "SELECT home_team AS score FROM table_name_50 WHERE venue = 'arden street oval';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 79, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: where is the enrollment 4259?", "schema": "CREATE TABLE table_1715730_2 (location VARCHAR, enrollment VARCHAR)", "sql": "SELECT location FROM table_1715730_2 WHERE enrollment = 4259;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 61, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: On what circuit was the iii Reims Grand Prix held?", "schema": "CREATE TABLE table_name_43 (circuit VARCHAR, race_name VARCHAR)", "sql": "SELECT circuit FROM table_name_43 WHERE race_name = 'iii reims grand prix';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 75, "num_statements": 1} {"question": "What are the unique sources used in articles on 'corruption' or 'government' in 'investigative_reports'?", "schema": "CREATE TABLE investigative_reports (title VARCHAR(255), source VARCHAR(255), topic VARCHAR(255));", "sql": "SELECT DISTINCT source FROM investigative_reports WHERE topic IN ('corruption', 'government') ORDER BY source;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 110, "num_statements": 1} {"question": "pgTAP test for Functap (assertion 189).", "schema": null, "sql": "-- is_normal_function( NAME, NAME[] )\n-- isnt_normal_function( NAME, NAME[] )\nSELECT * FROM check_test(\n is_normal_function( 'yay', '{}'::name[] ),\n true,\n 'is_normal_function(func, noargs)',\n 'Function yay() should be a normal function',\n ''\n);", "explanation": "SQL assertion from pgTAP test suite for Functap.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 260, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: During the hungarian grand prix where the pole position was michael schumacher and the fastest lap was driven by damon hill, what's the total number of rounds of races matching these standards?", "schema": "CREATE TABLE table_name_17 (round VARCHAR, grand_prix VARCHAR, fastest_lap VARCHAR, pole_position VARCHAR)", "sql": "SELECT COUNT(round) FROM table_name_17 WHERE fastest_lap = 'damon hill' AND pole_position = 'michael schumacher' AND grand_prix = 'hungarian grand prix';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 153, "num_statements": 1} {"question": "Update the email address of the student with ID 10 in the \"Students\" table", "schema": "CREATE TABLE Students (ID INT, FirstName VARCHAR(50), LastName VARCHAR(50), Email VARCHAR(50));", "sql": "UPDATE Students SET Email = 'new.email@example.com' WHERE ID = 10;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_update", "is_postgresql_specific": true, "sql_length": 66, "num_statements": 1} {"question": "Which indigenous communities live in climates with temperatures below -25 degrees Celsius?", "schema": "CREATE TABLE Climate (id INT PRIMARY KEY, location VARCHAR(255), temperature DECIMAL(5,2)); CREATE TABLE IndigenousCommunities (id INT PRIMARY KEY, name VARCHAR(255), climate_id INT, FOREIGN KEY (climate_id) REFERENCES Climate(id));", "sql": "SELECT IndigenousCommunities.name FROM IndigenousCommunities INNER JOIN Climate ON IndigenousCommunities.climate_id = Climate.id WHERE Climate.temperature < -25;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 161, "num_statements": 1} {"question": "Insert a new record for a military innovation in the 'innovations' table", "schema": "CREATE TABLE innovations (id INT PRIMARY KEY, innovation_name VARCHAR(100), description TEXT, category VARCHAR(50), funding FLOAT);", "sql": "INSERT INTO innovations (innovation_name, description, category, funding) VALUES ('Stealth Drone', 'Unmanned aerial vehicle with low radar cross-section', 'Air', 5000000.00);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 174, "num_statements": 1} {"question": "What is the total timber production for each region in the last 5 years?", "schema": "CREATE TABLE timber_production (region VARCHAR(255), production INTEGER, year INTEGER);", "sql": "SELECT region, SUM(production) FROM timber_production WHERE year BETWEEN YEAR(CURRENT_DATE)-5 AND YEAR(CURRENT_DATE) GROUP BY region;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 133, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the smallest Earnings that has a Money list rank of 6 and Starts smaller than 22?", "schema": "CREATE TABLE table_name_93 (earnings__ INTEGER, money_list_rank VARCHAR, starts VARCHAR)", "sql": "SELECT MIN(earnings__) AS $_ FROM table_name_93 WHERE money_list_rank = '6' AND starts < 22;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 92, "num_statements": 1} {"question": "Show a SQL definition from the timescaledb project (triggers, item 21).", "schema": null, "sql": "-- statement triggers: AFTER\nCREATE TRIGGER _0_test_trigger_insert_s_after\n AFTER INSERT ON hyper\n FOR EACH STATEMENT EXECUTE FUNCTION test_trigger();", "explanation": "SQL definition from the open-source timescaledb PostgreSQL project.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "dml_insert", "is_postgresql_specific": true, "sql_length": 156, "num_statements": 1} {"question": "What is the total number of fish species in the 'Baltic Sea' region?", "schema": "CREATE TABLE fish_species (id INT, name TEXT, region TEXT); INSERT INTO fish_species (id, name, region) VALUES (1, 'Herring', 'Baltic Sea'), (2, 'Salmon', 'Baltic Sea'), (3, 'Cod', 'North Atlantic'); CREATE TABLE aquatic_farms (id INT, name TEXT, region TEXT); INSERT INTO aquatic_farms (id, name, region) VALUES (1, 'Farm G', 'Baltic Sea'), (2, 'Farm H', 'Mediterranean Sea');", "sql": "SELECT COUNT(DISTINCT fish_species.name) FROM fish_species INNER JOIN aquatic_farms ON fish_species.region = aquatic_farms.region WHERE aquatic_farms.region = 'Baltic Sea';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 172, "num_statements": 1} {"question": "What is the total cost of ingredients sourced from sustainable suppliers for each product category?", "schema": "CREATE TABLE ingredients (ingredient_id INT, product_id INT, supplier_id INT, cost DECIMAL(10,2)); CREATE TABLE suppliers (supplier_id INT, supplier_name TEXT, is_sustainable BOOLEAN); CREATE TABLE products (product_id INT, product_name TEXT, product_category TEXT);", "sql": "SELECT products.product_category, SUM(ingredients.cost) as total_cost FROM ingredients INNER JOIN suppliers ON ingredients.supplier_id = suppliers.supplier_id INNER JOIN products ON ingredients.product_id = products.product_id WHERE suppliers.is_sustainable = TRUE GROUP BY products.product_category;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 300, "num_statements": 1} {"question": "PL/pgSQL test: Plpgsql Call (example 77).", "schema": null, "sql": "-- test in non-atomic context\nDO $$\nBEGIN\n UPDATE t_test SET x = x + 1;\n RAISE NOTICE 'f_get_x(%)', f_get_x();\n CALL f_print_x(f_get_x());\n UPDATE t_test SET x = x + 1;\n RAISE NOTICE 'f_get_x(%)', f_get_x();\n CALL f_print_x(f_get_x());\n ROLLBACK;\nEND\n$$;", "explanation": "PL/pgSQL example from PostgreSQL source test for Plpgsql Call.", "validation_query": null, "source": "plpgsql_source", "difficulty": "advanced", "category": "plpgsql", "is_postgresql_specific": true, "sql_length": 261, "num_statements": 8} {"question": "What is the total number of humanitarian assistance personnel deployed by organizations from the Middle East?", "schema": "CREATE TABLE humanitarian_assistance (id INT PRIMARY KEY, organization VARCHAR(100), personnel INT, region VARCHAR(50)); INSERT INTO humanitarian_assistance (id, organization, personnel, region) VALUES (1, 'Org 1', 1200, 'Asia-Pacific'), (2, 'Org 2', 1500, 'Middle East'), (3, 'Org 3', 1000, 'Europe');", "sql": "SELECT SUM(personnel) FROM humanitarian_assistance WHERE region = 'Middle East';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 80, "num_statements": 1} {"question": "PL/pgSQL test: Plpython Record (example 43).", "schema": null, "sql": "SELECT * FROM test_type_record_as('obj', null, null, false);", "explanation": "PL/pgSQL example from PostgreSQL source test for Plpython Record.", "validation_query": null, "source": "plpgsql_source", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 60, "num_statements": 1} {"question": "Determine the total value of all transactions for a specific customer in the last year.", "schema": "CREATE TABLE customer_transactions (transaction_id INT, customer_id INT, transaction_date DATE, transaction_value DECIMAL(10, 2)); INSERT INTO customer_transactions (transaction_id, customer_id, transaction_date, transaction_value) VALUES (1, 1, '2022-01-01', 100.00), (2, 1, '2022-02-01', 200.00), (3, 2, '2022-03-01', 150.00), (4, 1, '2022-04-01', 300.00);", "sql": "SELECT SUM(transaction_value) FROM customer_transactions WHERE customer_id = 1 AND transaction_date >= DATE_SUB(NOW(), INTERVAL 1 YEAR);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 136, "num_statements": 1} {"question": "What is the total number of games released by each studio, and the percentage of games that are VR?", "schema": "CREATE TABLE games (game_id INT, game_name TEXT, genre TEXT, studio TEXT, is_vr BOOLEAN); INSERT INTO games (game_id, game_name, genre, studio, is_vr) VALUES (1, 'Half-Life: Alyx', 'Virtual Reality', 'Valve', true), (2, 'Portal 2', 'Puzzle', 'Valve', false), (3, 'Left 4 Dead 2', 'Co-op FPS', 'Valve', false);", "sql": "SELECT games.studio, COUNT(games.game_id) AS total_games, (COUNT(games.game_id) FILTER (WHERE games.is_vr = true) * 100.0 / COUNT(games.game_id)) AS percentage_vr FROM games GROUP BY games.studio;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": true, "sql_length": 196, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: When were the credentials presented for new jersey with a status of foreign service officer?", "schema": "CREATE TABLE table_name_59 (credentials_presented VARCHAR, state VARCHAR, status VARCHAR)", "sql": "SELECT credentials_presented FROM table_name_59 WHERE state = 'new jersey' AND status = 'foreign service officer';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 114, "num_statements": 1} {"question": "Find the number of projects in the 'public_works' table that were started in each month of the year, for the year 2020.", "schema": "CREATE TABLE public_works (id INT, name VARCHAR(50), location VARCHAR(50), start_date DATE);", "sql": "SELECT DATEPART(year, start_date) as year, DATEPART(month, start_date) as month, COUNT(*) as num_projects FROM public_works WHERE start_date >= '2020-01-01' AND start_date < '2021-01-01' GROUP BY DATEPART(year, start_date), DATEPART(month, start_date);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 252, "num_statements": 1} {"question": "What is the total cost of all lifelong learning programs offered in the region of Tokyo?", "schema": "CREATE TABLE regions (region_name VARCHAR(255), region_id INT); CREATE TABLE lifelong_learning_programs (program_id INT, program_name VARCHAR(255), region_id INT, program_cost DECIMAL(10,2), PRIMARY KEY (program_id), FOREIGN KEY (region_id) REFERENCES regions(region_id));", "sql": "SELECT SUM(lifelong_learning_programs.program_cost) FROM lifelong_learning_programs INNER JOIN regions ON lifelong_learning_programs.region_id = regions.region_id WHERE regions.region_name = 'Tokyo';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 199, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'rowtypes' (example 19).", "schema": null, "sql": "create temp table people (fn fullname, bd date);", "explanation": "DDL from PostgreSQL core regression test for Rowtypes.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 48, "num_statements": 1} {"question": "Write the SQL definition from PostgreSQL contrib extension 'contrib' (item 25).", "schema": null, "sql": "CREATE OPERATOR - (\n\tLEFTARG = hstore,\n\tRIGHTARG = text,\n\tPROCEDURE = delete\n);", "explanation": "SQL definition from the 'contrib' PostgreSQL contrib extension.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "ddl_advanced", "is_postgresql_specific": true, "sql_length": 79, "num_statements": 1} {"question": "What is the combined funding for climate finance initiatives in Europe and North America?", "schema": "CREATE TABLE climate_finance (region VARCHAR(255), funding INT); INSERT INTO climate_finance VALUES ('Europe', 7000000); INSERT INTO climate_finance VALUES ('North America', 9000000);", "sql": "SELECT SUM(funding) FROM climate_finance WHERE region IN ('Europe', 'North America');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 85, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'insert' (example 166).", "schema": null, "sql": "create table part_ee_ff3 partition of part_ee_ff for values from (20) to (30) partition by range (b);", "explanation": "DDL from PostgreSQL core regression test for Insert.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "ddl_table", "is_postgresql_specific": true, "sql_length": 101, "num_statements": 1} {"question": "PostgreSQL regression test 'jsonb_jsonpath': Write the SELECT query (example 745).", "schema": null, "sql": "select jsonb_path_query_tz(\n\t'[\"2017-03-10 12:34:00\", \"2017-03-10 12:35:00\", \"2017-03-10 12:36:00\", \"2017-03-10 12:35:00+01\", \"2017-03-10 13:35:00+01\", \"2017-03-10 12:35:00-01\", \"2017-03-10\", \"2017-03-11\"]',\n\t'$[*].datetime() ? (@ < \"2017-03-10 12:35:00\".timestamp())');", "explanation": "Regression test for Jsonb Jsonpath in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select jsonb_path_query_tz(\n\t'[\"2017-03-10 12:34:00\", \"2017-03-10 12:35:00\", \"2017-03-10 12:36:00\", \"2017-03-10 12:35:00+01\", \"2017-03-10 13:35:00+01\", \"2017-03-10 12:35:00-01\", \"2017-03-10\", \"2017-03-11\"]',\n\t'$[*].datetime() ? (@ < \"2017-03-10 12:35:00\".timestamp())')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "advanced", "category": "query_select", "is_postgresql_specific": false, "sql_length": 270, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the Away team of the Merthyr Tydfil Home game with a Score of 1–1?", "schema": "CREATE TABLE table_name_46 (away_team VARCHAR, score VARCHAR, home_team VARCHAR)", "sql": "SELECT away_team FROM table_name_46 WHERE score = '1–1' AND home_team = 'merthyr tydfil';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 89, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the region containing the Constitutional Province of Callao?", "schema": "CREATE TABLE table_1672804_2 (region VARCHAR, province VARCHAR)", "sql": "SELECT region FROM table_1672804_2 WHERE province = 'Constitutional province of Callao';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 88, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: For player is adam wiesel mention all the college/junior/club team", "schema": "CREATE TABLE table_2781227_4 (college_junior_club_team VARCHAR, player VARCHAR)", "sql": "SELECT college_junior_club_team FROM table_2781227_4 WHERE player = 'Adam Wiesel';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 82, "num_statements": 1} {"question": "Show the number of veterans hired in each state for the last 6 months, excluding California", "schema": "CREATE TABLE veteran_employment (employment_id INT, veteran_state VARCHAR(2), hire_date DATE); INSERT INTO veteran_employment (employment_id, veteran_state, hire_date) VALUES (1, 'CA', '2021-02-15'), (2, 'TX', '2021-08-24'), (3, 'NY', '2021-07-02'), (4, 'CA', '2021-11-10'), (5, 'NJ', '2021-05-15');", "sql": "SELECT veteran_state, COUNT(*) AS hires FROM veteran_employment WHERE hire_date >= DATE_SUB(CURRENT_DATE, INTERVAL 6 MONTH) AND veteran_state != 'CA' GROUP BY veteran_state;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 173, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the 2008 for the 2009 ch our charming lady?", "schema": "CREATE TABLE table_name_89 (Id VARCHAR)", "sql": "SELECT 2008 FROM table_name_89 WHERE 2009 = 'ch our charming lady';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 67, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the Rider, when Grid is less than 16, when Manufacturer is Aprilia, and when Time is +28.288?", "schema": "CREATE TABLE table_name_23 (rider VARCHAR, time VARCHAR, grid VARCHAR, manufacturer VARCHAR)", "sql": "SELECT rider FROM table_name_23 WHERE grid < 16 AND manufacturer = 'aprilia' AND time = '+28.288';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 98, "num_statements": 1} {"question": "What is the percentage of autonomous vehicle accidents per month, partitioned by vehicle make?", "schema": "CREATE TABLE AutonomousVehicleAccidents (id INT, accident_date DATE, make VARCHAR(20), model VARCHAR(20)); INSERT INTO AutonomousVehicleAccidents (id, accident_date, make, model) VALUES (1, '2021-01-01', 'Tesla', 'Model S'), (2, '2021-02-01', 'Waymo', 'Chrysler Pacifica'), (3, '2021-02-01', 'Tesla', 'Model 3'), (4, '2021-03-01', 'NVIDIA', 'Cruise AV'), (5, '2021-04-01', 'Tesla', 'Model S');", "sql": "SELECT EXTRACT(MONTH FROM accident_date) AS month, make, COUNT(*) * 100.0 / SUM(COUNT(*)) OVER (PARTITION BY EXTRACT(MONTH FROM accident_date)) AS pct_of_accidents FROM AutonomousVehicleAccidents GROUP BY month, make;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "query_window_function", "is_postgresql_specific": true, "sql_length": 218, "num_statements": 1} {"question": "What's the total value of social impact investments in India?", "schema": "CREATE TABLE investment_values(investment_id INT, investment_type VARCHAR(20), value FLOAT, country VARCHAR(10));", "sql": "SELECT SUM(value) FROM investment_values WHERE investment_type = 'social_impact' AND country = 'India';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 103, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'test_decoding' (example 13).", "schema": null, "sql": "SELECT data FROM pg_logical_slot_get_changes('regression_slot', NULL,NULL, 'include-xids', '0', 'skip-empty-xacts', '1', 'stream-changes', '1');", "explanation": "Example query from the 'test_decoding' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 144, "num_statements": 1} {"question": "What is the average salary of employees by gender?", "schema": "CREATE TABLE employees (id INT, gender VARCHAR(10), salary INT); INSERT INTO employees (id, gender, salary) VALUES (1, 'Male', 5000), (2, 'Female', 5500), (3, 'Non-binary', 5200);", "sql": "SELECT gender, AVG(salary) as avg_salary FROM employees GROUP BY gender;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 74, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'pageinspect' (example 30).", "schema": null, "sql": "SELECT live_items, dead_items, page_size, hasho_prevblkno, hasho_nextblkno,\nhasho_bucket, hasho_flag, hasho_page_id FROM\nhash_page_stats(get_raw_page('test_hash_a_idx', 3));", "explanation": "Example query from the 'pageinspect' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 173, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What are the games with an Involuntary suspension of Season (hurricane Rita)?", "schema": "CREATE TABLE table_name_72 (games VARCHAR, standing VARCHAR)", "sql": "SELECT games FROM table_name_72 WHERE standing = 'involuntary suspension of season (hurricane rita)';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 101, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'create_view' (example 78).", "schema": null, "sql": "CREATE TABLE tbl4 (g int, h int);", "explanation": "DDL from PostgreSQL core regression test for Create View.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_table", "is_postgresql_specific": false, "sql_length": 33, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'timestamp' (example 73).", "schema": null, "sql": "INSERT INTO TIMESTAMP_TBL VALUES ('Feb 15 17:32:01 1997');", "explanation": "DML from PostgreSQL core regression test for Timestamp.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 58, "num_statements": 1} {"question": "What is the minimum sale price for naval equipment in Japan?", "schema": "CREATE TABLE EquipmentTypeSales (id INT PRIMARY KEY, equipment_type VARCHAR(50), country VARCHAR(50), sale_price DECIMAL(10, 2));", "sql": "SELECT MIN(sale_price) FROM EquipmentTypeSales WHERE equipment_type = 'naval' AND country = 'Japan';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 100, "num_statements": 1} {"question": "What was the average food safety score for each restaurant in Q1 2021?", "schema": "CREATE TABLE food_inspection (date DATE, restaurant VARCHAR(255), score DECIMAL(3,1)); INSERT INTO food_inspection (date, restaurant, score) VALUES ('2021-01-01', 'Restaurant A', 92.0), ('2021-01-01', 'Restaurant B', 88.0), ('2021-01-02', 'Restaurant A', 94.0), ('2021-01-02', 'Restaurant B', 89.0);", "sql": "SELECT restaurant, AVG(score) as avg_score FROM food_inspection WHERE date BETWEEN '2021-01-01' AND '2021-03-31' GROUP BY restaurant;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 133, "num_statements": 1} {"question": "Write the PL/pgSQL object from PostgreSQL regression test 'create_procedure' (example 10).", "schema": null, "sql": "$$;\n\nCALL ptest6(1, 2);\n\nCREATE PROCEDURE ptest6a(inout a anyelement, out b anyelement)\nLANGUAGE SQL\nAS $$\nSELECT $1, $1;", "explanation": "PL/pgSQL object from PostgreSQL core test for Create Procedure.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 121, "num_statements": 3} {"question": "What is the distribution of player levels in esports events?", "schema": "CREATE TABLE esports_events (id INT, event VARCHAR(20), player_level INT); INSERT INTO esports_events (id, event, player_level) VALUES (1, 'Event1', 5), (2, 'Event2', 10), (3, 'Event1', 8);", "sql": "SELECT event, player_level, COUNT(*) as count FROM esports_events GROUP BY event, player_level;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 95, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the smallest number of seats in a retired vehicle that was started in service in 1981?", "schema": "CREATE TABLE table_name_93 (number_of_seats INTEGER, current_status VARCHAR, year_placed_in_service VARCHAR)", "sql": "SELECT MIN(number_of_seats) FROM table_name_93 WHERE current_status = 'retired' AND year_placed_in_service = '1981';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 116, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the to par when the score was 69-69=138?", "schema": "CREATE TABLE table_name_35 (to_par VARCHAR, score VARCHAR)", "sql": "SELECT to_par FROM table_name_35 WHERE score = 69 - 69 = 138;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 61, "num_statements": 1} {"question": "What is the ratio of completed to total projects for community development initiatives in Africa in the last 5 years?", "schema": "CREATE TABLE CommunityProjects (ProjectID INT, ProjectName VARCHAR(50), Location VARCHAR(50), StartDate DATE, CompletionDate DATE); INSERT INTO CommunityProjects (ProjectID, ProjectName, Location, StartDate, CompletionDate) VALUES (1, 'Clean Water Project', 'Nigeria', '2016-01-01', '2017-12-31'), (2, 'Renewable Energy Initiative', 'Kenya', '2018-01-01', '2019-12-31');", "sql": "SELECT AVG(CASE WHEN StartDate >= DATEADD(YEAR, -5, CURRENT_DATE) THEN 1.0 * COUNT(CASE WHEN CompletionDate IS NOT NULL THEN 1 END) / COUNT(*) ELSE NULL END) FROM CommunityProjects WHERE Location IN ('Nigeria', 'Kenya');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 220, "num_statements": 1} {"question": "How many astronauts are there from Canada who have been to the ISS?", "schema": "CREATE TABLE astronauts(id INT, name VARCHAR(50), country VARCHAR(50), missions VARCHAR(50)); INSERT INTO astronauts VALUES(1, 'Roberta Bondar', 'Canada', 'ISS'), (2, 'David Saint-Jacques', 'Canada', 'ISS');", "sql": "SELECT COUNT(*) FROM astronauts WHERE country = 'Canada' AND missions = 'ISS';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 78, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the highest Against where they lost less than 20 games, tied more than 2 of them, and they had Favour less than 11?", "schema": "CREATE TABLE table_name_85 (against INTEGER, favour VARCHAR, lost VARCHAR, draw VARCHAR)", "sql": "SELECT MAX(against) FROM table_name_85 WHERE lost < 20 AND draw > 2 AND favour < 11;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 84, "num_statements": 1} {"question": "Delete all intelligence reports in the reports table that were written by analysts with the last name \"Johnson\".", "schema": "CREATE TABLE reports (title TEXT, author TEXT, date DATE); INSERT INTO reports (title, author, date) VALUES ('Intelligence Report 1', 'John Smith', '2021-01-01'), ('Intelligence Report 2', 'Jane Johnson', '2021-02-01'), ('Intelligence Report 3', 'Bob Johnson', '2021-03-01');", "sql": "DELETE FROM reports WHERE author LIKE '%Johnson';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_delete", "is_postgresql_specific": false, "sql_length": 49, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the location for Lakewood Park Christian?", "schema": "CREATE TABLE table_name_81 (location VARCHAR, school VARCHAR)", "sql": "SELECT location FROM table_name_81 WHERE school = 'lakewood park christian';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 76, "num_statements": 1} {"question": "What is the average loan amount for socially responsible lenders in Asia, grouped by year?", "schema": "CREATE TABLE Loans (Id INT, Lender VARCHAR(20), Location VARCHAR(20), LoanType VARCHAR(20), LoanAmount DECIMAL(10,2), LoanYear INT); INSERT INTO Loans (Id, Lender, Location, LoanType, LoanAmount, LoanYear) VALUES (1, 'LenderA', 'Asia', 'Socially Responsible', 500.00, 2020), (2, 'LenderB', 'Asia', 'Socially Responsible', 700.00, 2020), (3, 'LenderC', 'Asia', 'Socially Responsible', 600.00, 2021);", "sql": "SELECT AVG(LoanAmount) AS Avg_Loan_Amount, LoanYear FROM Loans WHERE LoanType = 'Socially Responsible' AND Location = 'Asia' GROUP BY LoanYear;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 143, "num_statements": 1} {"question": "What is the total length of all vessels in the Atlantic Ocean that are over 100 meters long?", "schema": "CREATE TABLE atlantic_ocean_vessels (id INT, length FLOAT, is_commercial BOOLEAN);", "sql": "SELECT SUM(length) FROM atlantic_ocean_vessels WHERE length > 100 AND is_commercial = TRUE;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 91, "num_statements": 1} {"question": "What was the maximum marine protected area size created each year?", "schema": "CREATE TABLE marine_protected_areas (year INT, area_size FLOAT); INSERT INTO marine_protected_areas (year, area_size) VALUES (2010, 25000), (2011, 30000), (2012, 20000), (2013, 35000), (2014, 15000), (2015, 40000);", "sql": "SELECT year, MAX(area_size) OVER(PARTITION BY (year - MOD(year, 5))) as max_area_per_5_years FROM marine_protected_areas;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "query_window_function", "is_postgresql_specific": true, "sql_length": 121, "num_statements": 1} {"question": "Update the location of the reader with reader_id 1 in the 'audience_demographics' table", "schema": "CREATE TABLE audience_demographics (reader_id INT PRIMARY KEY, age INT, gender VARCHAR(10), location VARCHAR(100));", "sql": "UPDATE audience_demographics SET location = 'Los Angeles, CA' WHERE reader_id = 1;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 82, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: who are the candidates when the incumbent is lindsey graham?", "schema": "CREATE TABLE table_1341423_40 (candidates VARCHAR, incumbent VARCHAR)", "sql": "SELECT candidates FROM table_1341423_40 WHERE incumbent = 'Lindsey Graham';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 75, "num_statements": 1} {"question": "What is the percentage of patients who visited a hospital vs. clinic?", "schema": "CREATE TABLE visits (id INT, visit_type TEXT, visit_date DATE);", "sql": "SELECT (SUM(CASE WHEN visit_type = 'Hospital' THEN 1 ELSE 0 END) * 100.0 / COUNT(*)) as hospital_percentage,;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 109, "num_statements": 1} {"question": "List all policies with their corresponding disability types.", "schema": "CREATE TABLE PolicyDisabilityTypes (PolicyID INT, DisabilityTypeID INT); INSERT INTO PolicyDisabilityTypes (PolicyID, DisabilityTypeID) VALUES (1, 1); INSERT INTO PolicyDisabilityTypes (PolicyID, DisabilityTypeID) VALUES (2, 2);", "sql": "SELECT p.PolicyName, dt.DisabilityType FROM Policies p INNER JOIN PolicyDisabilityTypes pdt ON p.PolicyID = pdt.PolicyID INNER JOIN DisabilityTypes dt ON pdt.DisabilityTypeID = dt.DisabilityTypeID;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 197, "num_statements": 1} {"question": "What are the total sales for each genre of music in the United States?", "schema": "CREATE TABLE sales (sale_id INT, genre VARCHAR(255), country VARCHAR(255), sales_amount DECIMAL(10,2));", "sql": "SELECT genre, SUM(sales_amount) FROM sales WHERE country = 'United States' GROUP BY genre;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 90, "num_statements": 1} {"question": "What is the total number of community development initiatives and their budgets for each country in Oceania?", "schema": "CREATE TABLE community_initiatives (country VARCHAR(50), initiative VARCHAR(50), budget INT); INSERT INTO community_initiatives (country, initiative, budget) VALUES ('Australia', 'Green Spaces', 120000), ('New Zealand', 'Waste Management', 90000);", "sql": "SELECT country, COUNT(*), SUM(budget) FROM community_initiatives WHERE country IN ('Australia', 'New Zealand') GROUP BY country;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 128, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the average age at appointment of those attached to security?", "schema": "CREATE TABLE table_name_58 (age_at_appointment INTEGER, portfolio_attachment VARCHAR)", "sql": "SELECT AVG(age_at_appointment) FROM table_name_58 WHERE portfolio_attachment = 'security';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 90, "num_statements": 1} {"question": "List the donors who made donations in both the years 2017 and 2020.", "schema": "CREATE TABLE Donors (DonorID INT, DonorName TEXT); CREATE TABLE Donations (DonationID INT, DonorID INT, DonationDate DATE, DonationAmount DECIMAL);", "sql": "SELECT D.DonorName FROM Donors D JOIN Donations DON17 ON D.DonorID = DON17.DonorID JOIN Donations DON20 ON D.DonorID = DON20.DonorID WHERE YEAR(DON17.DonationDate) = 2017 AND YEAR(DON20.DonationDate) = 2020 GROUP BY D.DonorName HAVING COUNT(DISTINCT YEAR(DonationDate)) = 2;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 275, "num_statements": 1} {"question": "What is the total quantity of 'Organic Cotton' sourced from 'Africa' by our vendors?", "schema": "CREATE TABLE Vendors (VendorID int, VendorName varchar(50), Country varchar(50)); INSERT INTO Vendors (VendorID, VendorName, Country) VALUES (1, 'VendorA', 'Asia'), (2, 'VendorB', 'Africa'), (3, 'VendorC', 'Europe'); CREATE TABLE Materials (MaterialID int, MaterialName varchar(50), Sustainable bit); INSERT INTO Materials (MaterialID, MaterialName, Sustainable) VALUES (1, 'Organic Cotton', 1), (2, 'Polyester', 0); CREATE TABLE Source (SourceID int, VendorID int, MaterialID int, Quantity int); INSERT INTO Source (SourceID, VendorID, MaterialID, Quantity) VALUES (1, 1, 1, 500), (2, 1, 2, 300), (3, 2, 1, 800), (4, 3, 2, 700);", "sql": "SELECT SUM(Quantity) FROM Source JOIN Materials ON Source.MaterialID = Materials.MaterialID JOIN Vendors ON Source.VendorID = Vendors.VendorID WHERE Vendors.Country = 'Africa' AND Materials.MaterialName = 'Organic Cotton';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 222, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many capitals had brest litovsk voivodeship as voivodeship after 1569?", "schema": "CREATE TABLE table_1784514_1 (capital VARCHAR, voivodeship_after_1569 VARCHAR)", "sql": "SELECT COUNT(capital) FROM table_1784514_1 WHERE voivodeship_after_1569 = 'Brest Litovsk Voivodeship';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 102, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'pg_stat_statements' (example 20).", "schema": null, "sql": "SELECT *\n FROM pgss_dml_tab\n WHERE a > 9\n ORDER BY a ;", "explanation": "Example query from the 'pg_stat_statements' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 57, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the status for unemployment rate being 6.7%", "schema": "CREATE TABLE table_22815568_12 (status VARCHAR, unemployment_rate VARCHAR)", "sql": "SELECT status FROM table_22815568_12 WHERE unemployment_rate = '6.7%';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 70, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many polling percentages were there in October 2008 when is was 30.8% in Aug 2008?", "schema": "CREATE TABLE table_23680576_2 (oct_2008 VARCHAR, aug_2008 VARCHAR)", "sql": "SELECT COUNT(oct_2008) FROM table_23680576_2 WHERE aug_2008 = '30.8%';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 70, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What Player was picked after Round 2 with a Pick number larger than 33?", "schema": "CREATE TABLE table_name_58 (player VARCHAR, pick VARCHAR, round VARCHAR)", "sql": "SELECT player FROM table_name_58 WHERE pick > 33 AND round > 2;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 63, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What country is Richard Vanquelef from?", "schema": "CREATE TABLE table_24565004_22 (nationality² VARCHAR, name VARCHAR)", "sql": "SELECT nationality² FROM table_24565004_22 WHERE name = 'Richard Vanquelef';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 76, "num_statements": 1} {"question": "What is the minimum investment in climate finance in Australia?", "schema": "CREATE TABLE Climate_Finance_Australia (Year INT, Investment DECIMAL(10,2)); INSERT INTO Climate_Finance_Australia (Year, Investment) VALUES (2018, 1500.0), (2019, 2000.0), (2020, 2500.0), (2021, 3000.0);", "sql": "SELECT MIN(Investment) FROM Climate_Finance_Australia;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 54, "num_statements": 1} {"question": "Insert record for a new safety inspection", "schema": "CREATE TABLE safety_inspections (id INT, union_name VARCHAR(50), inspection_date DATE, passed BOOLEAN);", "sql": "INSERT INTO safety_inspections (id, union_name, inspection_date, passed) VALUES (1, 'United Auto Workers', '2022-06-01', true);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 127, "num_statements": 1} {"question": "What is the total funding amount for arts and culture programs in 'California' by 'California Arts Council' in 2022?", "schema": "CREATE TABLE Funding (funding_source VARCHAR(20), region VARCHAR(20), total_funding DECIMAL(10,2)); INSERT INTO Funding (funding_source, region, total_funding) VALUES ('California Arts Council', 'California', 120000, 'Arts Foundation', 'California', 75000);", "sql": "SELECT SUM(total_funding) FROM Funding WHERE funding_source = 'California Arts Council' AND region = 'California' AND YEAR(event_date) = 2022;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 142, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the score when Bolton Wanderers were the away team?", "schema": "CREATE TABLE table_name_38 (score VARCHAR, away_team VARCHAR)", "sql": "SELECT score FROM table_name_38 WHERE away_team = 'bolton wanderers';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 69, "num_statements": 1} {"question": "List all regulations that apply to carriers from India.", "schema": "CREATE TABLE Regulation (RegulationID INT, Name VARCHAR(255), Description TEXT, AppliesToCountry VARCHAR(255)); INSERT INTO Regulation (RegulationID, Name, Description, AppliesToCountry) VALUES (2, 'ISPS Code', 'International Ship and Port Facility Security', 'Worldwide'); INSERT INTO Regulation (RegulationID, Name, Description, AppliesToCountry) VALUES (3, 'Ballast Water Management Convention', 'Regulating ballast water management', 'Worldwide');", "sql": "SELECT RegulationID, Name FROM Regulation WHERE AppliesToCountry IN (SELECT Country FROM Carrier WHERE Country = 'India');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 122, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'pageinspect' (example 14).", "schema": null, "sql": "SELECT brin_page_items(get_raw_page('test1_a_btree', 0), 'test1_a_idx');", "explanation": "Example query from the 'pageinspect' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 72, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'numeric' (example 334).", "schema": null, "sql": "INSERT INTO num_exp_mul VALUES (8,0,'0');", "explanation": "DML from PostgreSQL core regression test for Numeric.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 41, "num_statements": 1} {"question": "How many investment rounds have been raised by companies with female co-founders?", "schema": "CREATE TABLE company (id INT, name VARCHAR(50), founder_gender VARCHAR(10)); CREATE TABLE investment_round (id INT, company_id INT, round_number INT); INSERT INTO company (id, name, founder_gender) VALUES (1, 'Acme Corp', 'Female'); INSERT INTO investment_round (id, company_id, round_number) VALUES (1, 1, 1); INSERT INTO investment_round (id, company_id, round_number) VALUES (2, 1, 2); INSERT INTO company (id, name, founder_gender) VALUES (2, 'Maple Leaf Technologies', 'Male'); INSERT INTO investment_round (id, company_id, round_number) VALUES (3, 2, 1);", "sql": "SELECT COUNT(*) AS num_investment_rounds FROM company c JOIN investment_round ir ON c.id = ir.company_id WHERE c.founder_gender = 'Female';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 139, "num_statements": 1} {"question": "What is the percentage of union members in each state?", "schema": "CREATE TABLE Union_Membership (state VARCHAR(20), union_member BOOLEAN); INSERT INTO Union_Membership (state, union_member) VALUES ('California', true), ('California', false), ('New York', true);", "sql": "SELECT state, (SUM(CAST(union_member AS INT)) / COUNT(*) * 100) as union_member_percentage FROM Union_Membership GROUP BY state;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 128, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which game has an opponent of Phoenix Coyotes and was before Dec 9?", "schema": "CREATE TABLE table_name_96 (game INTEGER, opponent VARCHAR, december VARCHAR)", "sql": "SELECT AVG(game) FROM table_name_96 WHERE opponent = 'phoenix coyotes' AND december < 9;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 88, "num_statements": 1} {"question": "What is the average ticket price for each music festival genre?", "schema": "CREATE TABLE music_festivals (festival_id INT, genre VARCHAR(255), ticket_price DECIMAL(5,2)); INSERT INTO music_festivals (festival_id, genre, ticket_price) VALUES (1, 'Rock', 200.00), (2, 'Pop', 250.00), (3, 'Jazz', 150.00);", "sql": "SELECT genre, AVG(ticket_price) FROM music_festivals GROUP BY genre;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 68, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: When the School is Bishop Luers High School, what is the Hometown?", "schema": "CREATE TABLE table_name_83 (hometown VARCHAR, school VARCHAR)", "sql": "SELECT hometown FROM table_name_83 WHERE school = 'bishop luers high school';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 77, "num_statements": 1} {"question": "What is the maximum and minimum risk score for each risk category in the R&D department?", "schema": "CREATE TABLE risk_scores (score_id INT, department VARCHAR(50), risk_category VARCHAR(50), value DECIMAL(10, 2)); INSERT INTO risk_scores (score_id, department, risk_category, value) VALUES (1, 'R&D', 'Operational Risk', 7.25), (2, 'R&D', 'Compliance Risk', 7.50), (3, 'Marketing', 'Operational Risk', 6.75), (4, 'Marketing', 'Compliance Risk', 7.00);", "sql": "SELECT department, risk_category, MAX(value) AS max_score, MIN(value) AS min_score FROM risk_scores GROUP BY department, risk_category;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 135, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the highest number of yards in years where there were fewer than 51 rushes and more than 12 games?", "schema": "CREATE TABLE table_name_48 (yards INTEGER, rushes VARCHAR, games VARCHAR)", "sql": "SELECT MAX(yards) FROM table_name_48 WHERE rushes < 51 AND games > 12;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 70, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'create_view' (example 49).", "schema": null, "sql": "CREATE VIEW v8_temp AS SELECT * FROM base_table WHERE EXISTS (SELECT 1 FROM temp_table);", "explanation": "DDL from PostgreSQL core regression test for Create View.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "ddl_view", "is_postgresql_specific": false, "sql_length": 88, "num_statements": 1} {"question": "Calculate the total number of workplaces by industry, differentiating between union and non-union workplaces", "schema": "CREATE TABLE workplaces (id INT, name VARCHAR(255), industry VARCHAR(255), union_status VARCHAR(255), num_employees INT); INSERT INTO workplaces (id, name, industry, union_status, num_employees) VALUES (1, 'ABC Company', 'Manufacturing', 'Union', 500), (2, 'XYZ Corporation', 'Manufacturing', 'Non-Union', 250), (3, 'DEF Industries', 'Retail', 'Union', 300), (4, 'GHI Company', 'Retail', 'Non-Union', 150), (5, 'JKL Industries', 'Construction', 'Union', 200);", "sql": "SELECT industry, union_status, COUNT(*) as 'Total Workplaces' FROM workplaces GROUP BY industry, union_status;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 110, "num_statements": 1} {"question": "How many financial capability programs exist in Southeast Asia?", "schema": "CREATE TABLE if not exists financial_capability_programs (id INT, name VARCHAR(255), country VARCHAR(255)); INSERT INTO financial_capability_programs (id, name, country) VALUES (1, 'Program A', 'Indonesia'), (2, 'Program B', 'Philippines'), (3, 'Program C', 'Malaysia');", "sql": "SELECT COUNT(*) FROM financial_capability_programs WHERE country IN ('Indonesia', 'Philippines', 'Malaysia', 'Singapore', 'Thailand', 'Vietnam');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 145, "num_statements": 1} {"question": "Which social good technology organizations in the environmental sector have received the least funding in the past 3 years?", "schema": "CREATE TABLE org_funding_env (org_name TEXT, funding_amount INT, funding_year INT, sector TEXT); INSERT INTO org_funding_env (org_name, funding_amount, funding_year, sector) VALUES ('SocialTech6', 50000, 2020, 'environment'), ('SocialTech7', 70000, 2019, 'environment'), ('SocialTech8', 60000, 2018, 'environment'), ('SocialTech9', 80000, 2021, 'environment'), ('SocialTech10', 90000, 2017, 'environment');", "sql": "SELECT org_name, MIN(funding_amount) FROM org_funding_env WHERE sector = 'environment' AND funding_year BETWEEN 2018 AND 2020 GROUP BY org_name;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 144, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What date was an episode broadcasted on that had a run time of 24:40?", "schema": "CREATE TABLE table_2102714_1 (broadcast_date VARCHAR, run_time VARCHAR)", "sql": "SELECT broadcast_date FROM table_2102714_1 WHERE run_time = '24:40';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 68, "num_statements": 1} {"question": "What is the average energy consumption per smart city?", "schema": "CREATE TABLE smart_cities (city_name TEXT, energy_consumption FLOAT); INSERT INTO smart_cities VALUES ('CityA', 500.0), ('CityB', 700.0), ('CityC', 300.0);", "sql": "SELECT AVG(energy_consumption) FROM smart_cities;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 49, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What Date has a Winning driver of ugo sivocci?", "schema": "CREATE TABLE table_name_61 (date VARCHAR, winning_driver VARCHAR)", "sql": "SELECT date FROM table_name_61 WHERE winning_driver = 'ugo sivocci';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 68, "num_statements": 1} {"question": "What is the total number of hospital beds in rural hospitals of Hawaii that have less than 150 beds or were built after 2010?", "schema": "CREATE TABLE hospitals (id INT, name TEXT, location TEXT, beds INT, rural BOOLEAN, built DATE); INSERT INTO hospitals (id, name, location, beds, rural, built) VALUES (1, 'Hospital A', 'Hawaii', 120, true, '2011-01-01'), (2, 'Hospital B', 'Hawaii', 100, true, '2012-01-01');", "sql": "SELECT SUM(beds) FROM hospitals WHERE location = 'Hawaii' AND rural = true AND (beds < 150 OR built > '2010-01-01');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 116, "num_statements": 1} {"question": "What is the total energy production by renewable source in Canada for the month of January 2022?", "schema": "CREATE TABLE energy_production (id INT, country VARCHAR(50), source VARCHAR(50), production FLOAT, timestamp TIMESTAMP); INSERT INTO energy_production (id, country, source, production, timestamp) VALUES (1, 'Canada', 'Wind', 500.2, '2022-01-01 10:00:00'), (2, 'Canada', 'Solar', 700.3, '2022-01-02 15:00:00');", "sql": "SELECT source, SUM(production) as total_production FROM energy_production WHERE country = 'Canada' AND timestamp BETWEEN '2022-01-01 00:00:00' AND '2022-01-31 23:59:59' AND source IN ('Wind', 'Solar') GROUP BY source;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 217, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What driver won the VII Rand Grand Prix?", "schema": "CREATE TABLE table_name_26 (winning_driver VARCHAR, race_name VARCHAR)", "sql": "SELECT winning_driver FROM table_name_26 WHERE race_name = 'vii rand grand prix';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 81, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many tries did the club with a try bonus of correct as of 2 June 2009 have?", "schema": "CREATE TABLE table_name_45 (tries_for VARCHAR, try_bonus VARCHAR)", "sql": "SELECT tries_for FROM table_name_45 WHERE try_bonus = 'correct as of 2 june 2009';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 82, "num_statements": 1} {"question": "What are the names of customers who have invested in the technology sector but not in the healthcare sector?", "schema": "CREATE TABLE Customers (CustomerID INT, Name VARCHAR(50));CREATE TABLE Investments (CustomerID INT, InvestmentType VARCHAR(10), Sector VARCHAR(10));INSERT INTO Customers VALUES (1,'John Doe'),(2,'Jane Smith'),(3,'Bob Johnson');INSERT INTO Investments VALUES (1,'Stocks','Technology'),(1,'Stocks','Healthcare'),(2,'Stocks','Technology'),(3,'Stocks','Healthcare');", "sql": "SELECT DISTINCT c.Name FROM Customers c INNER JOIN Investments i ON c.CustomerID = i.CustomerID WHERE i.Sector = 'Technology' AND c.CustomerID NOT IN (SELECT CustomerID FROM Investments WHERE Sector = 'Healthcare');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 215, "num_statements": 1} {"question": "What is the total quantity of recycled materials in stock?", "schema": "CREATE TABLE materials (id INT, name TEXT, recycled BOOLEAN); CREATE TABLE inventory (id INT, material_id INT, quantity INT); INSERT INTO materials (id, name, recycled) VALUES (1, 'Material A', true), (2, 'Material B', false), (3, 'Material C', true); INSERT INTO inventory (id, material_id, quantity) VALUES (1, 1, 50), (2, 2, 80), (3, 3, 100);", "sql": "SELECT SUM(inventory.quantity) FROM inventory INNER JOIN materials ON inventory.material_id = materials.id WHERE materials.recycled = true;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 139, "num_statements": 1} {"question": "How many local vendors have participated in sustainable events in Spain?", "schema": "CREATE TABLE LocalVendors (VendorID INT, Country VARCHAR(50), Events INT); INSERT INTO LocalVendors (VendorID, Country, Events) VALUES (1, 'Spain', 3), (2, 'Spain', 2);", "sql": "SELECT SUM(Events) FROM LocalVendors WHERE Country = 'Spain';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 61, "num_statements": 1} {"question": "Identify co-owned properties in New York City with sustainable urbanism certifications that were sold in the past six months, and list their names, addresses, and the names of their co-owners.", "schema": "CREATE TABLE properties (property_id INT, name VARCHAR(255), address VARCHAR(255), city VARCHAR(255), sustainable_urbanism_certified BOOLEAN, sold_date DATE); CREATE TABLE co_owners (property_id INT, owner_name VARCHAR(255)); INSERT INTO properties (property_id, name, address, city, sustainable_urbanism_certified, sold_date) VALUES (1, 'Green Living', '123 Main St', 'New York', true, '2022-03-15'), (2, 'Eco Haven', '456 Oak St', 'New York', false, '2022-01-01'), (3, 'Sustainable Suites', '789 Pine St', 'New York', true, NULL); INSERT INTO co_owners (property_id, owner_name) VALUES (1, 'Alice'), (1, 'Bob'), (2, 'Charlie'), (3, 'Dave');", "sql": "SELECT p.name, p.address, co.owner_name FROM properties p JOIN co_owners co ON p.property_id = co.property_id WHERE p.city = 'New York' AND p.sustainable_urbanism_certified = true AND p.sold_date >= DATEADD(month, -6, GETDATE());", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 229, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: when points against was 387 what was the lost?", "schema": "CREATE TABLE table_name_8 (lost VARCHAR, points_against VARCHAR)", "sql": "SELECT lost FROM table_name_8 WHERE points_against = '387';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 59, "num_statements": 1} {"question": "What is the average order value per customer by country?", "schema": "CREATE TABLE Customers (CustomerID INT, CustomerName VARCHAR(50), Country VARCHAR(50)); INSERT INTO Customers VALUES (1, 'John Smith', 'USA'), (2, 'Jane Doe', 'Canada'); CREATE TABLE Orders (OrderID INT, CustomerID INT, OrderValue DECIMAL(10,2)); INSERT INTO Orders VALUES (1, 1, 50.00), (2, 1, 75.00), (3, 2, 100.00);", "sql": "SELECT Country, AVG(OrderValue) as AvgOrderValue FROM Orders o JOIN Customers c ON o.CustomerID = c.CustomerID GROUP BY Country;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 128, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many bronzes have 1 as the total, with spain (esp) as the nation, and a gold greater than 0?", "schema": "CREATE TABLE table_name_31 (bronze VARCHAR, gold VARCHAR, total VARCHAR, nation VARCHAR)", "sql": "SELECT COUNT(bronze) FROM table_name_31 WHERE total = 1 AND nation = 'spain (esp)' AND gold > 0;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 96, "num_statements": 1} {"question": "What is the total investment in renewable energy sector for the year 2021?", "schema": "CREATE TABLE investments (id INT PRIMARY KEY, investor_id INT, nonprofit_id INT, amount DECIMAL(10,2), investment_date DATE); INSERT INTO investments (id, investor_id, nonprofit_id, amount, investment_date) VALUES (1, 1, 3, 1500.00, '2021-03-01'), (2, 2, 4, 2000.00, '2021-05-01'), (3, 3, 5, 1000.00, '2021-09-01'), (4, 4, 3, 2500.00, '2021-12-01'); CREATE TABLE nonprofits (id INT PRIMARY KEY, name VARCHAR(255), location VARCHAR(255), sector VARCHAR(255)); INSERT INTO nonprofits (id, name, location, sector) VALUES (3, 'Greenpeace', 'Germany', 'Renewable Energy'), (4, 'SolarAid', 'UK', 'Renewable Energy'), (5, 'WindAid', 'Peru', 'Renewable Energy');", "sql": "SELECT SUM(amount) FROM investments i JOIN nonprofits n ON i.nonprofit_id = n.id WHERE n.sector = 'Renewable Energy' AND DATE_PART('year', investment_date) = 2021;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 163, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the highest earnings for the golfer who has more than 37 wins?", "schema": "CREATE TABLE table_name_90 (earnings___ INTEGER, wins INTEGER)", "sql": "SELECT MAX(earnings___) AS $__ FROM table_name_90 WHERE wins > 37;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'publication' (example 639).", "schema": null, "sql": "CREATE PUBLICATION pub FOR TABLE sch2.tbl1_part1 WITH (PUBLISH_VIA_PARTITION_ROOT=1);", "explanation": "DDL from PostgreSQL core regression test for Publication.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 85, "num_statements": 1} {"question": "pgTAP test for Resultset (assertion 72).", "schema": null, "sql": "INSERT INTO names (name) VALUES ('Jack');", "explanation": "SQL assertion from pgTAP test suite for Resultset.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 41, "num_statements": 1} {"question": "What is the average cost of cybersecurity strategies for each sector in 2019?", "schema": "CREATE TABLE cybersecurity_strategies (id INT PRIMARY KEY, strategy VARCHAR(50), cost INT, sector VARCHAR(50), year INT); INSERT INTO cybersecurity_strategies (id, strategy, cost, sector, year) VALUES (5, 'Encryption', 75000, 'Public', 2019); INSERT INTO cybersecurity_strategies (id, strategy, cost, sector, year) VALUES (6, 'Intrusion Prevention Systems', 120000, 'Private', 2019);", "sql": "SELECT sector, AVG(cost) as avg_cost FROM cybersecurity_strategies WHERE year = 2019 GROUP BY sector;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 101, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What wast the percent cut for the nation with an 80% full gas storage?", "schema": "CREATE TABLE table_21690339_1 (_percentage_cut VARCHAR, gas_storage VARCHAR)", "sql": "SELECT _percentage_cut FROM table_21690339_1 WHERE gas_storage = '80% full';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 76, "num_statements": 1} {"question": "Which species have experienced a decrease in dissolved oxygen levels compared to the previous day in brackish water farms?", "schema": "CREATE TABLE daily_oxygen_levels_brackish (farm_id INT, date DATE, species VARCHAR(20), dissolved_oxygen FLOAT); INSERT INTO daily_oxygen_levels_brackish (farm_id, date, species, dissolved_oxygen) VALUES (1, '2022-01-01', 'Tilapia', 5.5), (1, '2022-01-02', 'Tilapia', 5.3), (2, '2022-01-01', 'Barramundi', 6.0), (2, '2022-01-02', 'Barramundi', 5.8);", "sql": "SELECT species, date, dissolved_oxygen, LAG(dissolved_oxygen) OVER (PARTITION BY species ORDER BY date) prev_dissolved_oxygen, dissolved_oxygen - LAG(dissolved_oxygen) OVER (PARTITION BY species ORDER BY date) diff FROM daily_oxygen_levels_brackish WHERE diff < 0;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "query_window_function", "is_postgresql_specific": true, "sql_length": 264, "num_statements": 1} {"question": "PostgreSQL regression test 'rangetypes': Write the SELECT query (example 270).", "schema": null, "sql": "select count(*) from test_range_spgist where ir -|- int4range(100,500);", "explanation": "Regression test for Rangetypes in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select count(*) from test_range_spgist where ir -|- int4range(100,500)) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 71, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which run 2 with final of 5:24.47?", "schema": "CREATE TABLE table_name_18 (run_2 VARCHAR, final VARCHAR)", "sql": "SELECT run_2 FROM table_name_18 WHERE final = '5:24.47';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 56, "num_statements": 1} {"question": "What is the distribution of article categories by gender?", "schema": "CREATE TABLE article_categories (title text, category text, author_gender text); INSERT INTO article_categories (title, category, author_gender) VALUES ('Article 7', 'politics', 'Female'); INSERT INTO article_categories (title, category, author_gender) VALUES ('Article 8', 'sports', 'Male');", "sql": "SELECT author_gender, category, COUNT(*) as count FROM article_categories GROUP BY author_gender, category;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 107, "num_statements": 1} {"question": "Which mobile devices in the state of Illinois have more than 1000 prepaid subscribers?", "schema": "CREATE TABLE mobile_devices (device_id INT, device_name VARCHAR(50), mobile_services INT, state VARCHAR(20));", "sql": "SELECT device_name FROM mobile_devices WHERE state = 'Illinois' AND mobile_services = (SELECT mobile_services FROM mobile_customers WHERE plan_type = 'prepaid' GROUP BY mobile_services HAVING COUNT(*) > 1000) GROUP BY device_name HAVING COUNT(*) > 1000;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 253, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the total number of party for willis alston (j) 93.9% george e. spruill 6.1%", "schema": "CREATE TABLE table_2668243_18 (party VARCHAR, candidates VARCHAR)", "sql": "SELECT COUNT(party) FROM table_2668243_18 WHERE candidates = 'Willis Alston (J) 93.9% George E. Spruill 6.1%';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 110, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which webcast was in Spanish contemporary on xhnoe.com?", "schema": "CREATE TABLE table_name_86 (webcast VARCHAR, format VARCHAR, website VARCHAR)", "sql": "SELECT webcast FROM table_name_86 WHERE format = 'spanish contemporary' AND website = 'xhnoe.com';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 98, "num_statements": 1} {"question": "Which ingredients are used in products that have received a safety violation in the past year and are not cruelty-free certified?", "schema": "CREATE TABLE products (product_id INT, product_name TEXT, is_cruelty_free BOOLEAN); CREATE TABLE ingredient_sources (ingredient_id INT, product_id INT, source_country TEXT); CREATE TABLE safety_records (record_id INT, product_id INT, violation_date DATE);", "sql": "SELECT ingredient_sources.ingredient_id FROM ingredient_sources INNER JOIN products ON ingredient_sources.product_id = products.product_id INNER JOIN safety_records ON products.product_id = safety_records.product_id WHERE safety_records.violation_date >= NOW() - INTERVAL '1 year' AND products.is_cruelty_free = FALSE;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 318, "num_statements": 1} {"question": "Find the maximum loading capacity for vessels in the 'Passenger' category", "schema": "CREATE TABLE Vessels (VesselID INT, Category VARCHAR(50), LoadingCapacity FLOAT); INSERT INTO Vessels (VesselID, Category, LoadingCapacity) VALUES (1, 'Cargo', 60000), (2, 'Passenger', 3500), (3, 'Cargo', 45000), (4, 'Passenger', 2800);", "sql": "SELECT MAX(LoadingCapacity) FROM Vessels WHERE Category = 'Passenger';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 70, "num_statements": 1} {"question": "What is the average number of volunteers per program in H2 2022?", "schema": "CREATE TABLE Volunteers (id INT, program_id INT, volunteer_date DATE); INSERT INTO Volunteers (id, program_id, volunteer_date) VALUES (1, 601, '2022-07-10'); INSERT INTO Volunteers (id, program_id, volunteer_date) VALUES (2, 602, '2022-10-15'); INSERT INTO Volunteers (id, program_id, volunteer_date) VALUES (3, 601, '2022-09-15');", "sql": "SELECT AVG(number_of_volunteers) FROM (SELECT program_id, COUNT(DISTINCT user_id) as number_of_volunteers FROM Volunteers WHERE volunteer_date >= '2022-07-01' AND volunteer_date < '2023-01-01' GROUP BY program_id) as subquery;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 226, "num_statements": 1} {"question": "What is the percentage of accessible technology patents in Europe?", "schema": "CREATE TABLE tech_patents (location VARCHAR(255), is_accessible BOOLEAN); INSERT INTO tech_patents (location, is_accessible) VALUES ('Germany', true), ('France', false), ('UK', true);", "sql": "SELECT location, COUNT(*) * 100.0 / SUM(COUNT(*)) OVER () as percentage_accessible FROM tech_patents WHERE location LIKE 'Europe%' AND is_accessible = true GROUP BY location;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_window_function", "is_postgresql_specific": false, "sql_length": 174, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Where was mlb draft for the player who's school was Carl Albert High School?", "schema": "CREATE TABLE table_11677100_18 (mlb_draft VARCHAR, school VARCHAR)", "sql": "SELECT mlb_draft FROM table_11677100_18 WHERE school = 'Carl Albert High school';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 81, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: When was outgoing manager Zoltán Varga appointed?", "schema": "CREATE TABLE table_name_77 (date_of_appointment VARCHAR, outgoing_manager VARCHAR)", "sql": "SELECT date_of_appointment FROM table_name_77 WHERE outgoing_manager = 'zoltán varga';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 86, "num_statements": 1} {"question": "What is the maximum salary for workers in the 'construction_database' database who are members of a union?", "schema": "CREATE TABLE builders (id INT, name VARCHAR(50), salary DECIMAL(10, 2), is_union_member BOOLEAN); INSERT INTO builders (id, name, salary, is_union_member) VALUES (1, 'Mia', 80000.00, true), (2, 'Max', 85000.00, true), (3, 'Mel', 90000.00, true);", "sql": "SELECT MAX(salary) FROM builders WHERE is_union_member = true;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 62, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How much does number 26 weigh?", "schema": "CREATE TABLE table_name_31 (weight VARCHAR, number VARCHAR)", "sql": "SELECT weight FROM table_name_31 WHERE number = '26';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 53, "num_statements": 1} {"question": "Delete all records of pollution data in the Mariana Trench.", "schema": "CREATE TABLE pollution_data (location TEXT, pollution_level INTEGER); INSERT INTO pollution_data (location, pollution_level) VALUES ('Mariana Trench', 3); INSERT INTO pollution_data (location, pollution_level) VALUES ('Atlantic Ocean', 2);", "sql": "DELETE FROM pollution_data WHERE location = 'Mariana Trench';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_delete", "is_postgresql_specific": false, "sql_length": 61, "num_statements": 1} {"question": "What is the total duration of classical music pieces longer than 15 minutes?", "schema": "CREATE TABLE compositions (id INT, title VARCHAR(255), genre VARCHAR(255), duration INT); INSERT INTO compositions (id, title, genre, duration) VALUES (1, 'Symphony No. 5', 'Classical', 30), (2, 'Piano Concerto No. 21', 'Classical', 25), (3, 'The Four Seasons', 'Classical', 40), (4, 'Requiem Mass in D minor', 'Classical', 90), (5, 'The Planets', 'Classical', 60);", "sql": "SELECT SUM(duration) FROM compositions WHERE genre = 'Classical' AND duration > 15;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 83, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which date did Chris Duhon (13) receive high assists?", "schema": "CREATE TABLE table_17060277_6 (date VARCHAR, high_assists VARCHAR)", "sql": "SELECT date FROM table_17060277_6 WHERE high_assists = 'Chris Duhon (13)';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 74, "num_statements": 1} {"question": "List the names of all drugs that were approved by the TGA and have a sales figure greater than $50 million.", "schema": "CREATE TABLE drug (id INT, name TEXT, approval_authority TEXT, sales FLOAT); INSERT INTO drug (id, name, approval_authority, sales) VALUES (1, 'DrugA', 'TGA', 60000000); INSERT INTO drug (id, name, approval_authority, sales) VALUES (2, 'DrugB', 'FDA', 40000000); INSERT INTO drug (id, name, approval_authority, sales) VALUES (3, 'DrugC', 'TGA', 55000000); INSERT INTO drug (id, name, approval_authority, sales) VALUES (4, 'DrugD', 'EMA', 45000000);", "sql": "SELECT name FROM drug WHERE approval_authority = 'TGA' AND sales > 50000000;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 76, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many companies operates airlines in each airport?", "schema": "CREATE TABLE airport (id VARCHAR); CREATE TABLE flight (Id VARCHAR); CREATE TABLE operate_company (id VARCHAR)", "sql": "SELECT T3.id, COUNT(*) FROM operate_company AS T1 JOIN flight AS t2 ON T1.id = T2.company_id JOIN airport AS T3 ON T2.airport_id = T3.id GROUP BY T3.id;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 152, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'btree_gin' (example 11).", "schema": null, "sql": "SELECT * FROM test_float8 WHERE i<1::float4 ORDER BY i;", "explanation": "Example query from the 'btree_gin' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 55, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the position in 1999?", "schema": "CREATE TABLE table_name_58 (position VARCHAR, years VARCHAR)", "sql": "SELECT position FROM table_name_58 WHERE years = '1999';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 56, "num_statements": 1} {"question": "PostgreSQL regression test 'jsonb_jsonpath': Write the SELECT query (example 457).", "schema": null, "sql": "select jsonb_path_query('true', '$.integer()', silent => true);", "explanation": "Regression test for Jsonb Jsonpath in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select jsonb_path_query('true', '$.integer()', silent => true)) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "advanced", "category": "query_select", "is_postgresql_specific": false, "sql_length": 63, "num_statements": 1} {"question": "How many animals were admitted to the rehabilitation center in June 2021?", "schema": "CREATE TABLE rehab_center (animal_id INT, admission_date DATE); INSERT INTO rehab_center (animal_id, admission_date) VALUES (1, '2021-06-01'), (2, '2021-06-15'), (3, '2021-06-27');", "sql": "SELECT COUNT(*) FROM rehab_center WHERE admission_date BETWEEN '2021-06-01' AND '2021-06-30';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 93, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: what is the area when the census ranking is 2,290 of 5,008?", "schema": "CREATE TABLE table_name_49 (area_km_2 VARCHAR, census_ranking VARCHAR)", "sql": "SELECT area_km_2 FROM table_name_49 WHERE census_ranking = '2,290 of 5,008';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 76, "num_statements": 1} {"question": "What are the names and launch dates of Mars rovers?", "schema": "CREATE TABLE mars_rovers(rover_name TEXT, launch_date DATE); INSERT INTO mars_rovers (rover_name, launch_date) VALUES ('Sojourner', '1996-12-04'), ('Spirit', '2003-06-10'), ('Opportunity', '2003-07-07');", "sql": "SELECT rover_name, launch_date FROM mars_rovers;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 48, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the date that the st. george-illawarra dragons lost?", "schema": "CREATE TABLE table_11236195_5 (grand_finaldate VARCHAR, losingteam VARCHAR)", "sql": "SELECT grand_finaldate FROM table_11236195_5 WHERE losingteam = 'St. George-Illawarra Dragons';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 95, "num_statements": 1} {"question": "How many renewable energy facilities are located in the Asia-Pacific region, and what is their total capacity in MW?", "schema": "CREATE TABLE renewable_facilities (region VARCHAR(50), capacity NUMERIC, technology VARCHAR(50)); INSERT INTO renewable_facilities (region, capacity, technology) VALUES ('Asia-Pacific', 500, 'Solar'), ('Asia-Pacific', 600, 'Wind'), ('Europe', 400, 'Hydro'), ('Africa', 300, 'Geothermal');", "sql": "SELECT region, SUM(capacity) as total_capacity FROM renewable_facilities WHERE region = 'Asia-Pacific' GROUP BY region;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 119, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many episodes directed by ben jones and written by paul dini?", "schema": "CREATE TABLE table_20360535_3 (no VARCHAR, directed_by VARCHAR, written_by VARCHAR)", "sql": "SELECT COUNT(no) FROM table_20360535_3 WHERE directed_by = 'Ben Jones' AND written_by = 'Paul Dini';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 100, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which Yards has a Long smaller than 3?", "schema": "CREATE TABLE table_name_2 (yards INTEGER, long INTEGER)", "sql": "SELECT AVG(yards) FROM table_name_2 WHERE long < 3;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 51, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: During what years did the Guard from Oklahoma with a height of 6-6 play for the Rockets?", "schema": "CREATE TABLE table_name_41 (years_for_rockets VARCHAR, school_club_team_country VARCHAR, height_in_ft VARCHAR, position VARCHAR)", "sql": "SELECT years_for_rockets FROM table_name_41 WHERE height_in_ft = '6-6' AND position = 'guard' AND school_club_team_country = 'oklahoma';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 136, "num_statements": 1} {"question": "What is the total budget for each type of infrastructure project in the transportation division?", "schema": "CREATE TABLE transportation_projects (id INT, name VARCHAR(50), type VARCHAR(50), budget FLOAT); INSERT INTO transportation_projects (id, name, type, budget) VALUES (1, 'Road', 'Construction', 20000000), (2, 'Highway', 'Construction', 50000000), (3, 'Bridge', 'Construction', 35000000), (4, 'Tunnel', 'Construction', 40000000);", "sql": "SELECT type, SUM(budget) FROM transportation_projects WHERE division = 'Transportation' GROUP BY type;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 102, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What number of party had a candidate resigned when appointed judge democratic hold?", "schema": "CREATE TABLE table_1342013_31 (party VARCHAR, result VARCHAR)", "sql": "SELECT COUNT(party) FROM table_1342013_31 WHERE result = 'Resigned when appointed judge Democratic hold';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 105, "num_statements": 1} {"question": "What is the average claim amount for policies with a duration greater than 1 year?", "schema": "CREATE TABLE Claims (PolicyID int, ClaimAmount int, PolicyDuration int); INSERT INTO Claims (PolicyID, ClaimAmount, PolicyDuration) VALUES (1, 500, 18), (2, 2000, 3), (3, 800, 24), (4, 1500, 12);", "sql": "SELECT AVG(ClaimAmount) OVER (ORDER BY ROW_NUMBER() OVER (ORDER BY PolicyDuration DESC)) as AvgClaimAmount FROM Claims WHERE PolicyDuration > 12;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_window_function", "is_postgresql_specific": true, "sql_length": 145, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the Attendance with a Date that is october 31, 1965?", "schema": "CREATE TABLE table_name_15 (attendance VARCHAR, date VARCHAR)", "sql": "SELECT attendance FROM table_name_15 WHERE date = 'october 31, 1965';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 69, "num_statements": 1} {"question": "Identify the total quantity of dairy products supplied by each supplier in the last month?", "schema": "CREATE TABLE Suppliers(SupplierID INT, Name VARCHAR(50), Type VARCHAR(50));CREATE TABLE DairyProducts(ProductID INT, SupplierID INT, ProductName VARCHAR(50), Quantity INT, DeliveryDate DATE);INSERT INTO Suppliers VALUES (1, 'Dairy Direct', 'Dairy Supplier'), (2, 'Farm Fresh', 'Dairy Supplier'), (3, 'Fruitful', 'Fruit Supplier');INSERT INTO DairyProducts VALUES (1, 1, 'Milk', 200, '2022-05-15'), (2, 1, 'Cheese', 300, '2022-05-01'), (3, 2, 'Butter', 500, '2022-04-20'), (4, 3, 'Apples', 400, '2022-05-10');", "sql": "SELECT s.Name, SUM(dp.Quantity) FROM Suppliers s JOIN DairyProducts dp ON s.SupplierID = dp.SupplierID WHERE dp.DeliveryDate >= DATEADD(month, -1, GETDATE()) AND s.Type = 'Dairy Supplier' GROUP BY s.Name;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 204, "num_statements": 1} {"question": "What is the number of marine species, grouped by conservation status?", "schema": "CREATE TABLE marine_species (id INT, species VARCHAR(255), conservation_status VARCHAR(255)); INSERT INTO marine_species (id, species, conservation_status) VALUES (1, 'Blue Whale', 'Endangered'); INSERT INTO marine_species (id, species, conservation_status) VALUES (2, 'Green Sea Turtle', 'Vulnerable'); INSERT INTO marine_species (id, species, conservation_status) VALUES (3, 'Clownfish', 'Least Concern');", "sql": "SELECT conservation_status, COUNT(*) FROM marine_species GROUP BY conservation_status;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 86, "num_statements": 1} {"question": "What is the minimum capacity of hospitals in the 'global_health' schema?", "schema": "CREATE SCHEMA global_health; CREATE TABLE hospitals (id INT, name TEXT, location TEXT, capacity INT); INSERT INTO global_health.hospitals (id, name, location, capacity) VALUES (1, 'Hospital A', 'City A', 200), (2, 'Hospital B', 'City B', 300), (3, 'Hospital C', 'City C', 150), (4, 'Hospital D', 'City D', 250), (5, 'Hospital E', 'City E', 400);", "sql": "SELECT MIN(capacity) FROM global_health.hospitals;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 50, "num_statements": 1} {"question": "PostgreSQL regression test 'encoding': Write the SELECT query (example 25).", "schema": null, "sql": "SELECT regexp_replace(with_nul, '^caf(.)$', '\\1') FROM regress_encoding;", "explanation": "Regression test for Encoding in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT regexp_replace(with_nul, '^caf(.)$', '\\1') FROM regress_encoding) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 72, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: When columbia, south carolina is the hometown what is the lowest age?", "schema": "CREATE TABLE table_1859855_2 (age INTEGER, hometown VARCHAR)", "sql": "SELECT MIN(age) FROM table_1859855_2 WHERE hometown = 'Columbia, South Carolina';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 81, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the FY2008 $mil value associated with a FY2007 $mil value of exactly $120?", "schema": "CREATE TABLE table_25438110_5 (fy08_$millions VARCHAR, fy07_$millions VARCHAR)", "sql": "SELECT fy08_$millions FROM table_25438110_5 WHERE fy07_$millions = '$120';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 74, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many different players does the Washington Redskins have?", "schema": "CREATE TABLE table_27132791_3 (player VARCHAR, nfl_team VARCHAR)", "sql": "SELECT COUNT(player) FROM table_27132791_3 WHERE nfl_team = 'Washington Redskins';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 82, "num_statements": 1} {"question": "Find the number of Influenza cases reported in South America in 2020.", "schema": "CREATE TABLE FluData (Year INT, Region VARCHAR(20), Cases INT); INSERT INTO FluData (Year, Region, Cases) VALUES (2018, 'North America', 5000); INSERT INTO FluData (Year, Region, Cases) VALUES (2020, 'South America', 3000);", "sql": "SELECT SUM(Cases) FROM FluData WHERE Region = 'South America' AND Year = 2020;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 78, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: List title of albums have the number of tracks greater than 10.", "schema": "CREATE TABLE tracks (album_id VARCHAR); CREATE TABLE albums (title VARCHAR, id VARCHAR)", "sql": "SELECT T1.title FROM albums AS T1 JOIN tracks AS T2 ON T1.id = T2.album_id GROUP BY T1.id HAVING COUNT(T1.id) > 10;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 115, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was Mark Begich polling on October 6, 2008?", "schema": "CREATE TABLE table_16751596_13 (democrat VARCHAR, dates_administered VARCHAR)", "sql": "SELECT democrat AS :_mark_begich FROM table_16751596_13 WHERE dates_administered = 'October 6, 2008';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 101, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the percentage listed for the team who scored 63 points?", "schema": "CREATE TABLE table_26200568_16 (percentage VARCHAR, points_for VARCHAR)", "sql": "SELECT percentage FROM table_26200568_16 WHERE points_for = 63;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 63, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'without_overlaps' (example 431).", "schema": null, "sql": "UPDATE temporal_rng SET valid_at = daterange('2016-01-01', '2016-02-01') WHERE id = '[5,6)';", "explanation": "DML from PostgreSQL core regression test for Without Overlaps.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 92, "num_statements": 1} {"question": "PostgreSQL Plpgsql: show example 26.", "schema": null, "sql": "CREATE FUNCTION get_userid(username text) RETURNS int AS $$ #print_strict_params on DECLARE userid int; BEGIN SELECT users.userid INTO STRICT userid FROM users WHERE users.username = get_userid.username; RETURN userid; END; $$ LANGUAGE plpgsql;", "explanation": "Example from PostgreSQL documentation on Plpgsql.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "plpgsql_function", "is_postgresql_specific": true, "sql_length": 244, "num_statements": 5} {"question": "What is the correlation between the number of social media shares and the length of news articles?", "schema": "CREATE TABLE articles (id INT, title VARCHAR(100), word_count INT, social_media_shares INT);", "sql": "SELECT CORR(word_count, social_media_shares) FROM articles;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 59, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: When itogon, benguet is the city/municipality and 1st class is the income classification how many measurements of population in 2010?", "schema": "CREATE TABLE table_29289372_1 (population__2010_ VARCHAR, income_classification VARCHAR, city_municipality VARCHAR)", "sql": "SELECT COUNT(population__2010_) FROM table_29289372_1 WHERE income_classification = '1st Class' AND city_municipality = 'Itogon, Benguet';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 138, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'join' (example 261).", "schema": null, "sql": "insert into xx values (2);", "explanation": "DML from PostgreSQL core regression test for Join.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 26, "num_statements": 1} {"question": "Find AI safety incidents in South America with a confidence score below 0.8, ordered by incident date.", "schema": "CREATE TABLE AISafetyIncidents (incident_id INTEGER, confidence FLOAT, incident_date DATE, region TEXT); INSERT INTO AISafetyIncidents (incident_id, confidence, incident_date, region) VALUES (1, 0.75, '2022-01-01', 'South America'), (2, 0.85, '2022-04-01', 'South America');", "sql": "SELECT * FROM AISafetyIncidents WHERE region = 'South America' AND confidence < 0.8 ORDER BY incident_date;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 107, "num_statements": 1} {"question": "What is the total amount of climate finance provided to projects in the Pacific region for climate adaptation by the European Investment Bank?", "schema": "CREATE TABLE european_investment_bank (fund_id INT, project_name VARCHAR(100), country VARCHAR(50), sector VARCHAR(50), amount FLOAT, climate_adaptation_flag BOOLEAN); INSERT INTO european_investment_bank (fund_id, project_name, country, sector, amount, climate_adaptation_flag) VALUES (1, 'Sea Level Rise Protection', 'Tuvalu', 'Infrastructure', 20000000, TRUE);", "sql": "SELECT SUM(amount) FROM european_investment_bank WHERE country LIKE '%%pacific%%' AND climate_adaptation_flag = TRUE;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 117, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many rounds were fought with opponent Kevin Roddy?", "schema": "CREATE TABLE table_name_54 (round VARCHAR, opponent VARCHAR)", "sql": "SELECT COUNT(round) FROM table_name_54 WHERE opponent = 'kevin roddy';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 70, "num_statements": 1} {"question": "What is the maximum energy efficiency rating for commercial buildings in the \"CleanEnergy\" schema?", "schema": "CREATE TABLE CommercialEfficiency (building_id INT, rating FLOAT, city VARCHAR(50), state VARCHAR(50)); INSERT INTO CommercialEfficiency (building_id, rating, city, state) VALUES (1, 85.6, 'LosAngeles', 'CA'), (2, 90.2, 'NewYorkCity', 'NY');", "sql": "SELECT MAX(rating) FROM CleanEnergy.CommercialEfficiency;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 57, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'privileges' (example 22).", "schema": null, "sql": "CREATE USER regress_priv_user10;", "explanation": "DDL from PostgreSQL core regression test for Privileges.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 32, "num_statements": 1} {"question": "Which suppliers provide raw materials for the renewable energy sector?", "schema": "CREATE TABLE suppliers (id INT, name VARCHAR(50), sector VARCHAR(50)); INSERT INTO suppliers (id, name, sector) VALUES (1, 'Supplier X', 'Renewable Energy'), (2, 'Supplier Y', 'Textiles'), (3, 'Supplier Z', 'Renewable Energy');", "sql": "SELECT name FROM suppliers WHERE sector = 'Renewable Energy';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 61, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What streak start has a total less than 79, 22t as the rank, and tampa bay as the teams?", "schema": "CREATE TABLE table_name_28 (streak_start VARCHAR, teams VARCHAR, total VARCHAR, rank VARCHAR)", "sql": "SELECT streak_start FROM table_name_28 WHERE total < 79 AND rank = '22t' AND teams = 'tampa bay';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 97, "num_statements": 1} {"question": "What is the total amount of waste produced by each mining company?", "schema": "CREATE TABLE CompanyWaste (CompanyID INT, Company VARCHAR(20), Waste INT); INSERT INTO CompanyWaste (CompanyID, Company, Waste) VALUES (1, 'Canada Gold', 1000), (2, 'USA Silver', 2000), (3, 'Mexico Coal', 1500);", "sql": "SELECT Company, SUM(Waste) FROM CompanyWaste GROUP BY Company;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 62, "num_statements": 1} {"question": "Update the names of authors with the last name 'Doe' to 'Smith' in the 'authors' table", "schema": "CREATE TABLE authors (author_id INT PRIMARY KEY, first_name VARCHAR(50), last_name VARCHAR(50));", "sql": "UPDATE authors SET last_name = 'Smith' WHERE last_name = 'Doe';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 63, "num_statements": 1} {"question": "What is the average safety rating for creative AI applications in the 'creative_ai' table?", "schema": "CREATE TABLE creative_ai (app_id INT, app_name TEXT, safety_rating FLOAT);", "sql": "SELECT AVG(safety_rating) FROM creative_ai;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 43, "num_statements": 1} {"question": "PostgreSQL regression test 'sqljson_queryfuncs': Write the SELECT query (example 180).", "schema": null, "sql": "SELECT JSON_QUERY(jsonb '[{\"a\": \"a\", \"b\": \"foo\", \"t\": \"aaa\", \"js\": [1, \"2\", {}], \"jb\": {\"x\": [1, \"2\", {}]}}, {\"a\": 2}]', '$[0]' RETURNING sqljsonb_rec);", "explanation": "Regression test for Sqljson Queryfuncs in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT JSON_QUERY(jsonb '[{\"a\": \"a\", \"b\": \"foo\", \"t\": \"aaa\", \"js\": [1, \"2\", {}], \"jb\": {\"x\": [1, \"2\", {}]}}, {\"a\": 2}]', '$[0]' RETURNING sqljsonb_rec)) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": true, "sql_length": 153, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the publication year ranking l.a. is 1st?", "schema": "CREATE TABLE table_19948664_2 (year_of_publication VARCHAR, ranking_la__2_ VARCHAR)", "sql": "SELECT year_of_publication FROM table_19948664_2 WHERE ranking_la__2_ = '1st';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 78, "num_statements": 1} {"question": "List all the articles published by 'Al Jazeera' that are related to the Middle East.", "schema": "CREATE TABLE al_jazeera (article_id INT, title TEXT, content TEXT, publisher TEXT); INSERT INTO al_jazeera (article_id, title, content, publisher) VALUES (1, 'Article 1', 'Middle East content', 'Al Jazeera'), (2, 'Article 2', 'Sports content', 'Al Jazeera');", "sql": "SELECT * FROM al_jazeera WHERE content LIKE '%Middle East%';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 60, "num_statements": 1} {"question": "What is the total labor hours for all traditional building projects in the state of Washington, Oregon, and California?", "schema": "CREATE TABLE project (id INT, state VARCHAR(20), type VARCHAR(20), hours INT); INSERT INTO project (id, state, type, hours) VALUES (1, 'Washington', 'Sustainable', 500), (2, 'Oregon', 'Sustainable', 600), (3, 'Seattle', 'Traditional', 300), (4, 'California', 'Traditional', 700);", "sql": "SELECT SUM(hours) FROM project WHERE state IN ('Washington', 'Oregon', 'California') AND type = 'Traditional';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 110, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Album # of 3rd is what chinese title?", "schema": "CREATE TABLE table_name_57 (chinese_title VARCHAR, album_number VARCHAR)", "sql": "SELECT chinese_title FROM table_name_57 WHERE album_number = '3rd';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 67, "num_statements": 1} {"question": "What is the average bioprocess engineering project cost per country, ordered by total cost?", "schema": "CREATE SCHEMA if not exists bioprocess;CREATE TABLE if not exists bioprocess.projects (id INT PRIMARY KEY, country VARCHAR(50), name VARCHAR(255), cost DECIMAL(10, 2)); INSERT INTO bioprocess.projects (id, country, name, cost) VALUES (1, 'USA', 'ProjectA', 50000.00), (2, 'Canada', 'ProjectB', 75000.00), (3, 'Mexico', 'ProjectC', 35000.00), (4, 'USA', 'ProjectD', 80000.00);", "sql": "SELECT country, AVG(cost) AS avg_cost FROM bioprocess.projects GROUP BY country ORDER BY avg_cost DESC;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 103, "num_statements": 1} {"question": "What was the total amount donated by recurring donors in Q3 2021?", "schema": "CREATE TABLE donors (id INT PRIMARY KEY, donor_type VARCHAR(20), donor_since DATE); INSERT INTO donors (id, donor_type, donor_since) VALUES (1, 'recurring', '2021-07-01'); CREATE TABLE donations (id INT PRIMARY KEY, donor_id INT, donation_amount INT, donation_date DATE); INSERT INTO donations (id, donor_id, donation_amount, donation_date) VALUES (1, 1, 50, '2021-09-01');", "sql": "SELECT SUM(donation_amount) FROM donations d JOIN donors don ON d.donor_id = don.id WHERE donor_type = 'recurring' AND donation_date BETWEEN '2021-07-01' AND '2021-09-30';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 171, "num_statements": 1} {"question": "Update the 'resilience_score' of the bridge named 'Chenab Bridge' to 92.5.", "schema": "CREATE TABLE bridges (id INT, name TEXT, region TEXT, resilience_score FLOAT); INSERT INTO bridges (id, name, region, resilience_score) VALUES (1, 'Golden Gate Bridge', 'West Coast', 85.2), (2, 'Brooklyn Bridge', 'East Coast', 76.3), (3, 'Bay Bridge', 'West Coast', 78.1), (4, 'Chenab Bridge', 'South Asia', 89.6);", "sql": "UPDATE bridges SET resilience_score = 92.5 WHERE name = 'Chenab Bridge';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 72, "num_statements": 1} {"question": "PostgreSQL regression test 'largeobject': Write the SELECT query (example 105).", "schema": null, "sql": "SELECT lowrite(42, 'x');", "explanation": "Regression test for Largeobject in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT lowrite(42, 'x')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 24, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Capital of brześć nad bugiem has what area (1930) in 1000skm?", "schema": "CREATE TABLE table_name_72 (area__1930__in_1 VARCHAR, capital VARCHAR)", "sql": "SELECT area__1930__in_1, 000 AS skm_2 FROM table_name_72 WHERE capital = 'brześć nad bugiem';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 93, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Tell me the revised of mccune reischauer of yŏn (s) ryŏn (n)", "schema": "CREATE TABLE table_name_48 (revised VARCHAR, mccune_reischauer VARCHAR)", "sql": "SELECT revised FROM table_name_48 WHERE mccune_reischauer = 'yŏn (s) ryŏn (n)';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 79, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which lowest rank(player) has a rebound average larger than 9, out of 920 rebounds, and who played more than 79 games?", "schema": "CREATE TABLE table_name_10 (rank INTEGER, games VARCHAR, reb_avg VARCHAR, total_rebounds VARCHAR)", "sql": "SELECT MIN(rank) FROM table_name_10 WHERE reb_avg > 9 AND total_rebounds = 920 AND games > 79;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 94, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the record on june 29?", "schema": "CREATE TABLE table_name_25 (record VARCHAR, date VARCHAR)", "sql": "SELECT record FROM table_name_25 WHERE date = 'june 29';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 56, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What group on the island of Faray has a Height (m) of 32 and a Population of 0?", "schema": "CREATE TABLE table_name_99 (group VARCHAR, island VARCHAR, height__m_ VARCHAR, population VARCHAR)", "sql": "SELECT group FROM table_name_99 WHERE height__m_ = 32 AND population = '0' AND island = 'faray';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 96, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is \"house\", when \"four\" is \"opat\"?", "schema": "CREATE TABLE table_name_89 (house VARCHAR, four VARCHAR)", "sql": "SELECT house FROM table_name_89 WHERE four = 'opat';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 52, "num_statements": 1} {"question": "What is the maximum donation amount from Australia?", "schema": "CREATE TABLE Donors (DonorID INT, DonorName TEXT, DonationAmount DECIMAL(10,2), Country TEXT);", "sql": "SELECT MAX(DonationAmount) FROM Donors WHERE Country = 'Australia';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 67, "num_statements": 1} {"question": "What is the total number of members in unions that have collective bargaining agreements?", "schema": "CREATE TABLE unions (id INT, union_name VARCHAR(255), has_cba BOOLEAN);", "sql": "SELECT COUNT(*) FROM unions WHERE has_cba = TRUE;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 49, "num_statements": 1} {"question": "What is the total cost of mental health treatments for patients with anxiety?", "schema": "CREATE TABLE MentalHealthParity (id INT, patientID INT, condition VARCHAR(50), treatment VARCHAR(50), cost DECIMAL(5,2)); INSERT INTO MentalHealthParity (id, patientID, condition, treatment, cost) VALUES (1, 1001, 'Anxiety', 'Counseling', 80.00), (2, 1002, 'Depression', 'Medication', 100.00);", "sql": "SELECT patientID, SUM(cost) as 'TotalCost' FROM MentalHealthParity WHERE condition = 'Anxiety';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 95, "num_statements": 1} {"question": "Provide the number of ocean floor mapping projects by country and year.", "schema": "CREATE SCHEMA oceans;CREATE TABLE oceans.mapping_projects_by_year (id INT PRIMARY KEY, country VARCHAR(50), year INT, num_projects INT); INSERT INTO oceans.mapping_projects_by_year (id, country, year, num_projects) VALUES (1, 'Canada', 2020, 2), (2, 'Mexico', 2020, 1);", "sql": "SELECT context.country, context.year, SUM(context.num_projects) FROM oceans.mapping_projects_by_year AS context GROUP BY context.country, context.year;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 151, "num_statements": 1} {"question": "What is the number of high severity vulnerabilities for each system?", "schema": "CREATE TABLE vulnerabilities(id INT, system VARCHAR(20), severity VARCHAR(10), date DATE); INSERT INTO vulnerabilities VALUES (1, 'web server', 'high', '2021-01-01'); INSERT INTO vulnerabilities VALUES (2, 'database', 'low', '2021-01-02');", "sql": "SELECT system, severity, COUNT(*) FROM vulnerabilities WHERE severity = 'high' GROUP BY system;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 95, "num_statements": 1} {"question": "What is the average duration of space missions for each spacecraft model?", "schema": "CREATE TABLE Spacecraft (id INT, name TEXT, model TEXT, manufacturer TEXT); CREATE TABLE SpaceMissions (id INT, spacecraft_id INT, mission TEXT, duration INT);", "sql": "SELECT model, AVG(duration) FROM SpaceMissions JOIN Spacecraft ON SpaceMissions.spacecraft_id = Spacecraft.id GROUP BY model;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 125, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'create_schema' (example 4).", "schema": null, "sql": "CREATE SCHEMA AUTHORIZATION regress_create_schema_role\n CREATE VIEW schema_not_existing.view AS SELECT 1;", "explanation": "DDL from PostgreSQL core regression test for Create Schema.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "ddl_view", "is_postgresql_specific": false, "sql_length": 106, "num_statements": 1} {"question": "Which satellites were deployed by both SpaceTech Inc. and Orbital Inc.?", "schema": "CREATE TABLE Satellites (satellite_id INT, name VARCHAR(50), launch_date DATE, manufacturer VARCHAR(50)); INSERT INTO Satellites (satellite_id, name, launch_date, manufacturer) VALUES (1, 'Sat1', '2020-01-01', 'SpaceTech Inc.'), (2, 'Sat2', '2019-05-15', 'Orbital Inc.'), (3, 'Sat3', '2021-03-27', 'SpaceTech Inc.'), (4, 'Sat4', '2018-12-12', 'Orbital Inc.');", "sql": "SELECT name FROM Satellites WHERE manufacturer IN ('SpaceTech Inc.', 'Orbital Inc.') GROUP BY name HAVING COUNT(DISTINCT manufacturer) = 2;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 139, "num_statements": 1} {"question": "What is the average playtime for adventure games?", "schema": "CREATE TABLE Player_Games(player_id INT, game_id INT, playtime INT); INSERT INTO Player_Games(player_id, game_id, playtime) VALUES (1, 1, 50); INSERT INTO Player_Games(player_id, game_id, playtime) VALUES (2, 2, 100); INSERT INTO Player_Games(player_id, game_id, playtime) VALUES (3, 3, 75);", "sql": "SELECT AVG(playtime) FROM Player_Games JOIN Games ON Player_Games.game_id = Games.game_id WHERE genre = 'Adventure';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 116, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is Updated In Past 30 Days, when Registration is \"Open to people 13 and over\"?", "schema": "CREATE TABLE table_name_93 (updated_in_past_30_days VARCHAR, registration VARCHAR)", "sql": "SELECT updated_in_past_30_days FROM table_name_93 WHERE registration = 'open to people 13 and over';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 100, "num_statements": 1} {"question": "Write the PL/pgSQL object from PostgreSQL regression test 'rangefuncs' (example 6).", "schema": null, "sql": "-- function with ORDINALITY\nselect * from rngfunct(1) with ordinality as z(a,b,ord);", "explanation": "PL/pgSQL object from PostgreSQL core test for Rangefuncs.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 84, "num_statements": 1} {"question": "What is the total number of trees in the forestry_data schema, excluding trees that are in the no_management_zone table?", "schema": "CREATE TABLE forestry_data.young_forest (tree_id INT, species VARCHAR(50), age INT, height INT, location VARCHAR(50));CREATE TABLE forestry_data.mature_forest (tree_id INT, species VARCHAR(50), age INT, height INT, location VARCHAR(50));CREATE TABLE forestry_data.protected_zone (tree_id INT, species VARCHAR(50), age INT, height INT, location VARCHAR(50));CREATE TABLE forestry_data.no_management_zone (tree_id INT, species VARCHAR(50), age INT, height INT, location VARCHAR(50));", "sql": "SELECT COUNT(*) FROM forestry_data.young_forest UNION ALL SELECT COUNT(*) FROM forestry_data.mature_forest EXCEPT SELECT COUNT(*) FROM forestry_data.no_management_zone;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 168, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What position does Alexei Lazarenko have?", "schema": "CREATE TABLE table_name_62 (position VARCHAR, player VARCHAR)", "sql": "SELECT position FROM table_name_62 WHERE player = 'alexei lazarenko';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 69, "num_statements": 1} {"question": "Update the 'digital_assets' table to set the 'circulating_supply' to 15000000 for all records where the asset_name is 'Bitcoin'", "schema": "CREATE TABLE digital_assets (asset_id INT PRIMARY KEY, asset_name VARCHAR(100), asset_type VARCHAR(50), circulating_supply INT);", "sql": "UPDATE digital_assets SET circulating_supply = 15000000 WHERE asset_name = 'Bitcoin';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 85, "num_statements": 1} {"question": "What are the regulatory frameworks in place for blockchain in the European Union?", "schema": "CREATE TABLE regulatory_frameworks (framework_id INT, country VARCHAR(100), framework VARCHAR(100)); INSERT INTO regulatory_frameworks (framework_id, country, framework) VALUES (1, 'EU', 'Framework1'), (2, 'EU', 'Framework2'), (3, 'EU', 'Framework3');", "sql": "SELECT framework FROM regulatory_frameworks WHERE country = 'EU';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 65, "num_statements": 1} {"question": "What is the change in recycling rates for plastics, paper, and metals from 2020 to 2021 for the country 'USA'?", "schema": "CREATE TABLE recycling_rates_history (id INT, country VARCHAR(255), year INT, plastics DECIMAL(3,2), paper DECIMAL(3,2), metals DECIMAL(3,2)); INSERT INTO recycling_rates_history (id, country, year, plastics, paper, metals) VALUES (1, 'USA', 2020, 0.30, 0.55, 0.72), (2, 'USA', 2021, 0.33, 0.58, 0.75);", "sql": "SELECT (recycling_rates_history.plastics_2021 - recycling_rates_history.plastics_2020) AS plastics_change, (recycling_rates_history.paper_2021 - recycling_rates_history.paper_2020) AS paper_change, (recycling_rates_history.metals_2021 - recycling_rates_history.metals_2020) AS metals_change FROM (SELECT country, plastics AS plastics_2020, paper AS paper_2020, metals AS metals_2020 FROM recycling_rates_history WHERE year = 2020 AND country = 'USA' UNION ALL SELECT country, plastics AS plastics_2021, paper AS paper_2021, metals AS metals_2021 FROM recycling_rates_history WHERE year = 2021 AND country = 'USA') AS recycling_rates_history;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 643, "num_statements": 1} {"question": "Find users who have not received any likes on their posts and are from a country other than the US.", "schema": "CREATE TABLE users (id INT, name VARCHAR(50), country VARCHAR(2), followers INT); INSERT INTO users (id, name, country, followers) VALUES (1, 'Alice', 'US', 1000), (2, 'Bob', 'JP', 500), (3, 'Charlie', 'CA', 1500), (4, 'David', 'MX', 200), (5, 'Eve', 'DE', 800); CREATE TABLE posts (id INT, user_id INT, timestamp DATETIME, likes INT); INSERT INTO posts (id, user_id, timestamp, likes) VALUES (1, 1, '2022-01-01 10:00:00', 0), (2, 1, '2022-01-02 11:00:00', 5), (3, 2, '2022-01-03 12:00:00', 0);", "sql": "SELECT users.name FROM users LEFT JOIN posts ON users.id = posts.user_id WHERE users.country != 'US' AND posts.likes = 0;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 121, "num_statements": 1} {"question": "PostgreSQL regression test 'xml': Write the SELECT query (example 176).", "schema": null, "sql": "SELECT COUNT(id) FROM xmltest WHERE xmlexists('/menu/beers' PASSING BY REF data);", "explanation": "Regression test for Xml in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT COUNT(id) FROM xmltest WHERE xmlexists('/menu/beers' PASSING BY REF data)) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 81, "num_statements": 1} {"question": "What is the average environmental impact score for chemical 'D'?", "schema": "CREATE TABLE environmental_impact (chemical VARCHAR(10), score INT); INSERT INTO environmental_impact VALUES ('D', 25), ('D', 30), ('D', 20), ('E', 35), ('E', 40);", "sql": "SELECT AVG(score) FROM environmental_impact WHERE chemical = 'D';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 65, "num_statements": 1} {"question": "What is the average response time for police stations in Atlanta?", "schema": "CREATE TABLE atlanta_police_responses (id INT, response_time INT, location VARCHAR(20)); INSERT INTO atlanta_police_responses (id, response_time, location) VALUES (1, 150, 'Atlanta'), (2, 120, 'Atlanta');", "sql": "SELECT AVG(response_time) FROM atlanta_police_responses WHERE location = 'Atlanta';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 83, "num_statements": 1} {"question": "What is the average number of community health centers per state in the Midwest?", "schema": "CREATE TABLE community_health_centers (center_id INT, state TEXT, center_type TEXT); INSERT INTO community_health_centers (center_id, state, center_type) VALUES (1, 'Illinois', 'Community Health Center'), (2, 'Indiana', 'Mental Health Center');", "sql": "SELECT AVG(COUNT(*)) FROM community_health_centers GROUP BY state HAVING state LIKE '%Midwest%';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 96, "num_statements": 1} {"question": "List all renewable energy types from the renewable_energy_types table in the renewable_energy schema.", "schema": "CREATE SCHEMA IF NOT EXISTS renewable_energy; CREATE TABLE IF NOT EXISTS renewable_energy.renewable_energy_types ( energy_type_id INT NOT NULL, energy_type VARCHAR(255) NOT NULL, PRIMARY KEY (energy_type_id));", "sql": "SELECT energy_type FROM renewable_energy.renewable_energy_types;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 64, "num_statements": 1} {"question": "Update the safety_rating of 'Methyl Ethyl Ketone' to 3 in the chemical_table", "schema": "CREATE TABLE chemical_table (chemical_id INT, chemical_name VARCHAR(50), safety_rating INT);", "sql": "UPDATE chemical_table SET safety_rating = 3 WHERE chemical_name = 'Methyl Ethyl Ketone';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 88, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: what's the frequency mhz with city of license being chattanooga, tennessee", "schema": "CREATE TABLE table_13998897_1 (frequency_mhz VARCHAR, city_of_license VARCHAR)", "sql": "SELECT frequency_mhz FROM table_13998897_1 WHERE city_of_license = 'Chattanooga, Tennessee';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 92, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'citext' (example 158).", "schema": null, "sql": "SELECT 'a0eebc99-9c0b-4ef8-bb6d-6bb9bd380a11'::citext::uuid = 'a0eebc99-9c0b-4ef8-bb6d-6bb9bd380a11'::uuid AS t;", "explanation": "Example query from the 'citext' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 112, "num_statements": 1} {"question": "How many cases were handled by attorneys in each region?", "schema": "CREATE TABLE Attorneys (AttorneyID int, Name varchar(50), Region varchar(50)); INSERT INTO Attorneys VALUES (1, 'John Smith', 'Northeast'), (2, 'Jane Doe', 'Southeast'); CREATE TABLE Cases (CaseID int, AttorneyID int); INSERT INTO Cases VALUES (1, 1), (2, 1), (3, 2), (4, 2);", "sql": "SELECT A.Region, COUNT(C.CaseID) as NumCases FROM Attorneys A JOIN Cases C ON A.AttorneyID = C.AttorneyID GROUP BY A.Region;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 124, "num_statements": 1} {"question": "What is the success rate of space missions led by private companies?", "schema": "CREATE TABLE space_missions_by_company (company VARCHAR(255), num_missions INT, num_successful_missions INT); INSERT INTO space_missions_by_company (company, num_missions, num_successful_missions) VALUES ('SpaceX', 105, 98); INSERT INTO space_missions_by_company (company, num_missions, num_successful_missions) VALUES ('Blue Origin', 25, 23);", "sql": "SELECT company, (num_successful_missions::DECIMAL / num_missions) * 100 AS success_rate FROM space_missions_by_company;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 119, "num_statements": 1} {"question": "PostgreSQL regression test 'xml': Write the SELECT query (example 233).", "schema": null, "sql": "SELECT xmltable.* FROM xmldata, LATERAL xmltable('/ROWS/ROW[COUNTRY_NAME=\"Japan\" or COUNTRY_NAME=\"India\"]' PASSING data COLUMNS id int PATH '@id');", "explanation": "Regression test for Xml in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT xmltable.* FROM xmldata, LATERAL xmltable('/ROWS/ROW[COUNTRY_NAME=\"Japan\" or COUNTRY_NAME=\"India\"]' PASSING data COLUMNS id int PATH '@id')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": true, "sql_length": 147, "num_statements": 1} {"question": "Insert records of new mineral extractions in the 'South America' region.", "schema": "CREATE TABLE New_Extractions_2 (country TEXT, mineral TEXT, quantity INTEGER, region TEXT);", "sql": "INSERT INTO New_Extractions_2 (country, mineral, quantity, region) VALUES ('Brazil', 'Diamond', 120, 'South America'); INSERT INTO New_Extractions_2 (country, mineral, quantity, region) VALUES ('Peru', 'Emerald', 90, 'South America');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 234, "num_statements": 2} {"question": "Generate PostgreSQL SQL for: What district(s) did henry clay represent?", "schema": "CREATE TABLE table_2668264_8 (district VARCHAR, incumbent VARCHAR)", "sql": "SELECT district FROM table_2668264_8 WHERE incumbent = 'Henry Clay';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 68, "num_statements": 1} {"question": "How many tourists from each country visited sustainable destinations?", "schema": "CREATE TABLE Sustainable_Destinations (id INT, destination_name VARCHAR(50), sustainable BOOLEAN); CREATE TABLE Tourists_Destinations (tourist_id INT, destination_id INT, visit_date DATE); INSERT INTO Sustainable_Destinations VALUES (1, 'Eco Village', true); INSERT INTO Sustainable_Destinations VALUES (2, 'Green City', true); INSERT INTO Tourists_Destinations VALUES (1, 1, '2022-01-01'); INSERT INTO Tourists_Destinations VALUES (2, 2, '2022-01-02'); INSERT INTO Tourists_Destinations VALUES (3, 1, '2022-01-03');", "sql": "SELECT Tourists.nationality, COUNT(DISTINCT Tourists_Destinations.tourist_id) AS num_tourists FROM Tourists_Destinations INNER JOIN Tourists ON Tourists_Destinations.tourist_id = Tourists.id INNER JOIN Sustainable_Destinations ON Tourists_Destinations.destination_id = Sustainable_Destinations.id WHERE Sustainable_Destinations.sustainable = true GROUP BY Tourists.nationality;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 377, "num_statements": 1} {"question": "How many Shariah-compliant finance transactions were made in Q1 2022 by gender?", "schema": "CREATE TABLE shariah_compliant_finance(id INT, transaction_id INT, gender VARCHAR(10), quarter INT, year INT); INSERT INTO shariah_compliant_finance VALUES (1, 301, 'Male', 1, 2022); INSERT INTO shariah_compliant_finance VALUES (2, 302, 'Female', 1, 2022); INSERT INTO shariah_compliant_finance VALUES (3, 303, 'Male', 2, 2022); INSERT INTO shariah_compliant_finance VALUES (4, 304, 'Female', 2, 2022); INSERT INTO shariah_compliant_finance VALUES (5, 305, 'Non-binary', 1, 2022);", "sql": "SELECT gender, COUNT(transaction_id) FROM shariah_compliant_finance WHERE quarter = 1 AND year = 2022 GROUP BY gender;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 118, "num_statements": 1} {"question": "What was the minimum investment in agricultural innovation in 'Middle East' up to 2021?", "schema": "CREATE TABLE agricultural_innovation (innovation_id INT, innovation_name TEXT, region TEXT, investment_amount INT, year INT); INSERT INTO agricultural_innovation (innovation_id, innovation_name, region, investment_amount, year) VALUES (1, 'Drought-Resistant Crops', 'Africa', 2000000, 2020); INSERT INTO agricultural_innovation (innovation_id, innovation_name, region, investment_amount, year) VALUES (2, 'Precision Farming', 'Asia', 3000000, 2021); INSERT INTO agricultural_innovation (innovation_id, innovation_name, region, investment_amount, year) VALUES (3, 'Sustainable Farming', 'Middle East', 2500000, 2019); INSERT INTO agricultural_innovation (innovation_id, innovation_name, region, investment_amount, year) VALUES (4, 'Aquaponics', 'Middle East', 2000000, 2020);", "sql": "SELECT MIN(investment_amount) FROM agricultural_innovation WHERE year <= 2021 AND region = 'Middle East';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 105, "num_statements": 1} {"question": "Get the number of workplaces with safety issues", "schema": "CREATE TABLE workplaces (id INT, name TEXT, location TEXT, safety_issues INT);", "sql": "SELECT COUNT(*) FROM workplaces WHERE safety_issues > 0;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 56, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many players have more than 1000 hours of training?", "schema": "CREATE TABLE Player (HS INTEGER)", "sql": "SELECT COUNT(*) FROM Player WHERE HS > 1000;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 44, "num_statements": 1} {"question": "Who is the leading goal scorer in the history of the English Premier League?", "schema": "CREATE TABLE epl_goals (player_name VARCHAR(50), goals INT, assists INT); INSERT INTO epl_goals (player_name, goals, assists) VALUES ('Alan Shearer', 260, 64), ('Wayne Rooney', 208, 103);", "sql": "SELECT player_name, SUM(goals) as total_goals FROM epl_goals GROUP BY player_name ORDER BY total_goals DESC LIMIT 1;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 116, "num_statements": 1} {"question": "What is the average annual rainfall and temperature for each urban farm in the \"urban_farms\", \"urban_regions\", and \"weather\" tables?", "schema": "CREATE TABLE urban_farms (id INT, urban_region_id INT); CREATE TABLE urban_regions (id INT, name VARCHAR(50)); CREATE TABLE weather (id INT, region_id INT, year INT, rainfall FLOAT, temperature FLOAT);", "sql": "SELECT urban_farms.id AS farm_id, urban_regions.name AS region, AVG(weather.rainfall) AS avg_rainfall, AVG(weather.temperature) AS avg_temperature FROM urban_farms INNER JOIN urban_regions ON urban_farms.urban_region_id = urban_regions.id INNER JOIN weather ON urban_regions.id = weather.region_id GROUP BY urban_farms.id;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 322, "num_statements": 1} {"question": "What is the total amount of loans issued for Shariah-compliant microfinance in India?", "schema": "CREATE TABLE shariah_microfinance (id INT, loan_type VARCHAR(255), amount DECIMAL(10,2), country VARCHAR(255));", "sql": "SELECT SUM(amount) FROM shariah_microfinance WHERE loan_type = 'Shariah-compliant microfinance' AND country = 'India';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 118, "num_statements": 1} {"question": "pgTAP test for Inheritance (assertion 16).", "schema": null, "sql": "SELECT * FROM check_test(\n has_inherited_tables( 'parent', 'Gimme more' ),\n true,\n 'has_inherited_tables(tab, desc)',\n 'Gimme more',\n ''\n);", "explanation": "SQL assertion from pgTAP test suite for Inheritance.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 154, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the margin of victory for the winning score of −10 (71-65-68-70=274)?", "schema": "CREATE TABLE table_name_78 (margin_of_victory VARCHAR, winning_score VARCHAR)", "sql": "SELECT margin_of_victory FROM table_name_78 WHERE winning_score = −10(71 - 65 - 68 - 70 = 274);", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 95, "num_statements": 1} {"question": "What are the different mining operations in the Asia-Pacific region?", "schema": "CREATE TABLE Operations (Company VARCHAR(50), Operation VARCHAR(50), Location VARCHAR(10)); INSERT INTO Operations (Company, Operation, Location) VALUES ('GHI Mines', 'Coal', 'Asia'), ('JKL Mining', 'Gold', 'Australia'), ('MNO Drilling', 'Oil', 'Pacific');", "sql": "SELECT DISTINCT Operation FROM Operations WHERE Location LIKE 'Asia%' OR Location LIKE 'Pacific%';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 98, "num_statements": 1} {"question": "What is the total revenue for the 'Chinese' cuisine type in March 2022?", "schema": "CREATE TABLE restaurant_revenue (date DATE, cuisine VARCHAR(255), revenue DECIMAL(10,2)); INSERT INTO restaurant_revenue (date, cuisine, revenue) VALUES ('2022-02-01', 'Italian', 1500.00), ('2022-02-01', 'Mexican', 1200.00), ('2022-02-02', 'Italian', 1800.00), ('2022-02-02', 'Mexican', 1400.00), ('2022-02-03', 'Indian', 1600.00), ('2022-02-03', 'Italian', 1900.00), ('2022-02-04', 'Mexican', 1700.00), ('2022-02-04', 'Indian', 1300.00), ('2022-03-01', 'Chinese', 1100.00), ('2022-03-02', 'Chinese', 1200.00);", "sql": "SELECT cuisine, SUM(revenue) as total_revenue FROM restaurant_revenue WHERE date >= '2022-03-01' AND date <= '2022-03-31' AND cuisine = 'Chinese' GROUP BY cuisine;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 163, "num_statements": 1} {"question": "PostgreSQL regression test 'multirangetypes': Write the SELECT query (example 30).", "schema": null, "sql": "select '{(\\\\,a)}'::textmultirange;", "explanation": "Regression test for Multirangetypes in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select '{(\\\\,a)}'::textmultirange) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 34, "num_statements": 1} {"question": "PostgreSQL regression test 'jsonb_jsonpath': Write the SELECT query (example 566).", "schema": null, "sql": "select jsonb_path_query('[]', '$.time_tz()');", "explanation": "Regression test for Jsonb Jsonpath in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select jsonb_path_query('[]', '$.time_tz()')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "advanced", "category": "query_select", "is_postgresql_specific": false, "sql_length": 45, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which club has 1 cap?", "schema": "CREATE TABLE table_name_90 (club_province VARCHAR, caps VARCHAR)", "sql": "SELECT club_province FROM table_name_90 WHERE caps = 1;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 55, "num_statements": 1} {"question": "List the names of countries with no viewers for any concert?", "schema": "CREATE TABLE Concerts (id INT, title VARCHAR(255), location VARCHAR(255), viewers INT);", "sql": "SELECT location FROM Concerts WHERE viewers = 0 GROUP BY location HAVING COUNT(*) = (SELECT COUNT(*) FROM Concerts WHERE Concerts.viewers = 0);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 143, "num_statements": 1} {"question": "What is the percentage of female and male employees by department?", "schema": "CREATE TABLE employee_demographics (id INT, employee_id INT, department_id INT, gender VARCHAR(10)); INSERT INTO employee_demographics (id, employee_id, department_id, gender) VALUES (1, 1, 1, 'Female'), (2, 2, 1, 'Male'), (3, 3, 2, 'Female'), (4, 4, 2, 'Non-binary'), (5, 5, 3, 'Male');", "sql": "SELECT department_id, gender, COUNT(*) * 100.0 / (SELECT COUNT(*) FROM employee_demographics WHERE department_id = departments.id) as percentage FROM employee_demographics JOIN departments ON employee_demographics.department_id = departments.id GROUP BY department_id, gender;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 276, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the Surface on during 10 November 2006?", "schema": "CREATE TABLE table_name_41 (surface VARCHAR, date VARCHAR)", "sql": "SELECT surface FROM table_name_41 WHERE date = '10 november 2006';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What date did they play agains the New York Rangers?", "schema": "CREATE TABLE table_23308178_4 (date VARCHAR, opponent VARCHAR)", "sql": "SELECT date FROM table_23308178_4 WHERE opponent = 'New York Rangers';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 70, "num_statements": 1} {"question": "What is the oldest artist in the 'Famous_Artists' table?", "schema": "CREATE TABLE Famous_Artists (artist_id INT, artist_name VARCHAR(255), birth_date DATE);", "sql": "SELECT artist_name FROM Famous_Artists ORDER BY birth_date ASC LIMIT 1;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 71, "num_statements": 1} {"question": "Add a new satellite project 'Satellite XYZ' launched by 'NASA'", "schema": "CREATE TABLE satellite_projects (id INT PRIMARY KEY, name VARCHAR(255), organization VARCHAR(255), launch_date DATE);", "sql": "INSERT INTO satellite_projects (id, name, organization, launch_date) VALUES (1, 'Satellite XYZ', 'NASA', '2025-04-22');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 119, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who are all the runner ups when the score is 9.12 (66) – 5.6 (36)?", "schema": "CREATE TABLE table_1139835_9 (runner_up VARCHAR, score VARCHAR)", "sql": "SELECT runner_up FROM table_1139835_9 WHERE score = '9.12 (66) – 5.6 (36)';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 75, "num_statements": 1} {"question": "List all mines and their environmental impact score, grouped by country", "schema": "CREATE TABLE mine (id INT, name TEXT, country TEXT, environmental_impact_score INT); INSERT INTO mine VALUES (1, 'Mine A', 'Country A', 60); INSERT INTO mine VALUES (2, 'Mine B', 'Country B', 75); INSERT INTO mine VALUES (3, 'Mine C', 'Country A', 45);", "sql": "SELECT country, environmental_impact_score, AVG(environmental_impact_score) as avg_score FROM mine GROUP BY country;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 116, "num_statements": 1} {"question": "What is the total playtime, in hours, for each game, broken down by genre?", "schema": "CREATE TABLE GamePlaytime (PlayerID INT, PlayerName TEXT, Game TEXT, Genre TEXT, Playtime INT); INSERT INTO GamePlaytime (PlayerID, PlayerName, Game, Genre, Playtime) VALUES (1, 'John Doe', 'Game A', 'Shooter', 50), (2, 'Jane Smith', 'Game B', 'Strategy', 75), (3, 'Bob Johnson', 'Game C', 'Shooter', 100), (4, 'Alice Williams', 'Game D', 'Role-playing', 30), (5, 'Charlie Brown', 'Game A', 'Shooter', 25);", "sql": "SELECT Genre, SUM(Playtime / 60) AS TotalPlaytime FROM GamePlaytime GROUP BY Genre;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 83, "num_statements": 1} {"question": "What is the average number of ethical AI courses offered by universities in South America?", "schema": "CREATE TABLE Ethical_AI_Courses (University VARCHAR(50), Courses INT);", "sql": "SELECT AVG(Courses) FROM Ethical_AI_Courses WHERE University IN (SELECT University FROM Ethical_AI_Courses WHERE Country IN ('Argentina', 'Brazil', 'Colombia') GROUP BY University HAVING COUNT(*) >= 2);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 202, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'constraints' (example 264).", "schema": null, "sql": "INSERT INTO circles VALUES('<(20,20), 10>', '<(0,0), 4>')\n ON CONFLICT ON CONSTRAINT circles_c1_c2_excl DO SELECT RETURNING *;", "explanation": "DML from PostgreSQL core regression test for Constraints.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "dml_insert", "is_postgresql_specific": true, "sql_length": 127, "num_statements": 1} {"question": "Update the community service hours of offenders in California by adding 10 hours to their current total.", "schema": "CREATE TABLE offenders (id INT, name TEXT, state TEXT, community_service_hours INT); INSERT INTO offenders (id, name, state, community_service_hours) VALUES (1, 'John Doe', 'California', 50); INSERT INTO offenders (id, name, state, community_service_hours) VALUES (2, 'Jane Smith', 'California', 75);", "sql": "UPDATE offenders SET community_service_hours = community_service_hours + 10 WHERE state = 'California';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 103, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the amount of 000s from 1996?", "schema": "CREATE TABLE table_name_20 (total__000s_ INTEGER, year VARCHAR)", "sql": "SELECT SUM(total__000s_) FROM table_name_20 WHERE year = '1996';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 64, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: In what rounds did Luigi Fagioli drive for Alfa Romeo SPA?", "schema": "CREATE TABLE table_name_69 (rounds VARCHAR, entrant VARCHAR, driver VARCHAR)", "sql": "SELECT rounds FROM table_name_69 WHERE entrant = 'alfa romeo spa' AND driver = 'luigi fagioli';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 95, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: I want the plaid cymru for 4 may 2011", "schema": "CREATE TABLE table_name_9 (plaid_cymru VARCHAR, date_s__conducted VARCHAR)", "sql": "SELECT plaid_cymru FROM table_name_9 WHERE date_s__conducted = '4 may 2011';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 76, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which frequency is located in Davao?", "schema": "CREATE TABLE table_name_35 (frequency VARCHAR, location VARCHAR)", "sql": "SELECT frequency FROM table_name_35 WHERE location = 'davao';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 61, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Can you tell me the Year(s) Won that has the To par larger than 5, and the Total smaller than 155, and the Country of united states?", "schema": "CREATE TABLE table_name_18 (year_s__won VARCHAR, country VARCHAR, to_par VARCHAR, total VARCHAR)", "sql": "SELECT year_s__won FROM table_name_18 WHERE to_par > 5 AND total < 155 AND country = 'united states';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 101, "num_statements": 1} {"question": "What is the minimum age of artists who have exhibited in galleries located in the Warehouse District?", "schema": "CREATE TABLE galleries (id INT, name TEXT, location TEXT, city TEXT, state TEXT, zip INT); INSERT INTO galleries (id, name, location, city, state, zip) VALUES (1, 'Gallery X', 'Warehouse District', 'Denver', 'CO', 80202); CREATE TABLE artists (id INT, name TEXT, age INT, gallery_id INT); INSERT INTO artists (id, name, age, gallery_id) VALUES (1, 'Brian', 28, 1);", "sql": "SELECT MIN(age) FROM artists JOIN galleries ON artists.gallery_id = galleries.id WHERE galleries.location = 'Warehouse District';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 129, "num_statements": 1} {"question": "What is the total cargo weight handled in the first quarter of 2022 for each country?", "schema": "CREATE TABLE ports (port_id INT, port_name VARCHAR(255), country VARCHAR(255)); INSERT INTO ports VALUES (1, 'Port of Shanghai', 'China'); CREATE TABLE cargo (cargo_id INT, port_id INT, weight FLOAT, handling_date DATE); INSERT INTO cargo VALUES (1, 1, 5000, '2021-01-01');", "sql": "SELECT c.country, SUM(c.weight) as total_weight FROM ports p JOIN cargo c ON p.port_id = c.port_id WHERE handling_date >= '2022-01-01' AND handling_date < '2022-04-01' GROUP BY c.country;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 187, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many people attended the score of w 7-6?", "schema": "CREATE TABLE table_name_15 (crowd VARCHAR, score VARCHAR)", "sql": "SELECT crowd FROM table_name_15 WHERE score = 'w 7-6';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 54, "num_statements": 1} {"question": "Delete all records in the 'athletes' table where 'team' is 'Bears'", "schema": "CREATE TABLE athletes (id INT, name VARCHAR(50), position VARCHAR(50), team VARCHAR(50), age INT);", "sql": "DELETE FROM athletes WHERE team = 'Bears';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_delete", "is_postgresql_specific": false, "sql_length": 42, "num_statements": 1} {"question": "List all the defense innovation projects, along with their respective funding amounts, from the 'Innovation' and 'Funding' tables.", "schema": "CREATE TABLE Innovation (id INT, project VARCHAR(255)); CREATE TABLE Funding (id INT, project VARCHAR(255), amount DECIMAL(10,2));", "sql": "SELECT Innovation.project, Funding.amount FROM Innovation INNER JOIN Funding ON Innovation.id = Funding.id;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 107, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the total number of Rating(s), when Weekly Rank is 76/88?", "schema": "CREATE TABLE table_name_7 (rating VARCHAR, weekly_rank VARCHAR)", "sql": "SELECT COUNT(rating) FROM table_name_7 WHERE weekly_rank = '76/88';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 67, "num_statements": 1} {"question": "Find the names of the attendees who have visited both musical and theater events in NY.", "schema": "CREATE TABLE Events (event_id INT, event_type VARCHAR(50), location VARCHAR(50)); CREATE TABLE Attendance (attendee_id INT, event_id INT); INSERT INTO Events (event_id, event_type, location) VALUES (1, 'Musical', 'New York'), (2, 'Theater', 'Los Angeles'), (3, 'Musical', 'New York'); INSERT INTO Attendance (attendee_id, event_id) VALUES (1, 1), (1, 2), (2, 1), (3, 3);", "sql": "SELECT attendee_id FROM Attendance A WHERE EXISTS (SELECT 1 FROM Events E WHERE E.event_type = 'Musical' AND E.location = 'New York' AND A.event_id = E.event_id) AND EXISTS (SELECT 1 FROM Events F WHERE F.event_type = 'Theater' AND F.location = 'New York' AND A.event_id = F.event_id);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 285, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which entrant scored less than 8 points and used a Maserati chassis?", "schema": "CREATE TABLE table_name_91 (entrant VARCHAR, points VARCHAR, chassis VARCHAR)", "sql": "SELECT entrant FROM table_name_91 WHERE points < 8 AND chassis = 'maserati';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 76, "num_statements": 1} {"question": "What is the average hire date for employees in the IT department?", "schema": "CREATE TABLE Employees (EmployeeID INT, FirstName VARCHAR(50), LastName VARCHAR(50), Department VARCHAR(50), Gender VARCHAR(10), HireDate DATE); INSERT INTO Employees (EmployeeID, FirstName, LastName, Department, Gender, HireDate) VALUES (1, 'John', 'Doe', 'IT', 'Male', '2020-01-01'), (2, 'Jane', 'Doe', 'IT', 'Female', '2021-01-01');", "sql": "SELECT AVG(HireDate) FROM Employees WHERE Department = 'IT';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 60, "num_statements": 1} {"question": "List all freight forwarding transactions for 'CustomerA' with their corresponding statuses and dates.", "schema": "CREATE TABLE Customers (CustomerID VARCHAR(20), CustomerName VARCHAR(20)); INSERT INTO Customers (CustomerID, CustomerName) VALUES ('A', 'CustomerA'), ('B', 'CustomerB'); CREATE TABLE FreightForwardingTransactions (TransactionID INT, CustomerID VARCHAR(20), TransactionStatus VARCHAR(20), TransactionDate DATE); INSERT INTO FreightForwardingTransactions (TransactionID, CustomerID, TransactionStatus, TransactionDate) VALUES (1, 'A', 'Created', '2022-01-01'), (2, 'A', 'InProgress', '2022-01-02');", "sql": "SELECT FreightForwardingTransactions.TransactionID, FreightForwardingTransactions.TransactionStatus, FreightForwardingTransactions.TransactionDate FROM Customers JOIN FreightForwardingTransactions ON Customers.CustomerID = FreightForwardingTransactions.CustomerID WHERE Customers.CustomerName = 'CustomerA';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 307, "num_statements": 1} {"question": "Maximum fairness score for AI models submitted by women.", "schema": "CREATE TABLE ai_fairness (model_name TEXT, fairness_score INTEGER, submitter_gender TEXT); INSERT INTO ai_fairness (model_name, fairness_score, submitter_gender) VALUES ('ModelX', 95, 'Female'), ('ModelY', 88, 'Male'), ('ModelZ', 98, 'Female');", "sql": "SELECT MAX(fairness_score) FROM ai_fairness WHERE submitter_gender = 'Female';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 78, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: yes or no for the adelaide with no for auckland, yes for melbourne, yes for the gold coast?", "schema": "CREATE TABLE table_name_97 (adelaide VARCHAR, gold_coast VARCHAR, auckland VARCHAR, melbourne VARCHAR)", "sql": "SELECT adelaide FROM table_name_97 WHERE auckland = 'no' AND melbourne = 'yes' AND gold_coast = 'yes';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 102, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which distinct source system code includes the substring 'en'?", "schema": "CREATE TABLE cmi_cross_references (source_system_code VARCHAR)", "sql": "SELECT DISTINCT source_system_code FROM cmi_cross_references WHERE source_system_code LIKE '%en%';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 98, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'btree_gin' (example 21).", "schema": null, "sql": "SELECT * FROM test_int8 WHERE i>1::int4 ORDER BY i;", "explanation": "Example query from the 'btree_gin' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 51, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Show all company names and headquarters in the descending order of market value.", "schema": "CREATE TABLE company (company VARCHAR, headquarters VARCHAR, market_value VARCHAR)", "sql": "SELECT company, headquarters FROM company ORDER BY market_value DESC;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 69, "num_statements": 1} {"question": "Update the User_Experience value for the 'Great Wall of China Tour' in the Virtual_Tourism table, adding 150 to the current value.", "schema": "CREATE TABLE Virtual_Tourism (Experience VARCHAR(50), Platform VARCHAR(50), User_Experience INT); INSERT INTO Virtual_Tourism (Experience, Platform, User_Experience) VALUES ('Great Wall of China Tour', 'Google Arts & Culture', 4800), ('Eiffel Tower Tour', 'AirPano', 3500), ('Vatican City Tour', 'Yandex', 5200);", "sql": "UPDATE Virtual_Tourism SET User_Experience = User_Experience + 150 WHERE Experience = 'Great Wall of China Tour';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 113, "num_statements": 1} {"question": "What is the average monthly data usage for prepaid mobile subscribers?", "schema": "CREATE TABLE data_usage (subscriber_id INT, service VARCHAR(10), start_date DATE, end_date DATE, data_usage INT); INSERT INTO data_usage (subscriber_id, service, start_date, end_date, data_usage) VALUES (1, 'prepaid', '2022-01-01', '2022-01-31', 3000), (2, 'prepaid', '2022-02-01', '2022-02-28', 3500);", "sql": "SELECT AVG(data_usage) FROM data_usage WHERE service = 'prepaid';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 65, "num_statements": 1} {"question": "Insert new cultural competency training record for Massachusetts", "schema": "CREATE TABLE cultural_competency_training (chw_id INT, state VARCHAR(2), year INT, completed BOOLEAN);", "sql": "INSERT INTO cultural_competency_training (chw_id, state, year, completed) VALUES (789, 'MA', 2022, FALSE);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 106, "num_statements": 1} {"question": "Identify the wildlife habitats that have a decrease in area between 2018 and 2019, and order them by the largest decrease first.", "schema": "CREATE TABLE wildlife_habitats (habitat_id INT, habitat_name VARCHAR(50), year INT, area INT); INSERT INTO wildlife_habitats (habitat_id, habitat_name, year, area) VALUES (1, 'Forest', 2018, 1000), (2, 'Wetland', 2018, 2000), (3, 'Grassland', 2018, 3000), (4, 'Forest', 2019, 900), (5, 'Wetland', 2019, 1800), (6, 'Grassland', 2019, 2800);", "sql": "SELECT habitat_name, (LAG(area, 1) OVER (PARTITION BY habitat_name ORDER BY year)) - area AS area_decrease FROM wildlife_habitats WHERE year = 2019 GROUP BY habitat_name, area ORDER BY area_decrease DESC;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "query_window_function", "is_postgresql_specific": true, "sql_length": 204, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Show the average amount of transactions for different investors.", "schema": "CREATE TABLE TRANSACTIONS (investor_id VARCHAR, amount_of_transaction INTEGER)", "sql": "SELECT investor_id, AVG(amount_of_transaction) FROM TRANSACTIONS GROUP BY investor_id;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 86, "num_statements": 1} {"question": "PostgreSQL regression test 'float4': Write the SELECT query (example 41).", "schema": null, "sql": "SELECT '42'::float4 / 'Infinity'::float4;", "explanation": "Regression test for Float4 in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT '42'::float4 / 'Infinity'::float4) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 41, "num_statements": 1} {"question": "what is the number of animals by species in the 'animal_population' table?", "schema": "CREATE TABLE animal_population (species VARCHAR(50), population INT); INSERT INTO animal_population (species, population) VALUES ('Tiger', 200), ('Lion', 300), ('Elephant', 400);", "sql": "SELECT species, SUM(population) FROM animal_population GROUP BY species;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 72, "num_statements": 1} {"question": "PostgreSQL regression test 'tsearch': Write the SELECT query (example 226).", "schema": null, "sql": "SELECT to_tsquery('english', '(1 <3> 2) <-> a');", "explanation": "Regression test for Tsearch in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT to_tsquery('english', '(1 <3> 2) <-> a')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 48, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'create_type' (example 2).", "schema": null, "sql": "CREATE FUNCTION widget_out(widget)\n RETURNS cstring\n AS :'regresslib'\n LANGUAGE C STRICT IMMUTABLE;", "explanation": "DDL from PostgreSQL core regression test for Create Type.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 105, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Find the name of the students who have more than one advisor?", "schema": "CREATE TABLE student (name VARCHAR, id VARCHAR); CREATE TABLE advisor (s_id VARCHAR)", "sql": "SELECT T1.name FROM student AS T1 JOIN advisor AS T2 ON T1.id = T2.s_id GROUP BY T2.s_id HAVING COUNT(*) > 1;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 109, "num_statements": 1} {"question": "Show a SQL definition from the pg_partman project (test-id-time-subpart-custom-start, item 7).", "schema": null, "sql": "CREATE TABLE partman_test.id_taptest_table (\n col1 int NOT NULL\n , col2 text DEFAULT 'stuff'\n , col3 timestamptz NOT NULL DEFAULT now())\n PARTITION BY RANGE (col1);", "explanation": "SQL definition from the open-source pg_partman PostgreSQL project.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "ddl_table", "is_postgresql_specific": true, "sql_length": 176, "num_statements": 1} {"question": "Which countries have more than 5 excavation sites?", "schema": "CREATE TABLE Countries (CountryID INT, CountryName TEXT); INSERT INTO Countries (CountryID, CountryName) VALUES (1, 'Country-X'), (2, 'Country-Y'), (3, 'Country-Z'); CREATE TABLE Sites (SiteID INT, SiteName TEXT, CountryID INT); INSERT INTO Sites (SiteID, SiteName, CountryID) VALUES (1, 'Site-A', 1), (2, 'Site-B', 2), (3, 'Site-C', 3), (4, 'Site-D', 1), (5, 'Site-E', 1), (6, 'Site-F', 2), (7, 'Site-G', 3), (8, 'Site-H', 3), (9, 'Site-I', 3);", "sql": "SELECT Countries.CountryName, COUNT(DISTINCT Sites.SiteID) AS SiteCount FROM Countries INNER JOIN Sites ON Countries.CountryID = Sites.CountryID GROUP BY Countries.CountryName HAVING SiteCount > 5;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 197, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the maximum 2010 value for China?", "schema": "CREATE TABLE table_30133_3 (economy VARCHAR)", "sql": "SELECT MAX(2010) FROM table_30133_3 WHERE economy = 'China';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 60, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many players received high points where location/attendance was UIC Pavilion 6,304 respectively?", "schema": "CREATE TABLE table_17118657_7 (high_points VARCHAR, location_attendance VARCHAR)", "sql": "SELECT COUNT(high_points) FROM table_17118657_7 WHERE location_attendance = 'UIC Pavilion 6,304';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 97, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many people were in attendance when the Washington Nationals had a score of 7-3 and a loss of Worrell (0-1)?", "schema": "CREATE TABLE table_name_54 (attendance VARCHAR, score VARCHAR, loss VARCHAR)", "sql": "SELECT COUNT(attendance) FROM table_name_54 WHERE score = '7-3' AND loss = 'worrell (0-1)';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 91, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: I want the venue for 23 july 1992", "schema": "CREATE TABLE table_name_64 (venue VARCHAR, date VARCHAR)", "sql": "SELECT venue FROM table_name_64 WHERE date = '23 july 1992';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 60, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the time for Sheffield?", "schema": "CREATE TABLE table_name_15 (time VARCHAR, city VARCHAR)", "sql": "SELECT time FROM table_name_15 WHERE city = 'sheffield';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 56, "num_statements": 1} {"question": "What is the average volume of timber produced annually in mangrove forests over the last decade?", "schema": "CREATE TABLE mangrove_timber (id INT, year INT, volume FLOAT);", "sql": "SELECT AVG(volume) as avg_annual_volume FROM mangrove_timber WHERE year BETWEEN 2011 AND 2021;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 94, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which name has a percentage of 0.59%?", "schema": "CREATE TABLE table_name_28 (name VARCHAR, percentage VARCHAR)", "sql": "SELECT name FROM table_name_28 WHERE percentage = '0.59%';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 58, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: return the smallest salary for every departments.", "schema": "CREATE TABLE employees (department_id VARCHAR, salary INTEGER)", "sql": "SELECT MIN(salary), department_id FROM employees GROUP BY department_id;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 72, "num_statements": 1} {"question": "How many communities are in the 'Planning' stage for each country?", "schema": "CREATE TABLE If Not Exists community_development (community_id INT, community_name TEXT, location TEXT, development_stage TEXT); INSERT INTO community_development (community_id, community_name, location, development_stage) VALUES (4, 'Community D', 'Somalia', 'Planning'), (5, 'Community E', 'Sudan', 'Planning');", "sql": "SELECT location, COUNT(*) as num_communities FROM community_development WHERE development_stage = 'Planning' GROUP BY location;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 127, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What ordered has helena as the name?", "schema": "CREATE TABLE table_name_87 (ordered VARCHAR, name VARCHAR)", "sql": "SELECT ordered FROM table_name_87 WHERE name = 'helena';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 56, "num_statements": 1} {"question": "What is the total number of virtual tours in 'Asia' and 'Europe'?", "schema": "CREATE TABLE VirtualTours (TourID INTEGER, TourName TEXT, Location TEXT); INSERT INTO VirtualTours (TourID, TourName, Location) VALUES (1, 'Virtual Safari', 'Kenya'), (2, 'Virtual Castle Tour', 'Scotland'), (3, 'Virtual Trek', 'Nepal'), (4, 'Virtual Skiing', 'Switzerland'), (5, 'Virtual Temple Tour', 'Japan');", "sql": "SELECT SUM(CASE WHEN Location IN ('Asia', 'Europe') THEN 1 ELSE 0 END) FROM VirtualTours;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 89, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Where is Robert Morris University-Illinois held?", "schema": "CREATE TABLE table_27361255_1 (location VARCHAR, institution VARCHAR)", "sql": "SELECT location FROM table_27361255_1 WHERE institution = 'Robert Morris University-Illinois';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 94, "num_statements": 1} {"question": "What is the total number of volunteers who have joined in each year in the 'Volunteers' table?", "schema": "CREATE TABLE Volunteers (VolunteerID INT, VolunteerName VARCHAR(50), JoinDate DATE); INSERT INTO Volunteers (VolunteerID, VolunteerName, JoinDate) VALUES (1, 'Sophia Garcia', '2021-05-01'), (2, 'Ali Hassan', '2022-01-15'), (3, 'Lea Kim', '2021-12-31'), (4, 'Han Mehta', '2022-02-28');", "sql": "SELECT YEAR(JoinDate) as Year, COUNT(*) as TotalVolunteers FROM Volunteers GROUP BY Year;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 89, "num_statements": 1} {"question": "What is the average size of affordable housing units in the city of Seattle?", "schema": "CREATE TABLE AffordableHousing (id INT, city VARCHAR(20), size FLOAT);", "sql": "SELECT AVG(size) FROM AffordableHousing WHERE city = 'Seattle';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 63, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Tell me the position in 2012-13 and number of seasons in leigue 1 of 30 with ligue 1 titles of 1", "schema": "CREATE TABLE table_name_69 (position_in_2012_13 VARCHAR, number_of_seasons_in_ligue_1 VARCHAR, ligue_1_titles VARCHAR)", "sql": "SELECT position_in_2012_13 FROM table_name_69 WHERE number_of_seasons_in_ligue_1 = 30 AND ligue_1_titles = 1;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 109, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: what is the color quality when the relay is ✓?", "schema": "CREATE TABLE table_name_9 (color_quality VARCHAR, relay VARCHAR)", "sql": "SELECT color_quality FROM table_name_9 WHERE relay = '✓';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 57, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the silver medal count of the team that finished with 8 bronze medals?", "schema": "CREATE TABLE table_name_41 (silver INTEGER, bronze VARCHAR)", "sql": "SELECT SUM(silver) FROM table_name_41 WHERE bronze = 8;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 55, "num_statements": 1} {"question": "List all waste types", "schema": "CREATE TABLE waste_types (id INT PRIMARY KEY, waste_type VARCHAR(255)); INSERT INTO waste_types (id, waste_type) VALUES (1, 'Plastic'), (2, 'Paper');", "sql": "SELECT * FROM waste_types;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 26, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What club has losses greater than 1, 4 for the wins, with points against less than 894?", "schema": "CREATE TABLE table_name_83 (club VARCHAR, points_against VARCHAR, loses VARCHAR, wins VARCHAR)", "sql": "SELECT club FROM table_name_83 WHERE loses > 1 AND wins = 4 AND points_against < 894;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 85, "num_statements": 1} {"question": "What is the average threat intelligence metric score for the Pacific region in the past week?", "schema": "CREATE TABLE threat_intelligence (threat_id INT, threat_score INT, threat_region VARCHAR(255), threat_date DATE); INSERT INTO threat_intelligence (threat_id, threat_score, threat_region, threat_date) VALUES (1, 7, 'Pacific', '2021-01-01'); INSERT INTO threat_intelligence (threat_id, threat_score, threat_region, threat_date) VALUES (2, 8, 'Atlantic', '2021-02-01'); INSERT INTO threat_intelligence (threat_id, threat_score, threat_region, threat_date) VALUES (3, 9, 'Pacific', '2021-02-02');", "sql": "SELECT threat_region, AVG(threat_score) as avg_threat_score FROM threat_intelligence WHERE threat_date >= DATEADD(week, -1, GETDATE()) GROUP BY threat_region;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 158, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which exaltation has a domicile of Saturn and a fall of Jupiter?", "schema": "CREATE TABLE table_name_98 (exaltation VARCHAR, domicile VARCHAR, fall VARCHAR)", "sql": "SELECT exaltation FROM table_name_98 WHERE domicile = 'saturn' AND fall = 'jupiter';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 84, "num_statements": 1} {"question": "What is the average production (bbl) per well in the 'Gulf of Mexico'?", "schema": "CREATE TABLE well_production (well_id INT, region VARCHAR(20), year INT, production INT); INSERT INTO well_production (well_id, region, year, production) VALUES (1, 'Gulf of Mexico', 2020, 100000), (2, 'Gulf of Mexico', 2019, 120000), (3, 'Alaska', 2020, 150000);", "sql": "SELECT AVG(production) FROM well_production WHERE region = 'Gulf of Mexico';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 76, "num_statements": 1} {"question": "Count the number of articles published in the 'culture' section with a word count greater than 1000.", "schema": "CREATE TABLE articles (id INT, title VARCHAR(255), section VARCHAR(64), word_count INT); INSERT INTO articles (id, title, section, word_count) VALUES (1, 'ArticleA', 'culture', 1200), (2, 'ArticleB', 'politics', 800), (3, 'ArticleC', 'culture', 1500);", "sql": "SELECT COUNT(*) FROM articles WHERE section = 'culture' AND word_count > 1000;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 78, "num_statements": 1} {"question": "How many fair trade certified factories are there in Vietnam?", "schema": "CREATE TABLE FairTradeCertification (factory VARCHAR(50), certification VARCHAR(50)); INSERT INTO FairTradeCertification VALUES ('Factory1', 'Fair Trade'), ('Factory2', 'Not Certified'), ('Factory3', 'Fair Trade'), ('Factory4', 'Not Certified');", "sql": "SELECT COUNT(*) FROM FairTradeCertification WHERE certification = 'Fair Trade';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 79, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What's the date of the first airing of the episode with series number 63?", "schema": "CREATE TABLE table_10718192_2 (original_air_date VARCHAR, no_in_series VARCHAR)", "sql": "SELECT original_air_date FROM table_10718192_2 WHERE no_in_series = 63;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 71, "num_statements": 1} {"question": "PostgreSQL regression test 'jsonb': Write the SELECT query (example 942).", "schema": null, "sql": "select jsonb_to_tsvector('english', '{\"a\": \"aaa in bbb\", \"b\": 123, \"c\": 456, \"d\": true, \"f\": false, \"g\": null}'::jsonb, '{}');", "explanation": "Regression test for Jsonb in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select jsonb_to_tsvector('english', '{\"a\": \"aaa in bbb\", \"b\": 123, \"c\": 456, \"d\": true, \"f\": false, \"g\": null}'::jsonb, '{}')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "advanced", "category": "query_select", "is_postgresql_specific": true, "sql_length": 126, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the name of the captain when teh shirt sponsor is n/a?", "schema": "CREATE TABLE table_27631756_2 (captain VARCHAR, shirt_sponsor VARCHAR)", "sql": "SELECT captain FROM table_27631756_2 WHERE shirt_sponsor = 'N/A';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 65, "num_statements": 1} {"question": "pgTAP test for Coltap (assertion 85).", "schema": null, "sql": "SELECT * FROM check_test(\n col_default_is( 'public', 'sometab', 'myat', 'now()', 'desc' ),\n true,\n 'col_default_is( schema, tab, col, expression, desc )',\n 'desc',\n ''\n);", "explanation": "SQL assertion from pgTAP test suite for Coltap.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 185, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which date was for Japan?", "schema": "CREATE TABLE table_name_66 (date VARCHAR, country VARCHAR)", "sql": "SELECT date FROM table_name_66 WHERE country = 'japan';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 55, "num_statements": 1} {"question": "What is the adoption rate of AI-powered chatbots in 'South America' for 'Q2 2021'?", "schema": "CREATE TABLE ai_adoption (region VARCHAR(20), quarter INT, adoption_rate DECIMAL(5,2)); INSERT INTO ai_adoption (region, quarter, adoption_rate) VALUES ('North America', 2, 68.50), ('South America', 2, 58.20);", "sql": "SELECT adoption_rate FROM ai_adoption WHERE region = 'South America' AND quarter = 2;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 85, "num_statements": 1} {"question": "What is the maximum number of players in a multiplayer game?", "schema": "CREATE TABLE Games (GameID INT, MaxPlayers INT, Players INT); INSERT INTO Games (GameID, MaxPlayers, Players) VALUES (1, 10, 5);", "sql": "SELECT MAX(MaxPlayers) FROM Games;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 34, "num_statements": 1} {"question": "PostgreSQL regression test 'aggregates': Write the SELECT query (example 378).", "schema": null, "sql": "select percentile_cont(0.5) within group (order by b), sum(b) from aggtest;", "explanation": "Regression test for Aggregates in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select percentile_cont(0.5) within group (order by b), sum(b) from aggtest) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "advanced", "category": "query_select", "is_postgresql_specific": true, "sql_length": 75, "num_statements": 1} {"question": "PostgreSQL regression test 'rangefuncs': Write the SELECT query (example 169).", "schema": null, "sql": "SELECT * FROM (VALUES (1),(2),(3)) v(r) LEFT JOIN rngfunc_sql(11,13) ON (r+i)<100;", "explanation": "Regression test for Rangefuncs in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT * FROM (VALUES (1),(2),(3)) v(r) LEFT JOIN rngfunc_sql(11,13) ON (r+i)<100) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 82, "num_statements": 1} {"question": "Find the total number of investments in the healthcare sector with a risk level above 5.", "schema": "CREATE TABLE investments (id INT, company_id INT, sector VARCHAR(255), risk_level INT); INSERT INTO investments (id, company_id, sector, risk_level) VALUES (1, 4, 'Healthcare', 6), (2, 5, 'Healthcare', 8), (3, 6, 'Healthcare', 7);", "sql": "SELECT COUNT(*) FROM investments WHERE sector = 'Healthcare' AND risk_level > 5;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 80, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who was Carlton's away team opponents?", "schema": "CREATE TABLE table_name_16 (away_team VARCHAR, home_team VARCHAR)", "sql": "SELECT away_team FROM table_name_16 WHERE home_team = 'carlton';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 64, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'stats_ext' (example 9).", "schema": null, "sql": "CREATE STATISTICS tst ON x, x, y, x, x, y, x, x, y FROM ext_stats_test;", "explanation": "DDL from PostgreSQL core regression test for Stats Ext.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 71, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who is the Best dancer with the Worst dancer of Jerry Springer, a Best score of 29, and the Dance was the Quickstep?", "schema": "CREATE TABLE table_name_47 (best_dancer VARCHAR, dance VARCHAR, worst_dancer VARCHAR, best_score VARCHAR)", "sql": "SELECT best_dancer FROM table_name_47 WHERE worst_dancer = 'jerry springer' AND best_score = 29 AND dance = 'quickstep';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 120, "num_statements": 1} {"question": "Show a SQL definition from the pglogical project (pglogical--2.2.1, item 18).", "schema": null, "sql": "CREATE TABLE pglogical.replication_set (\n set_id oid NOT NULL PRIMARY KEY,\n set_nodeid oid NOT NULL,\n set_name name NOT NULL,\n replicate_insert boolean NOT NULL DEFAULT true,\n replicate_update boolean NOT NULL DEFAULT true,\n replicate_delete boolean NOT NULL DEFAULT true,\n replicate_truncate boolean NOT NULL DEFAULT true,\n UNIQUE (set_nodeid, set_name)\n) WITH (user_catalog_table=true);", "explanation": "SQL definition from the open-source pglogical PostgreSQL project.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_table", "is_postgresql_specific": false, "sql_length": 412, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the highest tourism arrivals in 2011 in millions with a 3.82 tourism competitiveness in 2011 and more than 1,102 US$ per arrival in 2011 tourism receipts?", "schema": "CREATE TABLE table_name_92 (tourist_arrivals__2011___millions_ INTEGER, tourism_competitiveness__2011___ttci_ VARCHAR, tourism_receipts__2011___us$_per_arrival_ VARCHAR)", "sql": "SELECT MAX(tourist_arrivals__2011___millions_) FROM table_name_92 WHERE tourism_competitiveness__2011___ttci_ = '3.82' AND tourism_receipts__2011___us$_per_arrival_ > 1 OFFSET 102;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 180, "num_statements": 1} {"question": "What is the minimum budget allocation for any service in CityB for the last fiscal year?", "schema": "CREATE TABLE fiscal_year (fiscal_year INT, start_date DATE, end_date DATE); INSERT INTO fiscal_year VALUES (2022, '2022-01-01', '2022-12-31'), (2023, '2023-01-01', '2023-12-31'); CREATE TABLE budget_allocation (service VARCHAR(20), fiscal_year INT, amount INT); INSERT INTO budget_allocation VALUES ('Healthcare', 2022, 500000), ('Education', 2022, 800000), ('Healthcare', 2023, 600000), ('Education', 2023, 900000); CREATE TABLE cities (id INT, name VARCHAR(20)); INSERT INTO cities VALUES (1, 'CityA'), (2, 'CityB'), (3, 'CityC');", "sql": "SELECT MIN(amount) FROM budget_allocation WHERE fiscal_year = (SELECT fiscal_year FROM fiscal_year WHERE start_date <= DATE_SUB(CURRENT_DATE, INTERVAL 1 YEAR) AND end_date >= DATE_SUB(CURRENT_DATE, INTERVAL 1 YEAR)) AND city_id = (SELECT id FROM cities WHERE name = 'CityB');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 275, "num_statements": 1} {"question": "What is the distribution of genetic research projects by primary investigator gender?", "schema": "CREATE SCHEMA if not exists genetics;CREATE TABLE if not exists genetics.research_projects (id INT, name VARCHAR(100), primary_investigator VARCHAR(50), primary_investigator_gender VARCHAR(10));INSERT INTO genetics.research_projects (id, name, primary_investigator, primary_investigator_gender) VALUES (1, 'ProjectX', 'Alex Garcia', 'male'), (2, 'ProjectY', 'Taylor Lee', 'non-binary'), (3, 'ProjectZ', 'Claire Kim', 'female');", "sql": "SELECT primary_investigator_gender, COUNT(*) as total_projects FROM genetics.research_projects GROUP BY primary_investigator_gender;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 132, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'copydml' (example 2).", "schema": null, "sql": "insert into copydml_test (t) values ('a');", "explanation": "DML from PostgreSQL core regression test for Copydml.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 42, "num_statements": 1} {"question": "Insert a new 'FishHealth' record into the 'FishHealth' table with ID 1, fish ID 2, health score 80, and date entered '2022-07-30 15:00:00'", "schema": "CREATE TABLE FishHealth (id INT, fish_id INT, health_score INT, date_entered TIMESTAMP);", "sql": "INSERT INTO FishHealth (id, fish_id, health_score, date_entered) VALUES (1, 2, 80, '2022-07-30 15:00:00');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 106, "num_statements": 1} {"question": "Retrieve the most recent geopolitical risk rating for India.", "schema": "CREATE TABLE GeopoliticalRiskAssessments (AssessmentID INT, Country VARCHAR(50), RiskRating VARCHAR(10), AssessmentDate DATE); INSERT INTO GeopoliticalRiskAssessments (AssessmentID, Country, RiskRating, AssessmentDate) VALUES (1, 'Iran', 'High', '2021-06-01'), (2, 'India', 'Medium', '2022-08-12');", "sql": "SELECT Country, RiskRating FROM GeopoliticalRiskAssessments WHERE AssessmentDate = (SELECT MAX(AssessmentDate) FROM GeopoliticalRiskAssessments WHERE Country = 'India');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 169, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: what is the mountain peak when the location is 53.1370°n 119.2667°w?", "schema": "CREATE TABLE table_name_74 (mountain_peak VARCHAR, location VARCHAR)", "sql": "SELECT mountain_peak FROM table_name_74 WHERE location = '53.1370°n 119.2667°w';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 80, "num_statements": 1} {"question": "Calculate the average account balance for customers in each city in the USA.", "schema": "CREATE TABLE customers (id INT, name VARCHAR(100), age INT, gender VARCHAR(10), city VARCHAR(50), state VARCHAR(50), account_balance DECIMAL(10,2));", "sql": "SELECT city, AVG(account_balance) as avg_balance FROM customers WHERE state = 'USA' GROUP BY city;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 98, "num_statements": 1} {"question": "Find the average attendance at events in each country.", "schema": "CREATE TABLE Events (EventID INT, Name TEXT, Attendance INT);CREATE TABLE EventLocations (EventID INT, Country TEXT);", "sql": "SELECT EventLocations.Country, AVG(Events.Attendance) FROM Events INNER JOIN EventLocations ON Events.EventID = EventLocations.EventID GROUP BY EventLocations.Country;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 167, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which Couple has a Rank by average larger than 3, and an Average larger than 16.5, and a Total smaller than 195, and a Number of dances larger than 3?", "schema": "CREATE TABLE table_name_20 (couple VARCHAR, number_of_dances VARCHAR, total VARCHAR, rank_by_average VARCHAR, average VARCHAR)", "sql": "SELECT couple FROM table_name_20 WHERE rank_by_average > 3 AND average > 16.5 AND total < 195 AND number_of_dances > 3;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 119, "num_statements": 1} {"question": "What is the percentage of games played at home for each team?", "schema": "CREATE TABLE teams (team_id INT, team_name TEXT, city TEXT); CREATE TABLE games (game_id INT, team_id INT, home BOOLEAN);", "sql": "SELECT t.team_name, (SUM(g.home) * 100.0 / COUNT(g.game_id)) as home_percentage FROM games g JOIN teams t ON g.team_id = t.team_id GROUP BY t.team_name;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 152, "num_statements": 1} {"question": "How many users signed up for the 'Elite' membership in the last quarter?", "schema": "CREATE TABLE memberships (membership_id INT, membership_type VARCHAR(50), signup_date DATE); INSERT INTO memberships (membership_id, membership_type, signup_date) VALUES (1, 'Basic', '2022-01-10'), (2, 'Premium', '2022-02-15'), (3, 'Elite', '2022-03-20'), (4, 'Basic', '2022-04-05');", "sql": "SELECT membership_type, COUNT(membership_id) as new_members FROM memberships WHERE membership_type = 'Elite' AND signup_date >= DATEADD(quarter, -1, CURRENT_DATE) GROUP BY membership_type;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 188, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the first date of election for the incumbent that was re-elected in the South Carolina 1 district?", "schema": "CREATE TABLE table_name_45 (first_elected VARCHAR, result VARCHAR, district VARCHAR)", "sql": "SELECT first_elected FROM table_name_45 WHERE result = 're-elected' AND district = 'south carolina 1';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 102, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the party of re-elected, incumbent Charles H. Grosvenor, who was first elected before 1894?", "schema": "CREATE TABLE table_name_3 (party VARCHAR, incumbent VARCHAR, result VARCHAR, first_elected VARCHAR)", "sql": "SELECT party FROM table_name_3 WHERE result = 're-elected' AND first_elected < 1894 AND incumbent = 'charles h. grosvenor';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 123, "num_statements": 1} {"question": "What was the maximum price of kids' garments in Australia?", "schema": "CREATE TABLE Categories (category_id INT, category VARCHAR(50), PRIMARY KEY (category_id)); INSERT INTO Categories (category_id, category) VALUES (1, 'Kids'), (2, 'Adults');", "sql": "SELECT MAX(price) as max_price FROM Products JOIN Categories ON Products.category = Categories.category WHERE Categories.category = 'Kids' AND Products.country = 'Australia';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 174, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the number of points for field goals being 1", "schema": "CREATE TABLE table_14342480_6 (points VARCHAR, field_goals VARCHAR)", "sql": "SELECT COUNT(points) FROM table_14342480_6 WHERE field_goals = 1;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 65, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the comp for games less than 2?", "schema": "CREATE TABLE table_name_63 (comp VARCHAR, games INTEGER)", "sql": "SELECT comp FROM table_name_63 WHERE games < 2;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 47, "num_statements": 1} {"question": "What is the minimum R&D expenditure for drugs approved in 2017?", "schema": "CREATE TABLE rd_expenditure (drug_id VARCHAR(10), approval_year INT, expenditure NUMERIC(12,2));", "sql": "SELECT MIN(expenditure) FROM rd_expenditure WHERE approval_year = 2017;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 71, "num_statements": 1} {"question": "List the unique types of healthcare facilities in 'rural' areas, excluding pharmacies.", "schema": "CREATE TABLE healthcare_facilities (id INT, name TEXT, location TEXT, type TEXT); INSERT INTO healthcare_facilities (id, name, location, type) VALUES (1, 'Hospital A', 'rural', 'hospital'); INSERT INTO healthcare_facilities (id, name, location, type) VALUES (2, 'Clinic A', 'rural', 'clinic'); INSERT INTO healthcare_facilities (id, name, location, type) VALUES (3, 'Pharmacy A', 'rural', 'pharmacy');", "sql": "SELECT DISTINCT type FROM healthcare_facilities WHERE location = 'rural' AND type != 'pharmacy';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 96, "num_statements": 1} {"question": "How many circular supply chain vendors are there in each region?", "schema": "CREATE TABLE vendors (vendor_id INT, vendor_name TEXT, region TEXT); INSERT INTO vendors (vendor_id, vendor_name, region) VALUES (1, 'Green Vendors', 'Northeast'); INSERT INTO vendors (vendor_id, vendor_name, region) VALUES (2, 'Eco-Friendly Vendors', 'West');", "sql": "SELECT region, COUNT(*) FROM vendors GROUP BY region;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 53, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: The match against Paul-Henri Mathieu had what outcome?", "schema": "CREATE TABLE table_name_92 (outcome VARCHAR, opponent VARCHAR)", "sql": "SELECT outcome FROM table_name_92 WHERE opponent = 'paul-henri mathieu';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 72, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which university hosted in Long Beach?", "schema": "CREATE TABLE table_name_12 (host VARCHAR, city VARCHAR)", "sql": "SELECT host FROM table_name_12 WHERE city = 'long beach';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 57, "num_statements": 1} {"question": "What is the total number of military aircraft in the 'Military_Aircraft' table?", "schema": "CREATE SCHEMA IF NOT EXISTS defense_security;CREATE TABLE IF NOT EXISTS defense_security.Military_Aircraft (id INT PRIMARY KEY, aircraft_name VARCHAR(255), type VARCHAR(255), quantity INT);INSERT INTO defense_security.Military_Aircraft (id, aircraft_name, type, quantity) VALUES (1, 'F-16 Fighting Falcon', 'Fighter', 1000), (2, 'B-52 Stratofortress', 'Bomber', 76);", "sql": "SELECT SUM(quantity) FROM defense_security.Military_Aircraft;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 61, "num_statements": 1} {"question": "What are the names of all rural infrastructure projects in the 'rural_infrastructure_2' table?", "schema": "CREATE TABLE rural_infrastructure_2 (id INT, project_name VARCHAR(50), sector VARCHAR(50)); INSERT INTO rural_infrastructure_2 (id, project_name, sector) VALUES (3, 'Smart Irrigation', 'Rural Infrastructure'), (4, 'Rural Connectivity', 'Rural Infrastructure');", "sql": "SELECT project_name FROM rural_infrastructure_2 WHERE sector = 'Rural Infrastructure';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 86, "num_statements": 1} {"question": "What is the minimum budget allocated for technology for social good projects in Oceania countries?", "schema": "CREATE TABLE SocialGoodBudget (Country VARCHAR(50), Budget DECIMAL(10,2)); INSERT INTO SocialGoodBudget (Country, Budget) VALUES ('Australia', 1200000.00), ('New Zealand', 1500000.00); CREATE TABLE Countries (Country VARCHAR(50), Continent VARCHAR(50)); INSERT INTO Countries (Country, Continent) VALUES ('Australia', 'Oceania'), ('New Zealand', 'Oceania');", "sql": "SELECT MIN(SocialGoodBudget.Budget) AS MinBudget FROM SocialGoodBudget INNER JOIN Countries ON SocialGoodBudget.Country = Countries.Country WHERE Countries.Continent = 'Oceania';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 178, "num_statements": 1} {"question": "What was the average revenue per shipment for the month of January 2022?", "schema": "CREATE TABLE shipments (shipment_id INT, shipment_date DATE, revenue DECIMAL(10,2)); INSERT INTO shipments (shipment_id, shipment_date, revenue) VALUES (1, '2022-01-01', 1000), (2, '2022-01-05', 2000), (3, '2022-02-03', 3000);", "sql": "SELECT AVG(revenue) FROM shipments WHERE shipment_date BETWEEN '2022-01-01' AND '2022-01-31';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 93, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many airlines do we have?", "schema": "CREATE TABLE AIRLINES (Id VARCHAR)", "sql": "SELECT COUNT(*) FROM AIRLINES;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 30, "num_statements": 1} {"question": "Delete the records in the expedition_researchers table for the expedition 'Expedition2'.", "schema": "CREATE TABLE expeditions (id INT PRIMARY KEY, name VARCHAR(50), location VARCHAR(50), start_date DATE, end_date DATE); CREATE TABLE researchers (id INT PRIMARY KEY, name VARCHAR(50), affiliation VARCHAR(50)); CREATE TABLE expedition_researchers AS SELECT NULL id, e.name AS expedition, r.name AS researcher FROM expeditions e JOIN researchers r ON TRUE WHERE e.location = r.affiliation;", "sql": "DELETE FROM expedition_researchers WHERE expedition = 'Expedition2';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "dml_delete", "is_postgresql_specific": false, "sql_length": 68, "num_statements": 1} {"question": "PostgreSQL regression test 'aggregates': Write the SELECT query (example 49).", "schema": null, "sql": "SELECT min(row(a,b)) FROM aggtest;", "explanation": "Regression test for Aggregates in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT min(row(a,b)) FROM aggtest) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 34, "num_statements": 1} {"question": "What is the total cost of all lifelong learning programs offered in the region of Paris?", "schema": "CREATE TABLE regions (region_name VARCHAR(255), region_id INT); CREATE TABLE lifelong_learning_programs (program_id INT, program_name VARCHAR(255), region_id INT, program_cost DECIMAL(10,2), PRIMARY KEY (program_id), FOREIGN KEY (region_id) REFERENCES regions(region_id));", "sql": "SELECT SUM(lifelong_learning_programs.program_cost) FROM lifelong_learning_programs INNER JOIN regions ON lifelong_learning_programs.region_id = regions.region_id WHERE regions.region_name = 'Paris';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 199, "num_statements": 1} {"question": "What is the total cargo weight handled by container ships built before 2010, grouped by ship builder?", "schema": "CREATE TABLE container_ships (ship_id INT, ship_name VARCHAR(255), ship_builder VARCHAR(255), year INT, cargo_weight INT);INSERT INTO container_ships (ship_id, ship_name, ship_builder, year, cargo_weight) VALUES (1, 'Ever Given', 'Baosteel', 2010, 210000), (2, 'CMA CGM Marco Polo', 'Daewoo Shipbuilding & Marine Engineering', 2008, 165000);", "sql": "SELECT ship_builder, SUM(cargo_weight) FROM container_ships WHERE year < 2010 GROUP BY ship_builder;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 100, "num_statements": 1} {"question": "What is the total number of military innovation projects and their average budget in Europe?", "schema": "CREATE TABLE MilitaryInnovationEurope (id INT, project VARCHAR(50), country VARCHAR(50), budget INT); INSERT INTO MilitaryInnovationEurope (id, project, country, budget) VALUES (1, 'Stealth Technology', 'UK', 12000000), (2, 'Cyber Warfare', 'Germany', 9000000), (3, 'Artificial Intelligence', 'France', 11000000);", "sql": "SELECT COUNT(*) AS total_projects, AVG(budget) AS avg_budget FROM MilitaryInnovationEurope;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 91, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Give me the name and year of opening of the manufacturers that have either less than 10 factories or more than 10 shops.", "schema": "CREATE TABLE manufacturer (name VARCHAR, open_year VARCHAR, num_of_shops VARCHAR, Num_of_Factories VARCHAR)", "sql": "SELECT name, open_year FROM manufacturer WHERE num_of_shops > 10 OR Num_of_Factories < 10;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 90, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who was the home team of the game at the time of 14:00?", "schema": "CREATE TABLE table_name_53 (home VARCHAR, time VARCHAR)", "sql": "SELECT home FROM table_name_53 WHERE time = '14:00';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 52, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'cube' (example 12).", "schema": null, "sql": "SELECT '-1.0e-7'::cube AS cube;", "explanation": "Example query from the 'cube' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "query_select", "is_postgresql_specific": true, "sql_length": 31, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'sqljson_queryfuncs' (example 202).", "schema": null, "sql": "INSERT INTO test_jsonb_constraints VALUES ('{\"a\": 1}', 1);", "explanation": "DML from PostgreSQL core regression test for Sqljson Queryfuncs.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 58, "num_statements": 1} {"question": "Find the total area of all buildings in the 'green_buildings' table", "schema": "CREATE TABLE green_buildings (id INT, name VARCHAR(50), location VARCHAR(50), area FLOAT, sustainability_rating INT);", "sql": "SELECT SUM(area) FROM green_buildings;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 38, "num_statements": 1} {"question": "List the number of healthcare providers in each county, for counties with more than 50 providers.", "schema": "CREATE TABLE healthcare_providers (id INT, county VARCHAR(10), count INT); INSERT INTO healthcare_providers (id, county, count) VALUES (1, 'County 1', 120), (2, 'County 2', 80), (3, 'County 3', 150), (4, 'County 4', 70), (5, 'County 5', 110);", "sql": "SELECT county, count FROM healthcare_providers WHERE count > 50 GROUP BY county;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 80, "num_statements": 1} {"question": "What is the total revenue generated by the Louvre from Impressionist paintings between 1950 and 1970?", "schema": "CREATE TABLE Artworks (artwork_id INT, name VARCHAR(255), artist_id INT, date_sold DATE, price DECIMAL(10,2), museum_id INT); CREATE TABLE Artists (artist_id INT, name VARCHAR(255), nationality VARCHAR(255), gender VARCHAR(255)); CREATE TABLE Museums (museum_id INT, name VARCHAR(255));", "sql": "SELECT SUM(Artworks.price) FROM Artworks INNER JOIN Artists ON Artworks.artist_id = Artists.artist_id INNER JOIN Museums ON Artworks.museum_id = Museums.museum_id WHERE Artists.nationality = 'Impressionist' AND Museums.name = 'The Louvre' AND YEAR(Artworks.date_sold) BETWEEN 1950 AND 1970;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 290, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the Score of the Competition with a Result of 5-0?", "schema": "CREATE TABLE table_name_7 (score VARCHAR, result VARCHAR)", "sql": "SELECT score FROM table_name_7 WHERE result = '5-0';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 52, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Where does Essendon play their home games?", "schema": "CREATE TABLE table_name_38 (venue VARCHAR, home_team VARCHAR)", "sql": "SELECT venue FROM table_name_38 WHERE home_team = 'essendon';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 61, "num_statements": 1} {"question": "Find the energy efficiency rating for a specific appliance model.", "schema": "CREATE TABLE appliance_ratings (appliance_model VARCHAR(255), energy_efficiency_rating FLOAT); INSERT INTO appliance_ratings VALUES ('Model A', 4.5), ('Model B', 3.8), ('Model C', 5.0), ('Model D', 4.2), ('Model E', 4.8);", "sql": "SELECT energy_efficiency_rating FROM appliance_ratings WHERE appliance_model = 'Model C';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 89, "num_statements": 1} {"question": "What is the total carbon emissions reduction achieved by clean energy projects in the US since 2015?", "schema": "CREATE TABLE projects (id INT, country VARCHAR(255), name VARCHAR(255), carbon_emissions_reduction INT, start_year INT); INSERT INTO projects (id, country, name, carbon_emissions_reduction, start_year) VALUES (1, 'US', 'Project1', 1500, 2015), (2, 'US', 'Project2', 2000, 2017), (3, 'US', 'Project3', 1000, 2016);", "sql": "SELECT SUM(carbon_emissions_reduction) FROM projects WHERE country = 'US' AND start_year >= 2015;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 97, "num_statements": 1} {"question": "pgTAP test for Aretap (assertion 43).", "schema": null, "sql": "SELECT * FROM check_test(\n tables_are( ARRAY['fou', 'foo'] ),\n true,\n 'tables_are(tables)',\n 'Search path ' || pg_catalog.current_setting('search_path') || ' should have the correct tables',\n ''\n);", "explanation": "SQL assertion from pgTAP test suite for Aretap.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": true, "sql_length": 212, "num_statements": 1} {"question": "What is the total amount of research grants awarded to female faculty members in the 'faculty' and 'research_grants' tables?", "schema": "CREATE TABLE faculty (id INT, name VARCHAR(255), gender VARCHAR(10), department VARCHAR(255)); INSERT INTO faculty (id, name, gender, department) VALUES (1, 'Alice', 'Female', 'Physics'), (2, 'Bob', 'Male', 'Mathematics'), (3, 'Charlie', 'Male', 'Chemistry'), (4, 'Diana', 'Female', 'Biology');", "sql": "SELECT SUM(rg.amount) FROM research_grants rg JOIN faculty f ON rg.department = f.department WHERE f.gender = 'Female';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 119, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'transactions' (example 131).", "schema": null, "sql": "INSERT INTO savepoints VALUES (15);", "explanation": "DML from PostgreSQL core regression test for Transactions.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 35, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the Report on November 15, 2000?", "schema": "CREATE TABLE table_name_18 (report VARCHAR, date VARCHAR)", "sql": "SELECT report FROM table_name_18 WHERE date = 'november 15, 2000';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the score of the season in the host city kėdainiai?", "schema": "CREATE TABLE table_name_59 (score VARCHAR, host_city VARCHAR)", "sql": "SELECT score FROM table_name_59 WHERE host_city = 'kėdainiai';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 62, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What round was jerry corcoran drafted in as pick number 114?", "schema": "CREATE TABLE table_name_63 (round INTEGER, pick VARCHAR, player VARCHAR)", "sql": "SELECT MAX(round) FROM table_name_63 WHERE pick > 114 AND player = 'jerry corcoran';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 84, "num_statements": 1} {"question": "What is the average flight time for each aircraft model by year?", "schema": "CREATE TABLE aircraft_flights (id INT, model VARCHAR(50), flight_hours DECIMAL(5,2), year INT); INSERT INTO aircraft_flights (id, model, flight_hours, year) VALUES (1, 'Boeing 737', 3500.5, 2019), (2, 'Airbus A320', 3200.2, 2019), (3, 'Boeing 787', 3800.8, 2018), (4, 'SpaceX Starship', 5000, 2022);", "sql": "SELECT model, year, AVG(flight_hours) as avg_flight_time FROM aircraft_flights GROUP BY model, year;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 100, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'pageinspect' (example 18).", "schema": null, "sql": "SELECT * FROM hash_bitmap_info('test_hash_a_idx', 4);", "explanation": "Example query from the 'pageinspect' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 53, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: For the call sign w242at what's the city the license plate is registered to?", "schema": "CREATE TABLE table_name_78 (city_of_license VARCHAR, call_sign VARCHAR)", "sql": "SELECT city_of_license FROM table_name_78 WHERE call_sign = 'w242at';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 69, "num_statements": 1} {"question": "Insert new records for 3 donors from the 'Donors' table", "schema": "CREATE TABLE Donors (DonorID INT, FirstName VARCHAR(50), LastName VARCHAR(50), DonationDate DATE, Amount DECIMAL(10,2));", "sql": "INSERT INTO Donors (DonorID, FirstName, LastName, DonationDate, Amount) VALUES (101, 'Sophia', 'Lee', '2022-12-31', 500), (102, 'Daniel', 'Kim', '2022-12-31', 750), (103, 'Ava', 'Garcia', '2022-12-31', 1000);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 208, "num_statements": 1} {"question": "What is the total carbon tax revenue (in USD) for each country, ranked from highest to lowest?", "schema": "CREATE TABLE carbon_tax (country VARCHAR(50), revenue FLOAT); INSERT INTO carbon_tax (country, revenue) VALUES ('Country A', 5000000), ('Country B', 6000000), ('Country C', 4000000), ('Country D', 7000000);", "sql": "SELECT country, revenue, ROW_NUMBER() OVER (ORDER BY revenue DESC) as rank FROM carbon_tax;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_window_function", "is_postgresql_specific": true, "sql_length": 91, "num_statements": 1} {"question": "pgTAP test for Ownership (assertion 134).", "schema": null, "sql": "SELECT * FROM check_test(\n type_owner_is('sometype', '__no-one', 'mumble'),\n\tfalse,\n 'type_owner_is(type, non-user, desc)',\n 'mumble',\n ' have: ' || current_user || '\n want: __no-one'\n);", "explanation": "SQL assertion from pgTAP test suite for Ownership.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 212, "num_statements": 1} {"question": "Insert new labor productivity data", "schema": "CREATE TABLE productivity (id INT PRIMARY KEY, company VARCHAR(100), value DECIMAL(5,2));", "sql": "INSERT INTO productivity (company, value) VALUES ('Teck Resources', 320);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 73, "num_statements": 1} {"question": "Who are the producers with the highest average rating for TV shows about social justice?", "schema": "CREATE TABLE shows (id INT, title TEXT, rating FLOAT, producer TEXT, genre TEXT); INSERT INTO shows (id, title, rating, producer, genre) VALUES (1, 'Show1', 7.5, 'Producer1', 'Social Justice'), (2, 'Show2', 8.2, 'Producer2', 'Social Justice'), (3, 'Show3', 6.8, 'Producer1', 'Comedy'), (4, 'Show4', 9.0, 'Producer3', 'Social Justice'), (5, 'Show5', 7.0, 'Producer1', 'Social Justice');", "sql": "SELECT producer, AVG(rating) FROM shows WHERE genre = 'Social Justice' GROUP BY producer ORDER BY AVG(rating) DESC;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 115, "num_statements": 1} {"question": "Show the total revenue from concert ticket sales for each genre.", "schema": "CREATE TABLE Genres (GenreID INT, GenreName VARCHAR(255)); INSERT INTO Genres (GenreID, GenreName) VALUES (1, 'Pop'), (2, 'Rock'), (3, 'Jazz'), (4, 'Hip Hop'), (5, 'Country'); CREATE TABLE Concerts (ConcertID INT, GenreID INT, Venue VARCHAR(255), TicketPrice DECIMAL(5,2)); INSERT INTO Concerts (ConcertID, GenreID, Venue, TicketPrice) VALUES (1, 1, 'Venue1', 50.00), (2, 2, 'Venue2', 60.00), (3, 3, 'Venue3', 40.00), (4, 4, 'Venue4', 70.00), (5, 5, 'Venue5', 30.00), (6, 1, 'Venue6', 55.00), (7, 2, 'Venue7', 65.00), (8, 3, 'Venue8', 45.00);", "sql": "SELECT G.GenreName, SUM(C.TicketPrice) as TotalRevenue FROM Genres G JOIN Concerts C ON G.GenreID = C.GenreID GROUP BY G.GenreName;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 131, "num_statements": 1} {"question": "Find the average landfill capacity in APAC for countries with a capacity greater than 100,000 cubic meters.", "schema": "CREATE TABLE LandfillCapacityAPAC (id INT, country VARCHAR(50), region VARCHAR(50), capacity_cubic_meters INT); INSERT INTO LandfillCapacityAPAC (id, country, region, capacity_cubic_meters) VALUES (1, 'China', 'APAC', 120000), (2, 'Japan', 'APAC', 90000), (3, 'India', 'APAC', 150000);", "sql": "SELECT AVG(capacity_cubic_meters) FROM LandfillCapacityAPAC WHERE capacity_cubic_meters > 100000 AND region = 'APAC';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 117, "num_statements": 1} {"question": "PostgreSQL regression test 'xml': Write the SELECT query (example 115).", "schema": null, "sql": "SELECT xmlagg(data) FROM xmltest;", "explanation": "Regression test for Xml in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT xmlagg(data) FROM xmltest) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 33, "num_statements": 1} {"question": "What is the maximum number of humanitarian assistance personnel deployed by a single organization?", "schema": "CREATE TABLE humanitarian_assistance (id INT PRIMARY KEY, organization VARCHAR(100), personnel INT); INSERT INTO humanitarian_assistance (id, organization, personnel) VALUES (1, 'Org 1', 1200), (2, 'Org 2', 1500), (3, 'Org 3', 1000);", "sql": "SELECT MAX(personnel) FROM humanitarian_assistance;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 51, "num_statements": 1} {"question": "List the number of mobile and broadband subscribers per region for the most recent quarter.", "schema": "CREATE TABLE regions (region_id INT, region_name VARCHAR(50)); INSERT INTO regions (region_id, region_name) VALUES (1, 'North'), (2, 'South'), (3, 'East'), (4, 'West'); CREATE TABLE mobile_subscribers (subscriber_id INT, region_id INT, join_date DATE); INSERT INTO mobile_subscribers (subscriber_id, region_id, join_date) VALUES (1, 1, '2021-01-01'), (2, 2, '2021-03-01'), (3, 3, '2021-02-01'), (4, 4, '2021-04-01'); CREATE TABLE broadband_subscribers (subscriber_id INT, region_id INT, join_date DATE); INSERT INTO broadband_subscribers (subscriber_id, region_id, join_date) VALUES (5, 1, '2021-01-15'), (6, 2, '2021-03-15'), (7, 3, '2021-02-15'), (8, 4, '2021-04-15');", "sql": "SELECT r.region_name, COUNT(m.subscriber_id) AS mobile_count, COUNT(b.subscriber_id) AS broadband_count FROM regions r LEFT JOIN mobile_subscribers m ON r.region_id = m.region_id LEFT JOIN broadband_subscribers b ON r.region_id = b.region_id WHERE QUARTER(m.join_date) = QUARTER(CURRENT_DATE()) AND YEAR(m.join_date) = YEAR(CURRENT_DATE()) GROUP BY r.region_id;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 361, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the highest administrative panel value with an agricultural panel less than 1 and a labour panel value less than 5?", "schema": "CREATE TABLE table_name_56 (administrative_panel INTEGER, agricultural_panel VARCHAR, labour_panel VARCHAR)", "sql": "SELECT MAX(administrative_panel) FROM table_name_56 WHERE agricultural_panel < 1 AND labour_panel < 5;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 102, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: where catalog number is 81258 , what are all the studio ?", "schema": "CREATE TABLE table_11222744_2 (studio VARCHAR, catalog_number VARCHAR)", "sql": "SELECT studio FROM table_11222744_2 WHERE catalog_number = '81258';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 67, "num_statements": 1} {"question": "Identify the unique age ranges of members who participated in 'yoga' or 'spinning' classes.", "schema": "CREATE TABLE member_classes (member_id INT, class_type VARCHAR(50), age INT); INSERT INTO member_classes (member_id, class_type, age) VALUES (1, 'yoga', 30), (2, 'spinning', 35), (3, 'yoga', 40), (4, 'spinning', 25), (5, 'yoga', 30);", "sql": "SELECT DISTINCT FLOOR(age / 10) * 10 AS age_range FROM member_classes WHERE class_type IN ('yoga', 'spinning');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 111, "num_statements": 1} {"question": "Add a new court named 'High Court of New Zealand' located in 'Wellington'", "schema": "CREATE TABLE courts (id INT, name VARCHAR(50), location VARCHAR(50));", "sql": "INSERT INTO courts (id, name, location) VALUES (3, 'High Court of New Zealand', 'Wellington');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 94, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the average start with wins larger than 0 and 32nd position?", "schema": "CREATE TABLE table_name_34 (starts INTEGER, wins VARCHAR, position VARCHAR)", "sql": "SELECT AVG(starts) FROM table_name_34 WHERE wins > 0 AND position = '32nd';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 75, "num_statements": 1} {"question": "What is the average data usage for each mobile subscriber in Mbytes?", "schema": "CREATE TABLE mobile_subscribers (subscriber_id INT, data_usage FLOAT); INSERT INTO mobile_subscribers (subscriber_id, data_usage) VALUES (1, 50.5), (2, 75.3), (3, 32.1);", "sql": "SELECT subscriber_id, data_usage/1024/1024 AS avg_data_usage_mb FROM mobile_subscribers;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 88, "num_statements": 1} {"question": "Find the number of male and female fans who have attended basketball games in the last year", "schema": "CREATE TABLE fans (fan_id INT, gender VARCHAR(10), last_attended_game DATE); INSERT INTO fans (fan_id, gender, last_attended_game) VALUES (1, 'Male', '2022-02-15'), (2, 'Female', '2022-03-01'), (3, 'Male', '2021-12-31');", "sql": "SELECT gender, COUNT(*) as num_fans FROM fans WHERE last_attended_game >= DATEADD(year, -1, GETDATE()) GROUP BY gender;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 119, "num_statements": 1} {"question": "What is the total number of space missions led by women?", "schema": "CREATE TABLE space_missions (id INT, name VARCHAR(255), leader VARCHAR(255), year INT); INSERT INTO space_missions (id, name, leader, year) VALUES (1, 'Mars Rover', 'Dr. Jessica Watkins', 2022); INSERT INTO space_missions (id, name, leader, year) VALUES (2, 'ISS Expedition', 'Anousheh Ansari', 2023);", "sql": "SELECT COUNT(*) FROM space_missions WHERE leader IN (SELECT name FROM astronauts WHERE gender = 'Female');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 106, "num_statements": 1} {"question": "List all clients who have a Shariah-compliant personal loan or a socially responsible auto loan?", "schema": "CREATE TABLE client_loans (client_id INT, loan_type VARCHAR(20)); INSERT INTO client_loans (client_id, loan_type) VALUES (1, 'Shariah-compliant personal loan'), (2, 'Socially responsible auto loan'), (3, 'Shariah-compliant mortgage');", "sql": "SELECT client_id FROM client_loans WHERE loan_type IN ('Shariah-compliant personal loan', 'Socially responsible auto loan');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 124, "num_statements": 1} {"question": "Calculate the average severity of vulnerabilities for each software application", "schema": "CREATE TABLE vulnerabilities (id INT, software_app VARCHAR(50), severity INT);", "sql": "SELECT software_app, AVG(severity) as avg_severity FROM vulnerabilities GROUP BY software_app;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 94, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What NFL team was the player with pick number 28 drafted to?", "schema": "CREATE TABLE table_20996923_25 (nfl_team VARCHAR, pick__number VARCHAR)", "sql": "SELECT nfl_team FROM table_20996923_25 WHERE pick__number = 28;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 63, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the place when the player is Bob Gilder and the money was $20,903?", "schema": "CREATE TABLE table_name_88 (place VARCHAR, money___$__ VARCHAR, player VARCHAR)", "sql": "SELECT place FROM table_name_88 WHERE money___$__ = '20,903' AND player = 'bob gilder';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 87, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'subselect' (example 138).", "schema": null, "sql": "insert into ta values(1,1);", "explanation": "DML from PostgreSQL core regression test for Subselect.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 27, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the sum of Pleasure, when Drug is \"LSD\", and when Psychological Dependence is greater than 1.1?", "schema": "CREATE TABLE table_name_57 (pleasure INTEGER, drug VARCHAR, psychological_dependence VARCHAR)", "sql": "SELECT SUM(pleasure) FROM table_name_57 WHERE drug = 'lsd' AND psychological_dependence > 1.1;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 94, "num_statements": 1} {"question": "What is the drought impact on agriculture in California in terms of water usage reduction?", "schema": "CREATE TABLE drought_impact (state VARCHAR(20), sector VARCHAR(20), reduction FLOAT); INSERT INTO drought_impact (state, sector, reduction) VALUES ('California', 'Industrial', 0.1), ('California', 'Agriculture', 0.2), ('California', 'Domestic', 0.15);", "sql": "SELECT reduction FROM drought_impact WHERE state = 'California' AND sector = 'Agriculture';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 91, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the epa rated combined fuel economy for the all-electric vehicle with dirty electric grid rocky mountains (denver) of 330 g/mi (205 g/km)?", "schema": "CREATE TABLE table_23840623_4 (epa_rated_combined_fuel_economy VARCHAR, operating_mode VARCHAR, dirty_electric_grid_rocky_mountains__denver_ VARCHAR)", "sql": "SELECT epa_rated_combined_fuel_economy FROM table_23840623_4 WHERE operating_mode = 'All-electric' AND dirty_electric_grid_rocky_mountains__denver_ = '330 g/mi (205 g/km)';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 172, "num_statements": 1} {"question": "What is the total biomass of fish in the Pacific ocean?", "schema": "CREATE TABLE pacific_fish (id INT, name VARCHAR(50), biomass FLOAT); INSERT INTO pacific_fish (id, name, biomass) VALUES (1, 'Tuna', 3500.2), (2, 'Salmon', 2800.9), (3, 'Mackerel', 2200.6), (4, 'Squid', 1800.5);", "sql": "SELECT SUM(biomass) FROM pacific_fish;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 38, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: what are all the district with incumbent being sam hobbs", "schema": "CREATE TABLE table_1342270_3 (district VARCHAR, incumbent VARCHAR)", "sql": "SELECT district FROM table_1342270_3 WHERE incumbent = 'Sam Hobbs';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 67, "num_statements": 1} {"question": "How many visual artists are represented in the database, and what is the distribution by their age group?", "schema": "CREATE TABLE artists (id INT, name VARCHAR(255), birth_date DATE, age INT);", "sql": "SELECT FLOOR((YEAR(CURRENT_DATE) - YEAR(birth_date)) / 10) * 10 as age_group, COUNT(*) as artist_count FROM artists GROUP BY age_group;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 135, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: what are all the percent (1990) where state is united states", "schema": "CREATE TABLE table_1182314_5 (percent__1990_ VARCHAR, state VARCHAR)", "sql": "SELECT percent__1990_ FROM table_1182314_5 WHERE state = 'United states';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 73, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: The Estevan Bruins (WCHL) are affiliated with what NHL team?", "schema": "CREATE TABLE table_name_51 (nhl_team VARCHAR, college_junior_club_team VARCHAR)", "sql": "SELECT nhl_team FROM table_name_51 WHERE college_junior_club_team = 'estevan bruins (wchl)';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 92, "num_statements": 1} {"question": "Display union_name and position for employees in the 'employees' table who are members of unions with names starting with 'B'", "schema": "CREATE TABLE labor_unions (id INT, union_name VARCHAR(50), members INT); CREATE TABLE employees (id INT, union_id INT, name VARCHAR(50), position VARCHAR(50));", "sql": "SELECT e.name, e.position, l.union_name FROM employees e JOIN labor_unions l ON e.union_id = l.id WHERE l.union_name LIKE 'B%';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 127, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which Score has a Money ( £ ) of 90,400, and a Country of south africa, and a Player of thomas aiken? Question 1", "schema": "CREATE TABLE table_name_38 (score VARCHAR, player VARCHAR, money___£__ VARCHAR, country VARCHAR)", "sql": "SELECT score FROM table_name_38 WHERE money___£__ = '90,400' AND country = 'south africa' AND player = 'thomas aiken';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 118, "num_statements": 1} {"question": "What is the total revenue of eco-tourism businesses in Costa Rica?", "schema": "CREATE TABLE Revenue (id INT, country TEXT, type TEXT, amount FLOAT); INSERT INTO Revenue (id, country, type, amount) VALUES (1, 'Costa Rica', 'Eco-tourism', 800000), (2, 'Costa Rica', 'Hotels', 600000), (3, 'Costa Rica', 'Eco-tourism', 900000), (4, 'Costa Rica', 'Tour operators', 700000);", "sql": "SELECT SUM(amount) FROM Revenue WHERE country = 'Costa Rica' AND type = 'Eco-tourism';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 86, "num_statements": 1} {"question": "What was the average price per pound for each product category sold in Oregon in 2021?", "schema": "CREATE TABLE sales (date DATE, dispensary VARCHAR(255), product VARCHAR(255), price DECIMAL(10,2), weight DECIMAL(10,2)); INSERT INTO sales VALUES ('2021-10-01', 'Dispensary A', 'Flower', 45.00, 10), ('2021-10-01', 'Dispensary B', 'Concentrate', 50.00, 20); CREATE TABLE dispensaries (name VARCHAR(255), state VARCHAR(2)); INSERT INTO dispensaries VALUES ('Dispensary A', 'OR'), ('Dispensary B', 'WA');", "sql": "SELECT p.product as product_category, AVG(price / weight) as avg_price_per_pound FROM sales JOIN dispensaries ON sales.dispensary = dispensaries.name WHERE EXTRACT(YEAR FROM date) = 2021 AND state = 'OR' GROUP BY p.product;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 223, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What's the Length with the Version of Album Version?", "schema": "CREATE TABLE table_name_60 (length VARCHAR, version VARCHAR)", "sql": "SELECT length FROM table_name_60 WHERE version = 'album version';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 65, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'truncate' (example 46).", "schema": null, "sql": "INSERT INTO trunc_faa VALUES (5, 'five', 'FIVE');", "explanation": "DML from PostgreSQL core regression test for Truncate.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 49, "num_statements": 1} {"question": "Update the risk category of policyholders living in New York to 'High Risk'.", "schema": "CREATE TABLE Policyholders (ID INT, Name VARCHAR(50), Age INT, Gender VARCHAR(10), City VARCHAR(50), State VARCHAR(20), ZipCode VARCHAR(10), RiskCategory VARCHAR(10));", "sql": "UPDATE Policyholders SET RiskCategory = 'High Risk' WHERE State = 'New York';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 77, "num_statements": 1} {"question": "What is the total quantity of eco-friendly dyes used in textile production in Europe?", "schema": "CREATE TABLE TextileProduction (id INT, dye VARCHAR(255), region VARCHAR(255), quantity INT); INSERT INTO TextileProduction (id, dye, region, quantity) VALUES (1, 'Natural Dye', 'Europe', 500), (2, 'Synthetic Dye', 'Asia', 700), (3, 'Low-Impact Dye', 'South America', 600);", "sql": "SELECT SUM(quantity) FROM TextileProduction WHERE dye IN ('Natural Dye', 'Low-Impact Dye') AND region = 'Europe';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 113, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the chassis of the Honda Engine from 2008?", "schema": "CREATE TABLE table_name_68 (chassis VARCHAR, engine VARCHAR, year VARCHAR)", "sql": "SELECT chassis FROM table_name_68 WHERE engine = 'honda' AND year = 2008;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 73, "num_statements": 1} {"question": "pgTAP test for Pgtap--Unpackaged--0.91.0 (assertion 296).", "schema": null, "sql": "ALTER EXTENSION pgtap ADD FUNCTION hasnt_trigger ( NAME, NAME, NAME );", "explanation": "SQL assertion from pgTAP test suite for Pgtap--Unpackaged--0.91.0.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 70, "num_statements": 1} {"question": "Find the number of distinct animal species in each habitat", "schema": "CREATE TABLE species (id INT, name VARCHAR(255));CREATE TABLE animals (id INT, species_id INT, habitat_id INT); INSERT INTO species (id, name) VALUES (1, 'Lion'), (2, 'Elephant'), (3, 'Giraffe'); INSERT INTO animals (id, species_id, habitat_id) VALUES (1, 1, 2), (2, 2, 1), (3, 3, 2);", "sql": "SELECT h.name AS habitat_name, COUNT(DISTINCT a.species_id) AS distinct_species FROM animals a INNER JOIN habitats h ON a.habitat_id = h.id GROUP BY h.name;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 156, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the latest year when Call of Duty 4: Modern Warfare was the game?", "schema": "CREATE TABLE table_name_63 (year INTEGER, game VARCHAR)", "sql": "SELECT MAX(year) FROM table_name_63 WHERE game = 'call of duty 4: modern warfare';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 82, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'tablefunc' (example 75).", "schema": null, "sql": "SELECT * FROM connectby('connectby_int', 'keyid', 'parent_keyid', '2', 0, '~') AS t(keyid int, parent_keyid int, level int, branch float);", "explanation": "Example query from the 'tablefunc' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 138, "num_statements": 1} {"question": "What is the average dissolved oxygen level for each species of fish in the fish_stock table?", "schema": "CREATE TABLE fish_stock (species VARCHAR(50), location VARCHAR(50), dissolved_oxygen FLOAT); INSERT INTO fish_stock (species, location, dissolved_oxygen) VALUES ('Tilapia', 'Lake Victoria', 6.5), ('Tilapia', 'Lake Tanganyika', 7.0), ('Salmon', 'Pacific Ocean', 8.0);", "sql": "SELECT species, AVG(dissolved_oxygen) FROM fish_stock GROUP BY species;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 71, "num_statements": 1} {"question": "Show a SQL definition from the pg_partman project (pg_partman--1.6.1--1.7.0, item 16).", "schema": null, "sql": "/*\n * Function to create a child table in a time-based partition set\n */\nCREATE OR REPLACE FUNCTION create_time_partition (p_parent_table text, p_partition_times timestamp[])\nRETURNS text\n LANGUAGE plpgsql SECURITY DEFINER\n AS $$\nDECLARE\n\nv_all text[] := ARRAY['SELECT', 'INSERT', 'UPDATE', 'DELETE', 'TRUNCATE', 'REFERENCES', 'TRIGGER'];\nv_analyze boolean := FALSE;\nv_control text;\nv_grantees text[];\nv_hasoids boolean;\nv_job_id bigint;\nv_jobmon boolean;\nv_jobmon_schema text;\nv_old_search_path text;\nv_parent_grant record;\nv_parent_owner text;\nv_parent_schema text;\nv_parent_tablename text;\nv_partition_name text;\nv_partition_suffix text;\nv_parent_tablespace text;\nv_part_interval interval;\nv_partition_timestamp_end timestamp;\nv_partition_timestamp_start timestamp;\nv_quarter text;\nv_revoke text[];\nv_sql text;\nv_step_id bigint;\nv_step_overflow_id bigint;\nv_tablename text;\nv_trunc_value text;\nv_time timestamp;\nv_type text;\nv_year text;\n\nBEGIN\n\nSELECT type\n , control\n , part_interval\n , jobmon\nINTO v_type\n , v_control\n , v_part_interval\n , v_jobmon\nFROM @extschema@.part_config\nWHERE parent_table = p_parent_table\nAND (type = 'time-static' OR type = 'time-dynamic' OR type = 'time-custom');\n\nIF NOT FOUND THEN\n RAISE EXCEPTION 'ERROR: no config found for %', p_parent_table;\nEND IF;\n\nIF v_jobmon THEN\n SELECT nspname INTO v_jobmon_schema FROM pg_namespace n, pg_extension e WHERE e.extname = 'pg_jobmon' AND e.extnamespace = n.oid;\n IF v_jobmon_schema IS NOT NULL THEN\n SELECT current_setting('search_path') INTO v_old_search_path;\n EXECUTE 'SELECT set_config(''search_path'',''@extschema@,'||v_jobmon_schema||''',''false'')';\n END IF;\nEND IF;\n\nSELECT tableowner, schemaname, tablename, tablespace INTO v_parent_owner, v_parent_schema, v_parent_tablename, v_parent_tablespace FROM pg_tables WHERE schemaname ||'.'|| tablename = p_parent_table;\n\nFOREACH v_time IN ARRAY p_partition_times LOOP\n\n v_partition_suffix := to_char(v_time, 'YYYY');\n IF v_part_interval < '1 year' AND v_part_interval <> '1 week' THEN\n v_partition_suffix := v_partition_suffix ||'_'|| to_char(v_time, 'MM');\n IF v_part_interval < '1 month' AND v_part_interval <> '1 week' THEN\n v_partition_suffix := v_partition_suffix ||'_'|| to_char(v_time, 'DD');\n IF v_part_interval < '1 day' THEN\n v_partition_suffix := v_partition_suffix || '_' || to_char(v_time, 'HH24MI');\n IF v_part_interval < '1 minute' THEN\n v_partition_suffix := v_partition_suffix || to_char(v_time, 'SS');\n END IF; -- end < minute IF\n END IF; -- end < day IF\n END IF; -- end < month IF\n END IF; -- end < year IF\n\n v_partition_timestamp_start := v_time;\n BEGIN\n v_partition_timestamp_end := v_time + v_part_interval;\n EXCEPTION WHEN datetime_field_overflow THEN\n RAISE WARNING 'Attempted partition time interval is outside PostgreSQL''s supported time range.\n Child partition creation after time % skipped', v_time;\n v_step_overflow_id := add_step(v_job_id, 'Attempted partition time interval is outside PostgreSQL''s supported time range.');\n PERFORM update_step(v_step_overflow_id, 'CRITICAL', 'Child partition creation after time '||v_time||' skipped');\n CONTINUE;\n END;\n\n IF v_part_interval = '1 week' THEN\n v_partition_suffix := to_char(v_time, 'IYYY') || 'w' || to_char(v_time, 'IW');\n END IF;\n\n -- \"Q\" is ignored in to_timestamp, so handle special case\n IF v_part_interval = '3 months' AND (v_type = 'time-static' OR v_type = 'time-dynamic') THEN\n v_year := to_char(v_time, 'YYYY');\n v_quarter := to_char(v_time, 'Q');\n v_partition_suffix := v_year || 'q' || v_quarter;\n END IF;\n\n v_partition_name := @extschema@.check_name_length(v_parent_tablename, v_parent_schema, v_partition_suffix, TRUE);\n\n SELECT tablename INTO v_tablename FROM pg_catalog.pg_tables WHERE schemaname ||'.'|| tablename = v_partition_name;\n IF v_tablename IS NOT NULL THEN\n CONTINUE;\n END IF;\n\n -- Ensure analyze is run if a new partition is created. Otherwise if one isn't, will be false and analyze will be skipped\n v_analyze := TRUE;\n\n IF v_jobmon_schema IS NOT NULL THEN\n v_job_id := add_job('PARTMAN CREATE TABLE: '||p_parent_table);\n v_step_id := add_step(v_job_id, 'Creating new partition '||v_partition_name||' with interval from '||v_partition_timestamp_start||' to '||(v_partition_timestamp_end-'1sec'::interval));\n END IF;\n\n v_sql := 'CREATE TABLE '||v_partition_name||' (LIKE '||p_parent_table||' INCLUDING DEFAULTS INCLUDING CONSTRAINTS INCLUDING INDEXES INCLUDING STORAGE INCLUDING COMMENTS)';\n SELECT relhasoids INTO v_hasoids FROM pg_class WHERE oid::regclass = p_parent_table::regclass;\n IF v_hasoids IS TRUE THEN\n v_sql := v_sql || ' WITH (OIDS)';\n END IF;\n EXECUTE v_sql;\n SELECT tablename INTO v_tablename FROM pg_catalog.pg_tables WHERE schemaname ||'.'|| tablename = v_partition_name;\n IF v_parent_tablespace IS NOT NULL THEN\n EXECUTE 'ALTER TABLE '||v_partition_name||' SET TABLESPACE '||v_parent_tablespace;\n END IF;\n EXECUTE 'ALTER TABLE '||v_partition_name||' ADD CONSTRAINT '||v_tablename||'_partition_check\n CHECK ('||v_control||'>='||quote_literal(v_partition_timestamp_start)||' AND '||v_control||'<'||quote_literal(v_partition_timestamp_end)||')';\n EXECUTE 'ALTER TABLE '||v_partition_name||' INHERIT '||p_parent_table;\n\n -- If custom time, set extra config options.\n IF v_type = 'time-custom' THEN\n INSERT INTO @extschema@.custom_time_partitions (parent_table, child_table, partition_range)\n VALUES ( p_parent_table, v_partition_name, tstzrange(v_partition_timestamp_start, v_partition_timestamp_end, '[)') );\n END IF;\n\n FOR v_parent_grant IN\n SELECT array_agg(DISTINCT privilege_type::text ORDER BY privilege_type::text) AS types, grantee\n FROM information_schema.table_privileges\n WHERE table_schema ||'.'|| table_name = p_parent_table\n GROUP BY grantee\n LOOP\n EXECUTE 'GRANT '||array_to_string(v_parent_grant.types, ',')||' ON '||v_partition_name||' TO '||v_parent_grant.grantee;\n SELECT array_agg(r) INTO v_revoke FROM (SELECT unnest(v_all) AS r EXCEPT SELECT unnest(v_parent_grant.types)) x;\n IF v_revoke IS NOT NULL THEN\n EXECUTE 'REVOKE '||array_to_string(v_revoke, ',')||' ON '||v_partition_name||' FROM '||v_parent_grant.grantee||' CASCADE';\n END IF;\n v_grantees := array_append(v_grantees, v_parent_grant.grantee::text);\n END LOOP;\n -- Revoke all privileges from roles that have none on the parent\n IF v_grantees IS NOT NULL THEN\n SELECT array_agg(r) INTO v_revoke FROM (\n SELECT DISTINCT grantee::text AS r FROM information_schema.table_privileges WHERE table_schema ||'.'|| table_name = v_partition_name\n EXCEPT\n SELECT unnest(v_grantees)) x;\n IF v_revoke IS NOT NULL THEN\n EXECUTE 'REVOKE ALL ON '||v_partition_name||' FROM '||array_to_string(v_revoke, ',');\n END IF;\n END IF;\n\n EXECUTE 'ALTER TABLE '||v_partition_name||' OWNER TO '||v_parent_owner;\n\n IF v_jobmon_schema IS NOT NULL THEN\n PERFORM update_step(v_step_id, 'OK', 'Done');\n IF v_step_overflow_id IS NOT NULL THEN\n PERFORM fail_job(v_job_id);\n ELSE\n PERFORM close_job(v_job_id);\n END IF;\n END IF;\n\nEND LOOP;\n\nIF v_analyze THEN\n EXECUTE 'ANALYZE '||p_parent_table;\nEND IF;\n\nIF v_jobmon_schema IS NOT NULL THEN\n EXECUTE 'SELECT set_config(''search_path'','''||v_old_search_path||''',''false'')';\nEND IF;\n\nRETURN v_partition_name;\n\nEXCEPTION\n WHEN OTHERS THEN\n IF v_jobmon_schema IS NOT NULL THEN\n IF v_job_id IS NULL THEN\n EXECUTE 'SELECT '||v_jobmon_schema||'.add_job(''PARTMAN CREATE TABLE: '||p_parent_table||''')' INTO v_job_id;\n EXECUTE 'SELECT '||v_jobmon_schema||'.add_step('||v_job_id||', ''EXCEPTION before job logging started'')' INTO v_step_id;\n ELSIF v_step_id IS NULL THEN\n EXECUTE 'SELECT '||v_jobmon_schema||'.add_step('||v_job_id||', ''EXCEPTION before first step logged'')' INTO v_step_id;\n END IF;\n EXECUTE 'SELECT '||v_jobmon_schema||'.update_step('||v_step_id||', ''CRITICAL'', ''ERROR: '||coalesce(SQLERRM,'unknown')||''')';\n EXECUTE 'SELECT '||v_jobmon_schema||'.fail_job('||v_job_id||')';\n END IF;\n RAISE EXCEPTION '%', SQLERRM;\nEND\n$$;", "explanation": "SQL definition from the open-source pg_partman PostgreSQL project.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "plpgsql_trigger", "is_postgresql_specific": true, "sql_length": 9166, "num_statements": 111} {"question": "What is the average number of employees for startups founded in 2019?", "schema": "CREATE TABLE startup (id INT, name TEXT, founding_year INT, num_employees INT); INSERT INTO startup (id, name, founding_year, num_employees) VALUES (1, 'Startup19', 2019, 200); INSERT INTO startup (id, name, founding_year, num_employees) VALUES (2, 'TechStart', 2018, 100);", "sql": "SELECT AVG(num_employees) FROM startup WHERE founding_year = 2019;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "Update community policing event details for event ID 201 in district 4", "schema": "CREATE TABLE districts (id INT, name VARCHAR(255)); INSERT INTO districts (id, name) VALUES (4, 'Southriver'); CREATE TABLE community_policing_events (id INT, district_id INT, title VARCHAR(255), date DATE); INSERT INTO community_policing_events (id, district_id, title, date) VALUES (201, 4, 'Neighborhood Watch Meeting', '2023-03-15'), (202, 4, 'Coffee with a Cop', '2023-03-18');", "sql": "UPDATE community_policing_events SET title = 'Community Safety Meeting', date = '2023-03-17' WHERE id = 201;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 108, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the highest Points, when Position is less than 4, when Equipment is Zabel - VMC, and when Bike No is less than 1?", "schema": "CREATE TABLE table_name_19 (points INTEGER, bike_no VARCHAR, position VARCHAR, equipment VARCHAR)", "sql": "SELECT MAX(points) FROM table_name_19 WHERE position < 4 AND equipment = 'zabel - vmc' AND bike_no < 1;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 103, "num_statements": 1} {"question": "Delete all records in the 'ocean_acidification' table where the 'level' is above 8.0", "schema": "CREATE TABLE ocean_acidification (id INT, date DATE, location VARCHAR(50), level DECIMAL(3,1)); INSERT INTO ocean_acidification (id, date, location, level) VALUES (1, '2021-08-15', 'Caribbean Sea', 7.9); INSERT INTO ocean_acidification (id, date, location, level) VALUES (2, '2022-03-02', 'Sargasso Sea', 8.1);", "sql": "DELETE FROM ocean_acidification WHERE level > 8.0;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_delete", "is_postgresql_specific": false, "sql_length": 50, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the smallest number for old membership total?", "schema": "CREATE TABLE table_27671835_3 (old_membership_total INTEGER)", "sql": "SELECT MIN(old_membership_total) FROM table_27671835_3;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 55, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the lowest drawn that has games less than 7?", "schema": "CREATE TABLE table_name_83 (drawn INTEGER, games INTEGER)", "sql": "SELECT MIN(drawn) FROM table_name_83 WHERE games < 7;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 53, "num_statements": 1} {"question": "What is the maximum number of passengers that can be carried by Airbus A380?", "schema": "CREATE TABLE AircraftSpecifications (Id INT, Manufacturer VARCHAR(50), Model VARCHAR(50), MaxPassengers INT); INSERT INTO AircraftSpecifications (Id, Manufacturer, Model, MaxPassengers) VALUES (1, 'Airbus', 'A380', 853);", "sql": "SELECT MaxPassengers FROM AircraftSpecifications WHERE Manufacturer = 'Airbus' AND Model = 'A380';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 98, "num_statements": 1} {"question": "List all the unique safety protocol IDs and their corresponding safety measures from the safety_protocols table.", "schema": "CREATE TABLE safety_protocols (protocol_id INT, safety_measure TEXT); INSERT INTO safety_protocols (protocol_id, safety_measure) VALUES (1, 'Use personal protective equipment'), (2, 'Regular equipment maintenance'), (3, 'Emergency drills');", "sql": "SELECT protocol_id, safety_measure FROM safety_protocols;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 57, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the elevation msnm for a population censo 2007(hab) of 77.392*?", "schema": "CREATE TABLE table_name_59 (elevation_msnm VARCHAR, population_censo_2007_hab_ VARCHAR)", "sql": "SELECT elevation_msnm FROM table_name_59 WHERE population_censo_2007_hab_ = '77.392*';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 86, "num_statements": 1} {"question": "pgTAP test for Index (assertion 55).", "schema": null, "sql": "/****************************************************************************/\n-- Test index_is_unique().\nSELECT * FROM check_test(\n index_is_unique( 'public', 'sometab', 'idx_baz', 'whatever' ),\n true,\n 'index_is_unique()',\n 'whatever',\n ''\n);", "explanation": "SQL assertion from pgTAP test suite for Index.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 259, "num_statements": 1} {"question": "What is the total number of traditional arts centers and the number of centers dedicated to dance in each continent, excluding Antarctica?", "schema": "CREATE TABLE Arts_Centers (Center_Name VARCHAR(50), Country VARCHAR(50), Type VARCHAR(50)); INSERT INTO Arts_Centers (Center_Name, Country, Type) VALUES ('Sydney Opera House', 'Australia', 'Opera'), ('Teatro Colon', 'Argentina', 'Ballet');", "sql": "SELECT Continent, COUNT(*) AS Total_Arts_Centers, SUM(CASE WHEN Type = 'Dance' THEN 1 ELSE 0 END) AS Dance_Centers FROM Arts_Centers JOIN (SELECT 'Australia' AS Country, 'Oceania' AS Continent UNION ALL SELECT 'Argentina' AS Country, 'South America' AS Continent) AS Continents ON Arts_Centers.Country = Continents.Country GROUP BY Continent;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 342, "num_statements": 1} {"question": "PostgreSQL regression test 'strings': Write the SELECT query (example 429).", "schema": null, "sql": "SELECT sha224('The quick brown fox jumps over the lazy dog.');", "explanation": "Regression test for Strings in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT sha224('The quick brown fox jumps over the lazy dog.')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 62, "num_statements": 1} {"question": "PostgreSQL regression test 'jsonb': Write the SELECT query (example 621).", "schema": null, "sql": "SELECT '[\"a\",\"b\",\"c\",[1,2],null]'::jsonb -> 2;", "explanation": "Regression test for Jsonb in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT '[\"a\",\"b\",\"c\",[1,2],null]'::jsonb -> 2) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": true, "sql_length": 46, "num_statements": 1} {"question": "What is the minimum depth of all coastal waters?", "schema": "CREATE TABLE coastal_waters (name VARCHAR(255), depth FLOAT); INSERT INTO coastal_waters (name, depth) VALUES ('Gulf of Mexico', 250.0), ('Baltic Sea', 459.0), ('Black Sea', 2212.0);", "sql": "SELECT MIN(depth) FROM coastal_waters;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 38, "num_statements": 1} {"question": "What is the maximum impact measurement for companies based in Asia?", "schema": "CREATE TABLE companies (company_id INT, region VARCHAR(50), impact_measurement FLOAT); INSERT INTO companies (company_id, region, impact_measurement) VALUES (1, 'Asia', 9.2), (2, 'Europe', 7.6), (3, 'Asia', 8.9);", "sql": "SELECT MAX(impact_measurement) FROM companies WHERE region = 'Asia';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 68, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the winning score for the Andy Williams-San Diego Open Invitational tournament?", "schema": "CREATE TABLE table_name_60 (winning_score VARCHAR, tournament VARCHAR)", "sql": "SELECT winning_score FROM table_name_60 WHERE tournament = 'andy williams-san diego open invitational';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 103, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Team Rusport has the best of 59.654 and what qual 1?", "schema": "CREATE TABLE table_name_15 (qual_1 VARCHAR, team VARCHAR, best VARCHAR)", "sql": "SELECT qual_1 FROM table_name_15 WHERE team = 'rusport' AND best = '59.654';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 76, "num_statements": 1} {"question": "Insert a new record into the community_engagement table with the following data: 'Seattle', 'USA', 'Cultural festival', 2000.", "schema": "CREATE TABLE community_engagement (city VARCHAR(50), country VARCHAR(50), event VARCHAR(50), attendees INT);", "sql": "INSERT INTO community_engagement (city, country, event, attendees) VALUES ('Seattle', 'USA', 'Cultural festival', 2000);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 120, "num_statements": 1} {"question": "What is the total inventory cost for each menu item category in the current quarter?", "schema": "CREATE TABLE Inventory (inventory_id INT PRIMARY KEY, menu_item VARCHAR(50), inventory_quantity INT, inventory_cost DECIMAL(5,2), inventory_date DATE); CREATE TABLE Menu (menu_item VARCHAR(50) PRIMARY KEY, menu_item_category VARCHAR(50));", "sql": "SELECT menu_item_category, SUM(inventory_cost * inventory_quantity) FROM Inventory i JOIN Menu m ON i.menu_item = m.menu_item WHERE i.inventory_date >= DATEADD(quarter, DATEDIFF(quarter, 0, GETDATE()), 0) GROUP BY menu_item_category;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 233, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who was the outgoing manager that was replaced by Thomas Von Heesen?", "schema": "CREATE TABLE table_name_80 (outgoing_manager VARCHAR, replaced_by VARCHAR)", "sql": "SELECT outgoing_manager FROM table_name_80 WHERE replaced_by = 'thomas von heesen';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 83, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which Year has a Competition of european championships, and Notes of 66.81 m?", "schema": "CREATE TABLE table_name_26 (year INTEGER, competition VARCHAR, notes VARCHAR)", "sql": "SELECT SUM(year) FROM table_name_26 WHERE competition = 'european championships' AND notes = '66.81 m';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 103, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the score of Colorado when they were a visitor and had a Smith decision?", "schema": "CREATE TABLE table_name_32 (score VARCHAR, decision VARCHAR, visitor VARCHAR)", "sql": "SELECT score FROM table_name_32 WHERE decision = 'smith' AND visitor = 'colorado';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 82, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: what row is the population of Latin America/Caribbean when Asia is 4,894 (46.1%)?", "schema": "CREATE TABLE table_19017269_5 (latin_america_caribbean VARCHAR, asia VARCHAR)", "sql": "SELECT COUNT(latin_america_caribbean) FROM table_19017269_5 WHERE asia = '4,894 (46.1%)';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 89, "num_statements": 1} {"question": "What are the names of categories with no projects?", "schema": "CREATE TABLE project (id INT, name VARCHAR(50), location VARCHAR(50), start_date DATE, end_date DATE, category VARCHAR(20)); INSERT INTO project (id, name, location, start_date, end_date, category) VALUES (4, 'Utility Relocation', 'City D', '2021-01-01', '2021-12-31', 'Utilities'); CREATE TABLE category (id INT, name VARCHAR(20)); INSERT INTO category (id, name) VALUES (1, 'Resilience'), (2, 'Design'), (3, 'Infrastructure');", "sql": "SELECT c.name FROM category c WHERE c.name NOT IN (SELECT DISTINCT category FROM project);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 90, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Find the name of amenities Smith Hall dorm have.", "schema": "CREATE TABLE has_amenity (dormid VARCHAR, amenid VARCHAR); CREATE TABLE dorm_amenity (amenity_name VARCHAR, amenid VARCHAR); CREATE TABLE dorm (dormid VARCHAR, dorm_name VARCHAR)", "sql": "SELECT T3.amenity_name FROM dorm AS T1 JOIN has_amenity AS T2 ON T1.dormid = T2.dormid JOIN dorm_amenity AS T3 ON T2.amenid = T3.amenid WHERE T1.dorm_name = 'Smith Hall';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 170, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: When Melbourne was the Away team, what was their score?", "schema": "CREATE TABLE table_name_75 (away_team VARCHAR)", "sql": "SELECT away_team AS score FROM table_name_75 WHERE away_team = 'melbourne';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 75, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: For majors with 8 events played and more than 1 made cut, what is the most top-10s recorded?", "schema": "CREATE TABLE table_name_9 (top_10 INTEGER, cuts_made VARCHAR, events VARCHAR)", "sql": "SELECT MAX(top_10) FROM table_name_9 WHERE cuts_made > 1 AND events = 8;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 72, "num_statements": 1} {"question": "What is the average fuel efficiency of electric vehicles by model?", "schema": "CREATE TABLE ElectricVehicleFuelEfficiency(Model VARCHAR(50), Make VARCHAR(50), MilesPerGallon FLOAT);", "sql": "SELECT Model, AVG(MilesPerGallon) FROM ElectricVehicleFuelEfficiency GROUP BY Model;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 84, "num_statements": 1} {"question": "What is the total budget allocation for education services in CityA for the current fiscal year?", "schema": "CREATE TABLE fiscal_year (fiscal_year INT, start_date DATE, end_date DATE); INSERT INTO fiscal_year VALUES (2022, '2022-01-01', '2022-12-31'), (2023, '2023-01-01', '2023-12-31'); CREATE TABLE budget_allocation (service VARCHAR(20), fiscal_year INT, amount INT); INSERT INTO budget_allocation VALUES ('Education', 2022, 500000), ('Healthcare', 2022, 800000), ('Education', 2023, 600000), ('Healthcare', 2023, 900000); CREATE TABLE cities (id INT, name VARCHAR(20)); INSERT INTO cities VALUES (1, 'CityA'), (2, 'CityB'), (3, 'CityC');", "sql": "SELECT SUM(amount) FROM budget_allocation WHERE service = 'Education' AND fiscal_year = (SELECT fiscal_year FROM fiscal_year WHERE start_date <= CURRENT_DATE AND end_date >= CURRENT_DATE) AND city_id = (SELECT id FROM cities WHERE name = 'CityA');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 247, "num_statements": 1} {"question": "Calculate the percentage of volunteers who are under 18 years old, and list them along with their total volunteer hours for the current year.", "schema": "CREATE TABLE Volunteers (VolunteerID INT, VolunteerName VARCHAR(50), DateOfBirth DATE); INSERT INTO Volunteers (VolunteerID, VolunteerName, DateOfBirth) VALUES (1, 'James Brown', '1993-01-01'), (2, 'Jessica White', '2003-01-01'); CREATE TABLE VolunteerHours (VolunteerID INT, Hours INT, VolunteerDate DATE); INSERT INTO VolunteerHours (VolunteerID, Hours, VolunteerDate) VALUES (1, 5, '2021-01-01'), (1, 6, '2021-02-01'), (1, 7, '2021-03-01'), (2, 4, '2021-01-01'), (2, 3, '2021-02-01'), (2, 2, '2021-03-01');", "sql": "SELECT ROUND(COUNT(CASE WHEN TIMESTAMPDIFF(YEAR, Volunteers.DateOfBirth, CURDATE()) < 18 THEN Volunteers.VolunteerID END) / COUNT(*) * 100, 2) AS Under18Percentage, Volunteers.VolunteerName, SUM(VolunteerHours.Hours) AS TotalHoursForYear FROM Volunteers INNER JOIN VolunteerHours ON Volunteers.VolunteerID = VolunteerHours.VolunteerID AND VolunteerHours.VolunteerDate <= CURDATE() AND YEAR(VolunteerHours.VolunteerDate) = YEAR(CURDATE()) GROUP BY Volunteers.VolunteerName;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 472, "num_statements": 1} {"question": "What is the total number of menu items sold in the Western region?", "schema": "CREATE TABLE menu (menu_id INT, menu_name TEXT, menu_type TEXT, price DECIMAL, daily_sales INT, region TEXT);", "sql": "SELECT SUM(daily_sales) FROM menu WHERE region = 'Western';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 59, "num_statements": 1} {"question": "What is the maximum ocean acidification level recorded in the Atlantic region in the last 5 years?\"", "schema": "CREATE TABLE ocean_acidification_levels (location TEXT, acidification_level REAL, measurement_date DATE); CREATE TABLE atlantic_region (region_name TEXT, region_description TEXT);", "sql": "SELECT MAX(oal.acidification_level) FROM ocean_acidification_levels oal INNER JOIN atlantic_region ar ON oal.location LIKE '%Atlantic%' AND oal.measurement_date >= (CURRENT_DATE - INTERVAL '5 years');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 200, "num_statements": 1} {"question": "What is the average life expectancy in 'Los Angeles County'?", "schema": "CREATE TABLE life_expectancy_data (county VARCHAR(255), life_expectancy FLOAT); INSERT INTO life_expectancy_data (county, life_expectancy) VALUES ('Los Angeles County', 81.7), ('Orange County', 83.2);", "sql": "SELECT life_expectancy FROM life_expectancy_data WHERE county = 'Los Angeles County';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 85, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the competition for score of 3-0", "schema": "CREATE TABLE table_name_42 (competition VARCHAR, score VARCHAR)", "sql": "SELECT competition FROM table_name_42 WHERE score = '3-0';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 58, "num_statements": 1} {"question": "What is the total number of education projects in each continent?", "schema": "CREATE TABLE Projects (project_id INT, project_location VARCHAR(50), project_type VARCHAR(50)); INSERT INTO Projects (project_id, project_location, project_type) VALUES (1, 'India', 'Community Development'), (2, 'Canada', 'Education'), (3, 'Kenya', 'Education');", "sql": "SELECT project_location, COUNT(*) AS 'Total Projects' FROM Projects WHERE project_type = 'Education' GROUP BY project_location;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 127, "num_statements": 1} {"question": "Write the SQL definition from PostgreSQL contrib extension 'contrib' (item 19).", "schema": null, "sql": "--\n-- gin_leafpage_items()\n--\nCREATE FUNCTION gin_leafpage_items(IN page bytea,\n OUT first_tid tid,\n OUT nbytes int2,\n OUT tids tid[])\nRETURNS SETOF record\nAS 'MODULE_PATHNAME', 'gin_leafpage_items'\nLANGUAGE C STRICT PARALLEL SAFE;", "explanation": "SQL definition from the 'contrib' PostgreSQL contrib extension.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 240, "num_statements": 1} {"question": "What are the average soil moisture levels for vineyards in France and Spain?", "schema": "CREATE TABLE VineyardSoilMoisture (country VARCHAR(20), region VARCHAR(30), moisture FLOAT); INSERT INTO VineyardSoilMoisture (country, region, moisture) VALUES ('France', 'Bordeaux', 42.3), ('France', 'Burgundy', 48.1), ('Spain', 'Rioja', 39.5), ('Spain', 'Ribera del Duero', 45.6);", "sql": "SELECT AVG(moisture) FROM VineyardSoilMoisture WHERE country IN ('France', 'Spain') AND region IN ('Bordeaux', 'Burgundy', 'Rioja', 'Ribera del Duero');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 152, "num_statements": 1} {"question": "What are the names of all restaurants that serve vegetarian options?", "schema": "CREATE TABLE Restaurants (name VARCHAR(255), vegetarian BOOLEAN); INSERT INTO Restaurants (name, vegetarian) VALUES ('Bistro Veggie', TRUE), ('Pizza House', FALSE), ('Vegan Delight', TRUE);", "sql": "SELECT name FROM Restaurants WHERE vegetarian = TRUE;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 53, "num_statements": 1} {"question": "Write the SQL definition from PostgreSQL contrib extension 'hstore_plperl' (item 3).", "schema": null, "sql": "-- test perl -> hstore\nCREATE FUNCTION test2() RETURNS hstore\nLANGUAGE plperl\nTRANSFORM FOR TYPE hstore\nAS $$\n$val = {a => 1, b => 'boo', c => undef};\nreturn $val;\n$$;", "explanation": "SQL definition from the 'hstore_plperl' PostgreSQL contrib extension.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 167, "num_statements": 3} {"question": "Generate PostgreSQL SQL for: List the most common result of the musicals.", "schema": "CREATE TABLE musical (RESULT VARCHAR)", "sql": "SELECT RESULT FROM musical GROUP BY RESULT ORDER BY COUNT(*) DESC LIMIT 1;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 74, "num_statements": 1} {"question": "Identify the number of unique countries that imported Terbium in 2021.", "schema": "CREATE TABLE TerbiumImports (country VARCHAR(50), year INT); INSERT INTO TerbiumImports (country, year) VALUES ('USA', 2021), ('China', 2021), ('Japan', 2021), ('USA', 2021), ('South Korea', 2021);", "sql": "SELECT COUNT(DISTINCT country) FROM TerbiumImports WHERE year = 2021;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 69, "num_statements": 1} {"question": "What is the maximum number of workforce development programs offered by companies in a single country?", "schema": "CREATE TABLE companies (id INT, name TEXT, country TEXT, num_workforce_programs INT); INSERT INTO companies (id, name, country, num_workforce_programs) VALUES (1, 'Empowerment Enterprises', 'USA', 3); INSERT INTO companies (id, name, country, num_workforce_programs) VALUES (2, 'Skillset Solutions', 'Canada', 2); INSERT INTO companies (id, name, country, num_workforce_programs) VALUES (3, 'Proficiency Partners', 'Mexico', 4); INSERT INTO companies (id, name, country, num_workforce_programs) VALUES (4, 'Abilities Associates', 'Brazil', 5); INSERT INTO companies (id, name, country, num_workforce_programs) VALUES (5, 'Capability Creations', 'USA', 1); INSERT INTO companies (id, name, country, num_workforce_programs) VALUES (6, 'Knowledge Kingdom', 'Canada', 6);", "sql": "SELECT MAX(num_workforce_programs) AS max_programs FROM companies WHERE country IN ('USA', 'Canada', 'Mexico', 'Brazil');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 121, "num_statements": 1} {"question": "Which biotech startups have received funding over 10 million and are located in California?", "schema": "CREATE TABLE startups (id INT, name VARCHAR(50), location VARCHAR(50), funding FLOAT); INSERT INTO startups (id, name, location, funding) VALUES (1, 'Genetech', 'California', 12000000); INSERT INTO startups (id, name, location, funding) VALUES (2, 'Zymergen', 'California', 25000000);", "sql": "SELECT name FROM startups WHERE funding > 10000000 AND location = 'California';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 79, "num_statements": 1} {"question": "Show an example of PostgreSQL ALTER FOREIGN DATA WRAPPER (example 1).", "schema": null, "sql": "ALTER FOREIGN DATA WRAPPER dbi OPTIONS (ADD foo '1', DROP bar);", "explanation": "PostgreSQL ALTER FOREIGN DATA WRAPPER command.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 63, "num_statements": 1} {"question": "What is the average number of points scored by basketball players in each game in the last month?", "schema": "CREATE TABLE basketball_games (id INT, game_date DATE, sport VARCHAR(50), average_points_per_player DECIMAL(5,2));", "sql": "SELECT AVG(average_points_per_player) FROM basketball_games WHERE sport = 'basketball' AND game_date >= DATEADD(month, -1, GETDATE());", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 134, "num_statements": 1} {"question": "Get all products that use 'Recycled Material' from 'material' table", "schema": "CREATE TABLE material (material_id VARCHAR(10), name VARCHAR(50), description TEXT, primary key (material_id));", "sql": "SELECT * FROM product p JOIN product_material pm ON p.product_id = pm.product_id JOIN material m ON pm.material_id = m.material_id WHERE m.name = 'Recycled Material';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 166, "num_statements": 1} {"question": "What was the total value of military equipment sales to India in H1 2021?", "schema": "CREATE TABLE military_sales (id INT, region VARCHAR, sale_value DECIMAL, sale_date DATE); INSERT INTO military_sales (id, region, sale_value, sale_date) VALUES (1, 'India', 20000, '2021-02-12'); INSERT INTO military_sales (id, region, sale_value, sale_date) VALUES (2, 'India', 18000, '2021-04-28'); INSERT INTO military_sales (id, region, sale_value, sale_date) VALUES (3, 'India', 15000, '2021-01-15');", "sql": "SELECT SUM(sale_value) FROM military_sales WHERE region = 'India' AND sale_date BETWEEN '2021-01-01' AND '2021-06-30';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 118, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the Hometown of Candidate Lenny Veltman?", "schema": "CREATE TABLE table_name_73 (hometown VARCHAR, candidate VARCHAR)", "sql": "SELECT hometown FROM table_name_73 WHERE candidate = 'lenny veltman';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 69, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the average attendance of the game with 38 opponent and less than 14 Falcons points?", "schema": "CREATE TABLE table_name_63 (attendance INTEGER, opponents VARCHAR, falcons_points VARCHAR)", "sql": "SELECT AVG(attendance) FROM table_name_63 WHERE opponents = 38 AND falcons_points < 14;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 87, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: With less than 1 Championship, what es the Established date of the Niagara Rugby Union League?", "schema": "CREATE TABLE table_name_40 (established INTEGER, championships VARCHAR, league VARCHAR)", "sql": "SELECT SUM(established) FROM table_name_40 WHERE championships < 1 AND league = 'niagara rugby union';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 102, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is Date, when Team is \"Orlando\"?", "schema": "CREATE TABLE table_name_72 (date VARCHAR, team VARCHAR)", "sql": "SELECT date FROM table_name_72 WHERE team = 'orlando';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 54, "num_statements": 1} {"question": "What is the average budget for climate adaptation projects in South America?", "schema": "CREATE TABLE project_budget (project_id INT, budget DECIMAL); INSERT INTO project_budget (project_id, budget) VALUES (1, 5000000.00);", "sql": "SELECT AVG(budget) FROM project_budget JOIN climate_project ON project_budget.project_id = climate_project.project_id WHERE climate_project.project_type = 'Adaptation' AND climate_project.project_region = 'South America';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 221, "num_statements": 1} {"question": "How many AI safety incidents occurred in the 'asia' region in 2020?", "schema": "CREATE TABLE ai_safety_incidents (region TEXT, year INTEGER, incidents INTEGER); INSERT INTO ai_safety_incidents (region, year, incidents) VALUES ('europe', 2020, 10), ('africa', 2020, 5), ('asia', 2020, 15);", "sql": "SELECT SUM(incidents) FROM ai_safety_incidents WHERE region = 'asia' AND year = 2020;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 85, "num_statements": 1} {"question": "What is the total revenue for each menu category in a specific country?", "schema": "CREATE TABLE menus (menu_id INT, category VARCHAR(255)); INSERT INTO menus VALUES (1, 'Appetizers'); INSERT INTO menus VALUES (2, 'Entrees'); INSERT INTO menus VALUES (3, 'Desserts'); CREATE TABLE sales (sale_id INT, menu_id INT, quantity INT, country VARCHAR(255), price DECIMAL(10, 2));", "sql": "SELECT m.category, SUM(s.price * s.quantity) as total_revenue FROM menus m INNER JOIN sales s ON m.menu_id = s.menu_id WHERE s.country = 'USA' GROUP BY m.category;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 163, "num_statements": 1} {"question": "What is the total supply of ERC20 tokens issued by addresses located in India?", "schema": "CREATE TABLE addresses (address VARCHAR(42), country VARCHAR(2)); INSERT INTO addresses (address, country) VALUES ('0x123', 'IN'), ('0x456', 'US'), ('0x789', 'IN'); CREATE TABLE erc20_tokens (token_name VARCHAR(10), address VARCHAR(42), total_supply BIGINT); INSERT INTO erc20_tokens (token_name, address, total_supply) VALUES ('TokenA', '0x123', 1000000), ('TokenB', '0x456', 2000000), ('TokenC', '0x789', 1500000);", "sql": "SELECT SUM(total_supply) FROM erc20_tokens t JOIN addresses a ON t.address = a.address WHERE a.country = 'IN';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 110, "num_statements": 1} {"question": "How many 'species' are found in 'Boreal Forests'?", "schema": "CREATE TABLE species (id INT, name VARCHAR(50), habitat VARCHAR(50)); INSERT INTO species (id, name, habitat) VALUES (1, 'Canadian Lynx', 'Boreal Forests');", "sql": "SELECT COUNT(*) FROM species WHERE habitat = 'Boreal Forests';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 62, "num_statements": 1} {"question": "What was the total cost of all infrastructure projects in the Philippines in 2020?'", "schema": "CREATE TABLE infrastructure_projects (id INT, country VARCHAR(255), year INT, cost FLOAT); INSERT INTO infrastructure_projects (id, country, year, cost) VALUES (1, 'Philippines', 2020, 500000.00), (2, 'Philippines', 2019, 600000.00);", "sql": "SELECT SUM(cost) FROM infrastructure_projects WHERE country = 'Philippines' AND year = 2020;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 92, "num_statements": 1} {"question": "What are the total revenues for each transport mode, excluding the 'Premium' fare type?", "schema": "CREATE TABLE Fares (FareID int, FareType varchar(50), TransportMode varchar(50), Revenue int); INSERT INTO Fares VALUES (1, 'Standard', 'Bus', 5000); INSERT INTO Fares VALUES (2, 'Discounted', 'Bus', 3000); INSERT INTO Fares VALUES (3, 'Standard', 'Subway', 7000); INSERT INTO Fares VALUES (4, 'Discounted', 'Subway', 4000); INSERT INTO Fares VALUES (5, 'Premium', 'Tram', 6000);", "sql": "SELECT TransportMode, SUM(Revenue) FROM Fares WHERE FareType <> 'Premium' GROUP BY TransportMode;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 97, "num_statements": 1} {"question": "Get the status and count of shipments for each warehouse_id from the shipment table grouped by status and warehouse_id", "schema": "CREATE TABLE shipment (shipment_id VARCHAR(10), status VARCHAR(20), warehouse_id VARCHAR(10), carrier_name VARCHAR(30), shipped_date DATE);", "sql": "SELECT status, warehouse_id, COUNT(*) as count FROM shipment GROUP BY status, warehouse_id;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 91, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who was the opponent on april 26, 2003?", "schema": "CREATE TABLE table_name_25 (opponent VARCHAR, date VARCHAR)", "sql": "SELECT opponent FROM table_name_25 WHERE date = 'april 26, 2003';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 65, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: In what year was The House of Blue Leaves nominated for outstanding actress in a play?", "schema": "CREATE TABLE table_name_62 (year INTEGER, nominated_work VARCHAR, category VARCHAR)", "sql": "SELECT SUM(year) FROM table_name_62 WHERE nominated_work = 'the house of blue leaves' AND category = 'outstanding actress in a play';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 133, "num_statements": 1} {"question": "Find the average billing rate for attorneys in 'billing' table, excluding attorneys with less than 10 hours billed", "schema": "CREATE TABLE billing (attorney_id INT, client_id INT, hours_billed INT, billing_rate DECIMAL(5,2));", "sql": "SELECT AVG(billing_rate) FROM billing WHERE hours_billed >= 10;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 63, "num_statements": 1} {"question": "Write the SQL definition from PostgreSQL contrib extension 'contrib' (item 168).", "schema": null, "sql": "CREATE FUNCTION gbt_macad_penalty(internal,internal,internal)\nRETURNS internal\nAS 'MODULE_PATHNAME'\nLANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;", "explanation": "SQL definition from the 'contrib' PostgreSQL contrib extension.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 142, "num_statements": 1} {"question": "How many defense projects exceeded their budget in the last 3 years?", "schema": "CREATE TABLE DefenseProjects (id INT, project_name VARCHAR(255), start_date DATE, end_date DATE, budget INT, actual_cost INT); INSERT INTO DefenseProjects (id, project_name, start_date, end_date, budget, actual_cost) VALUES (1, 'Project A', '2019-01-01', '2021-12-31', 10000000, 11000000), (2, 'Project B', '2018-01-01', '2020-12-31', 8000000, 7500000);", "sql": "SELECT COUNT(*) FROM DefenseProjects WHERE actual_cost > budget AND start_date >= DATE_SUB(CURRENT_DATE, INTERVAL 3 YEAR);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 122, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the average Tournaments, when Highest Rank is \"Maegashira 1\"?", "schema": "CREATE TABLE table_name_90 (tournaments INTEGER, highest_rank VARCHAR)", "sql": "SELECT AVG(tournaments) FROM table_name_90 WHERE highest_rank = 'maegashira 1';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 79, "num_statements": 1} {"question": "pgTAP test for Pgtap--Unpackaged--0.91.0 (assertion 20).", "schema": null, "sql": "ALTER EXTENSION pgtap ADD FUNCTION finish ();", "explanation": "SQL assertion from pgTAP test suite for Pgtap--Unpackaged--0.91.0.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 45, "num_statements": 1} {"question": "PostgreSQL regression test 'polygon': Write the SELECT query (example 47).", "schema": null, "sql": "SELECT count(*) FROM quad_poly_tbl WHERE p @> polygon '((340,550),(343,552),(341,553))';", "explanation": "Regression test for Polygon in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT count(*) FROM quad_poly_tbl WHERE p @> polygon '((340,550),(343,552),(341,553))') AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 88, "num_statements": 1} {"question": "What is the average number of employees for companies in the 'Data Science' sector with ethical AI?", "schema": "CREATE TABLE ai_companies (id INT, name VARCHAR(20), location VARCHAR(20), sector VARCHAR(20), employees INT, ethical_ai BOOLEAN); INSERT INTO ai_companies (id, name, location, sector, employees, ethical_ai) VALUES (2, 'XYZ Tech', 'USA', 'Data Science', 30, true);", "sql": "SELECT sector, AVG(employees) as avg_employees FROM ai_companies WHERE ethical_ai = true AND sector = 'Data Science' GROUP BY sector;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 133, "num_statements": 1} {"question": "PostgreSQL regression test 'jsonb': Write the SELECT query (example 140).", "schema": null, "sql": "SELECT jsonb_exists_all('{\"a\":null, \"b\":\"qq\"}', ARRAY['c','d']);", "explanation": "Regression test for Jsonb in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT jsonb_exists_all('{\"a\":null, \"b\":\"qq\"}', ARRAY['c','d'])) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 64, "num_statements": 1} {"question": "Show a SQL definition from the timescaledb project (vacuum, item 29).", "schema": null, "sql": "CREATE TABLE vacuum_norm(time timestamp, temp float);", "explanation": "SQL definition from the open-source timescaledb PostgreSQL project.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_table", "is_postgresql_specific": false, "sql_length": 53, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'polymorphism' (example 99).", "schema": null, "sql": "CREATE AGGREGATE myaggp19b(BASETYPE = anyelement, SFUNC = tf1p,\n STYPE = anyarray, INITCOND = '{}');", "explanation": "DDL from PostgreSQL core regression test for Polymorphism.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "ddl_advanced", "is_postgresql_specific": true, "sql_length": 101, "num_statements": 1} {"question": "Insert new data into the 'drought_impact' table reflecting the drought conditions in 'Europe'", "schema": "CREATE TABLE drought_impact (id INT PRIMARY KEY, location VARCHAR(20), impact_level VARCHAR(10));", "sql": "INSERT INTO drought_impact (id, location, impact_level) VALUES (7, 'Mediterranean', 'severe'), (8, 'Northern Europe', 'moderate'), (9, 'Eastern Europe', 'mild');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 161, "num_statements": 1} {"question": "List all departments and the number of employees in each", "schema": "CREATE TABLE departments (id INT, name VARCHAR(255)); INSERT INTO departments (id, name) VALUES (1, 'Human Resources'), (2, 'Public Works'), (3, 'Education'); CREATE TABLE employees (id INT, department_id INT, gender VARCHAR(10)); INSERT INTO employees (id, department_id, gender) VALUES (1, 1, 'Female'), (2, 2, 'Male'), (3, 3, 'Female'), (4, 1, 'Non-binary'), (5, 3, 'Male');", "sql": "SELECT d.name, COUNT(e.id) as employee_count FROM departments d LEFT JOIN employees e ON d.id = e.department_id GROUP BY d.id;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 126, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which years did the player from Providence play for the Grizzlies as small forward?", "schema": "CREATE TABLE table_name_96 (years_for_grizzlies VARCHAR, position VARCHAR, school_club_team VARCHAR)", "sql": "SELECT years_for_grizzlies FROM table_name_96 WHERE position = 'small forward' AND school_club_team = 'providence';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 115, "num_statements": 1} {"question": "Identify the total number of regulatory actions taken against smart contracts in the Antarctic region.", "schema": "CREATE TABLE smart_contracts (contract_id INT, contract_name VARCHAR(50), region VARCHAR(50)); CREATE TABLE regulatory_actions (action_id INT, contract_id INT, action_date DATE); INSERT INTO smart_contracts (contract_id, contract_name, region) VALUES (1, 'Contract A', 'Antarctic'), (2, 'Contract B', 'Antarctic'); INSERT INTO regulatory_actions (action_id, contract_id, action_date) VALUES (1, 1, '2022-01-01'), (2, 1, '2022-02-01'), (3, 2, '2022-03-01');", "sql": "SELECT COUNT(*) FROM regulatory_actions r JOIN smart_contracts s ON r.contract_id = s.contract_id WHERE s.region = 'Antarctic';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 127, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What were the goals at Sydney?", "schema": "CREATE TABLE table_name_16 (goals VARCHAR, venue VARCHAR)", "sql": "SELECT goals FROM table_name_16 WHERE venue = 'sydney';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 55, "num_statements": 1} {"question": "PostgreSQL regression test 'tsdicts': Write the SELECT query (example 35).", "schema": null, "sql": "SELECT ts_lexize('hunspell_long', 'bookings');", "explanation": "Regression test for Tsdicts in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT ts_lexize('hunspell_long', 'bookings')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 46, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What Attack has a 30,00% GERB?", "schema": "CREATE TABLE table_name_74 (attack VARCHAR, gerb VARCHAR)", "sql": "SELECT attack FROM table_name_74 WHERE gerb = '30,00%';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 55, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: It was announced on July 2, 2006 that what asset was acquired?", "schema": "CREATE TABLE table_1373542_1 (asset_acquired VARCHAR, date_announced VARCHAR)", "sql": "SELECT asset_acquired FROM table_1373542_1 WHERE date_announced = 'July 2, 2006';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 81, "num_statements": 1} {"question": "Who are the community leaders engaging in cultural preservation in Africa?", "schema": "CREATE TABLE community_leaders (id INT, name TEXT, role TEXT, site TEXT, region TEXT); INSERT INTO community_leaders (id, name, role, site, region) VALUES (1, 'Farouk Topan', 'Scholar', 'Bagamoyo', 'Africa');", "sql": "SELECT name FROM community_leaders WHERE region = 'Africa';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 59, "num_statements": 1} {"question": "What is the average runtime of all TV shows in the \"tv_shows\" table?", "schema": "CREATE TABLE tv_shows (id INT, name VARCHAR(100), genre VARCHAR(50), runtime INT);", "sql": "SELECT AVG(runtime) FROM tv_shows;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 34, "num_statements": 1} {"question": "What are the total number of research grants awarded to domestic and international graduate students?", "schema": "CREATE TABLE research_grants (id INT, student_type VARCHAR(10), amount DECIMAL(10,2)); INSERT INTO research_grants (id, student_type, amount) VALUES (1, 'Domestic', 15000.00), (2, 'International', 20000.00);", "sql": "SELECT SUM(amount) FROM research_grants WHERE student_type IN ('Domestic', 'International');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 92, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Find the first name and last name of the instructor of course that has course name", "schema": "CREATE TABLE FACULTY (Fname VARCHAR, Lname VARCHAR, FacID VARCHAR); CREATE TABLE COURSE (Instructor VARCHAR, CName VARCHAR)", "sql": "SELECT T2.Fname, T2.Lname FROM COURSE AS T1 JOIN FACULTY AS T2 ON T1.Instructor = T2.FacID WHERE T1.CName = 'COMPUTER LITERACY';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 128, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: what is writtenand directed by shannon flynn?", "schema": "CREATE TABLE table_24018430_3 (written_by VARCHAR, directed_by VARCHAR)", "sql": "SELECT written_by FROM table_24018430_3 WHERE directed_by = 'Shannon Flynn';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 76, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Can you tell me the Annual change that has the Location of manaus?", "schema": "CREATE TABLE table_name_48 (annual_change VARCHAR, location VARCHAR)", "sql": "SELECT annual_change FROM table_name_48 WHERE location = 'manaus';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "What is the total number of artworks by Aboriginal artists in the 'ArtCollection' table?", "schema": "CREATE TABLE ArtCollection (id INT, artist VARCHAR(50), title VARCHAR(100), year INT, type VARCHAR(50), PRIMARY KEY (id));", "sql": "SELECT SUM(CASE WHEN artist LIKE '%Aboriginal%' THEN 1 ELSE 0 END) FROM ArtCollection;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 86, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: WHAT IS THE TOTAL NUMBER OF SEATS IN Hamburgische Bürgerschaft WITH AN ABBR OF bündnis 90 / die grünen (gal)?", "schema": "CREATE TABLE table_name_18 (seats_in_hamburgische_bürgerschaft VARCHAR, abbr VARCHAR)", "sql": "SELECT COUNT(seats_in_hamburgische_bürgerschaft) FROM table_name_18 WHERE abbr = 'bündnis 90 / die grünen (gal)';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 113, "num_statements": 1} {"question": "What is the average CO2 emission per coal production in WA?", "schema": "CREATE TABLE co2_production (site VARCHAR(20), state VARCHAR(20), co2_emission INT, production INT); INSERT INTO co2_production (site, state, co2_emission, production) VALUES ('SiteA', 'QLD', 2500, 1500), ('SiteB', 'NSW', 3000, 2000), ('SiteC', 'WA', 3500, 1800);", "sql": "SELECT state, AVG(co2_emission/production) FROM co2_production WHERE state = 'WA';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 82, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: what's the highest player number from the list from 2000-2001", "schema": "CREATE TABLE table_16494599_5 (no INTEGER, years_for_grizzlies VARCHAR)", "sql": "SELECT MAX(no) FROM table_16494599_5 WHERE years_for_grizzlies = '2000-2001';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 77, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'seg' (example 150).", "schema": null, "sql": "SELECT '0'::seg <@ '-1 .. 1'::seg AS bool;", "explanation": "Example query from the 'seg' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 42, "num_statements": 1} {"question": "What is the maximum age of community health workers in each state?", "schema": "CREATE TABLE community_health_workers (id INT, name VARCHAR(50), age INT, state VARCHAR(2)); INSERT INTO community_health_workers (id, name, age, state) VALUES (1, 'John Doe', 45, 'Texas'), (2, 'Jane Smith', 35, 'California'), (3, 'Alice Johnson', 40, 'California'), (4, 'Bob Brown', 50, 'New York');", "sql": "SELECT state, MAX(age) FROM community_health_workers GROUP BY state;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 68, "num_statements": 1} {"question": "Which infrastructure projects in California have experienced cost overruns of over 30%?", "schema": "CREATE TABLE projects (project_id INT, project_name VARCHAR(100), state CHAR(2), planned_cost FLOAT, actual_cost FLOAT); INSERT INTO projects VALUES (1, 'CA Bullet Train', 'CA', 60000000000, 80000000000), (2, 'LA Subway Extension', 'CA', 10000000000, 13000000000), (3, 'SF Bay Bridge Retrofit', 'CA', 7000000000, 9000000000);", "sql": "SELECT * FROM projects WHERE state = 'CA' AND actual_cost > planned_cost * 1.3;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 79, "num_statements": 1} {"question": "What is the total waste generation rate in the 'Commercial' sector?", "schema": "CREATE TABLE CommercialWaste (id INT, sector VARCHAR(20), waste_generation_rate FLOAT); INSERT INTO CommercialWaste (id, sector, waste_generation_rate) VALUES (1, 'Commercial', 1.5), (2, 'Commercial', 1.8);", "sql": "SELECT SUM(waste_generation_rate) FROM CommercialWaste WHERE sector = 'Commercial';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 83, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'expressions' (example 60).", "schema": null, "sql": "create operator = (\n leftarg = myint,\n rightarg = myint,\n commutator = =,\n negator = <>,\n procedure = myinteq,\n restrict = eqsel,\n join = eqjoinsel,\n merges\n);", "explanation": "DDL from PostgreSQL core regression test for Expressions.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "ddl_advanced", "is_postgresql_specific": true, "sql_length": 184, "num_statements": 1} {"question": "pgTAP test for Partitions (assertion 36).", "schema": null, "sql": "SELECT * FROM check_test(\n is_partition_of( 'nonesuch', 'parted', 'whatevs' ),\n false,\n 'is_partition_of( non-ctab, ptab, desc )',\n 'whatevs',\n ''\n);", "explanation": "SQL assertion from pgTAP test suite for Partitions.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 164, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: If the dry density is 800, what is the drying shrinkage %?", "schema": "CREATE TABLE table_24969173_1 (drying_shrinkage___percentage_ VARCHAR, dry_density__kg_m3_ VARCHAR)", "sql": "SELECT drying_shrinkage___percentage_ FROM table_24969173_1 WHERE dry_density__kg_m3_ = 800;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 92, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What are the scores in matches against Tony Roche?", "schema": "CREATE TABLE table_2201724_1 (score_in_the_final VARCHAR, opponent_in_the_final VARCHAR)", "sql": "SELECT score_in_the_final FROM table_2201724_1 WHERE opponent_in_the_final = 'Tony Roche';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 90, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the power kw@RPM of the 13.180 model?", "schema": "CREATE TABLE table_11497980_1 (power_kw VARCHAR, model VARCHAR)", "sql": "SELECT COUNT(power_kw) AS @rpm FROM table_11497980_1 WHERE model = '13.180';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 76, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the nationality of Louis floch", "schema": "CREATE TABLE table_24565004_7 (nationality² VARCHAR, name VARCHAR)", "sql": "SELECT nationality² FROM table_24565004_7 WHERE name = 'Louis Floch';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 69, "num_statements": 1} {"question": "pgTAP test for Ruletap (assertion 41).", "schema": null, "sql": "SELECT * FROM check_test(\n rule_is_on( 'public', 'sometab', 'ins_me', 'i', 'whatever' ),\n true,\n 'rule_is_on(schema, table, rule, i, desc)',\n 'whatever',\n ''\n);", "explanation": "SQL assertion from pgTAP test suite for Ruletap.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 175, "num_statements": 1} {"question": "What is the average manufacturing cost of spacecrafts produced in the US?", "schema": "CREATE TABLE SpacecraftManufacturing (id INT, company VARCHAR(255), country VARCHAR(255), cost FLOAT); INSERT INTO SpacecraftManufacturing (id, company, country, cost) VALUES (1, 'SpaceX', 'USA', 50000000), (2, 'Blue Origin', 'USA', 70000000), (3, 'Roscosmos', 'Russia', 30000000);", "sql": "SELECT AVG(cost) FROM SpacecraftManufacturing WHERE country = 'USA';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 68, "num_statements": 1} {"question": "Find the five rural healthcare professionals with the highest salaries and their respective job titles.", "schema": "CREATE TABLE professionals (name VARCHAR(255), job_title VARCHAR(255), salary NUMERIC(10, 2)); INSERT INTO professionals (name, job_title, salary) VALUES ('Professional A', 'Doctor', 90000), ('Professional B', 'Nurse', 60000);", "sql": "SELECT name, job_title, salary FROM professionals ORDER BY salary DESC LIMIT 5;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 79, "num_statements": 1} {"question": "Delete the record of the vessel 'Endeavour' if it didn't transport any cargo in the last month.", "schema": "CREATE TABLE Vessels (ID INT, Name VARCHAR(255), CargoQuantity INT, LastCargoArrival DATETIME); INSERT INTO Vessels (ID, Name, CargoQuantity, LastCargoArrival) VALUES (1, 'Endeavour', 0, '2022-01-01'), (2, 'Pioneer', 100, '2022-02-01');", "sql": "DELETE FROM Vessels WHERE Name = 'Endeavour' AND CargoQuantity = 0 AND LastCargoArrival < DATE_SUB(CURRENT_DATE, INTERVAL 1 MONTH);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_delete", "is_postgresql_specific": false, "sql_length": 131, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the symbol of the element with an empirical t of 190 and a calculated value of 135?", "schema": "CREATE TABLE table_name_25 (symbol VARCHAR, empirical_† VARCHAR, calculated VARCHAR)", "sql": "SELECT symbol FROM table_name_25 WHERE empirical_† = '190' AND calculated = '135';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 82, "num_statements": 1} {"question": "What is the total carbon offset for projects in the 'carbon_offset' table in the 'Americas' region?", "schema": "CREATE TABLE carbon_offset (id INT, project_name TEXT, location TEXT, carbon_offset_value FLOAT); INSERT INTO carbon_offset (id, project_name, location, carbon_offset_value) VALUES (1, 'Project 1', 'Brazil', 1000.5), (2, 'Project 2', 'Canada', 1200.3);", "sql": "SELECT SUM(carbon_offset_value) FROM carbon_offset WHERE location LIKE '%Americas%';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 84, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many rounds were played on May 8?", "schema": "CREATE TABLE table_25773116_2 (round VARCHAR, date VARCHAR)", "sql": "SELECT COUNT(round) FROM table_25773116_2 WHERE date = 'May 8';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 63, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: which episode did sarah millican and grayson perry appear in", "schema": "CREATE TABLE table_29135051_3 (episode INTEGER, guest_s_ VARCHAR)", "sql": "SELECT MIN(episode) FROM table_29135051_3 WHERE guest_s_ = 'Sarah Millican and Grayson Perry';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 94, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What Upper stage has Launches to date of 0, and Version of 532?", "schema": "CREATE TABLE table_name_86 (upper_stage VARCHAR, launches_to_date VARCHAR, version VARCHAR)", "sql": "SELECT upper_stage FROM table_name_86 WHERE launches_to_date = 0 AND version = '532';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 85, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: When the Winning is 71-71-70-69=281, what is the To par?", "schema": "CREATE TABLE table_name_7 (to_par VARCHAR, winning_score VARCHAR)", "sql": "SELECT to_par FROM table_name_7 WHERE winning_score = 71 - 71 - 70 - 69 = 281;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 78, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who's the Republican ticket with a Communist ticket of elizabeth gurley flynn?", "schema": "CREATE TABLE table_name_32 (republican_ticket VARCHAR, communist_ticket VARCHAR)", "sql": "SELECT republican_ticket FROM table_name_32 WHERE communist_ticket = 'elizabeth gurley flynn';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 94, "num_statements": 1} {"question": "What is the total donation amount by age group?", "schema": "CREATE TABLE donor (did INT, age INT, total_donation DECIMAL(10, 2)); INSERT INTO donor (did, age, total_donation) VALUES (1, 30, 1500), (2, 45, 1200), (3, 22, 800), (4, 50, 1700), (5, 28, 900);", "sql": "SELECT age_group, SUM(total_donation) as total_donation FROM (SELECT CASE WHEN age < 30 THEN '18-30' WHEN age < 50 THEN '31-50' ELSE '51+' END as age_group, total_donation FROM donor) t GROUP BY age_group;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 205, "num_statements": 1} {"question": "List the defense projects with their start and end dates in the Middle East?", "schema": "CREATE TABLE projects(id INT, name VARCHAR(255), start_date DATE, end_date DATE, region VARCHAR(255));", "sql": "SELECT name, start_date, end_date FROM projects WHERE region = 'Middle East';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 77, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What Position does the Team of KR and the Player of Marcus Walker have listed?", "schema": "CREATE TABLE table_name_8 (position VARCHAR, team VARCHAR, player VARCHAR)", "sql": "SELECT position FROM table_name_8 WHERE team = 'kr' AND player = 'marcus walker';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 81, "num_statements": 1} {"question": "How many members joined in the first six months of 2022, by age group?", "schema": "CREATE TABLE memberships (membership_id INT, join_date DATE, age INT); INSERT INTO memberships (membership_id, join_date, age) VALUES (1, '2022-01-15', 25), (2, '2022-04-20', 32), (3, '2022-06-05', 28);", "sql": "SELECT COUNT(*) as first_half_count, FLOOR(age / 10) * 10 as age_group FROM memberships WHERE join_date BETWEEN '2022-01-01' AND '2022-06-30' GROUP BY age_group;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 161, "num_statements": 1} {"question": "Show the total revenue generated per day for the past week", "schema": "CREATE TABLE sales (sale_id INT, sale_date DATE, revenue DECIMAL(10,2));", "sql": "SELECT DATE(sale_date) AS sale_day, SUM(revenue) AS total_revenue FROM sales WHERE sale_date >= DATE(NOW()) - INTERVAL 7 DAY GROUP BY sale_day;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 143, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'indexing' (example 441).", "schema": null, "sql": "create table idxpart_another_1 partition of idxpart_another for values from (0) to (100);", "explanation": "DDL from PostgreSQL core regression test for Indexing.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_table", "is_postgresql_specific": false, "sql_length": 89, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the length when the expected start of revenue is more than 2017?", "schema": "CREATE TABLE table_name_62 (length VARCHAR, expected_start_of_revenue_services INTEGER)", "sql": "SELECT length FROM table_name_62 WHERE expected_start_of_revenue_services > 2017;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 81, "num_statements": 1} {"question": "What are the names and locations of healthcare workers in the \"rural_healthcenters\" table?", "schema": "CREATE TABLE rural_healthcenters (id INT, name TEXT, location TEXT, position TEXT); INSERT INTO rural_healthcenters (id, name, location, position) VALUES (1, 'Healthcenter A', 'Rural Area 1', 'Doctor'), (2, 'Healthcenter B', 'Rural Area 2', 'Nurse'), (3, 'Healthcenter C', 'Rural Area 3', 'Admin');", "sql": "SELECT name, location FROM rural_healthcenters;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 47, "num_statements": 1} {"question": "What is the total revenue generated from members in the \"Young Adults\" demographic segment for the year 2020?", "schema": "CREATE SCHEMA fitness; CREATE TABLE membership (member_id INT, demographic_segment VARCHAR(20)); CREATE TABLE revenue (member_id INT, revenue DECIMAL(10,2), transaction_date DATE); INSERT INTO membership (member_id, demographic_segment) VALUES (1, 'Young Adults'), (2, 'Seniors'); INSERT INTO revenue (member_id, revenue, transaction_date) VALUES (1, 500, '2020-01-01'), (1, 600, '2020-02-01'), (2, 300, '2020-01-01');", "sql": "SELECT SUM(revenue) FROM revenue INNER JOIN membership ON revenue.member_id = membership.member_id WHERE membership.demographic_segment = 'Young Adults' AND YEAR(transaction_date) = 2020;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 187, "num_statements": 1} {"question": "What is the total number of ad impressions and clicks for each advertiser, and what is the difference between them?", "schema": "CREATE TABLE advertisers (id INT, name VARCHAR(50)); CREATE TABLE ad_impressions (advertiser_id INT, impression_time TIMESTAMP); CREATE TABLE ad_clicks (advertiser_id INT, click_time TIMESTAMP);", "sql": "SELECT advertisers.name, COUNT(ad_impressions.advertiser_id) as total_impressions, COUNT(ad_clicks.advertiser_id) as total_clicks, COUNT(ad_impressions.advertiser_id) - COUNT(ad_clicks.advertiser_id) as difference FROM advertisers LEFT JOIN ad_impressions ON advertisers.id = ad_impressions.advertiser_id LEFT JOIN ad_clicks ON advertisers.id = ad_clicks.advertiser_id GROUP BY advertisers.name;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 395, "num_statements": 1} {"question": "Identify habitats with more than 30 animals and the number of species in each, excluding those habitats that are not protected", "schema": "CREATE TABLE habitats (id INT, name VARCHAR(255), is_protected BOOLEAN);CREATE TABLE animals (id INT, species_id INT, habitat_id INT);CREATE TABLE species (id INT, name VARCHAR(255)); INSERT INTO habitats (id, name, is_protected) VALUES (1, 'Forest', TRUE), (2, 'Savannah', FALSE), (3, 'Mountain', TRUE); INSERT INTO animals (id, species_id, habitat_id) VALUES (1, 1, 2), (2, 2, 1), (3, 3, 2), (4, 1, 2), (5, 4, 1), (6, 5, 3); INSERT INTO species (id, name) VALUES (1, 'Lion'), (2, 'Elephant'), (3, 'Giraffe'), (4, 'Zebra'), (5, 'Snow Leopard');", "sql": "SELECT h.name AS habitat_name, COUNT(DISTINCT a.species_id) AS species_count FROM animals a INNER JOIN habitats h ON a.habitat_id = h.id WHERE h.is_protected = TRUE GROUP BY h.name HAVING COUNT(a.id) > 30;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 205, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'test_decoding' (example 28).", "schema": null, "sql": "INSERT INTO replication_example(somedata, text) VALUES (3, 1);", "explanation": "Example query from the 'test_decoding' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 62, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'partition_join' (example 68).", "schema": null, "sql": "CREATE INDEX iprt1_e_p3_ab2 on prt1_e_p3(((a+b)/2));", "explanation": "DDL from PostgreSQL core regression test for Partition Join.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "ddl_index", "is_postgresql_specific": false, "sql_length": 52, "num_statements": 1} {"question": "Determine the year with the highest Europium production.", "schema": "CREATE TABLE Europium_Production (Year INT, Quarter INT, Quantity INT); INSERT INTO Europium_Production (Year, Quarter, Quantity) VALUES (2018, 1, 150), (2018, 2, 175), (2018, 3, 200), (2018, 4, 225), (2019, 1, 250), (2019, 2, 275), (2019, 3, 300), (2019, 4, 325);", "sql": "SELECT Year, MAX(Quantity) FROM Europium_Production GROUP BY Year ORDER BY MAX(Quantity) DESC LIMIT 1;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 102, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the name of the race in Kenya with a time of 30:27?", "schema": "CREATE TABLE table_name_96 (race VARCHAR, nation VARCHAR, time VARCHAR)", "sql": "SELECT race FROM table_name_96 WHERE nation = 'kenya' AND time = '30:27';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 73, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'btree_gist' (example 10).", "schema": null, "sql": "SELECT count(*) FROM macaddr8tmp WHERE a < '22:00:5c:e5:9b:0d'::macaddr8;", "explanation": "Example query from the 'btree_gist' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 74, "num_statements": 1} {"question": "Show the total number of unique IP addresses that have been associated with malicious activity in the last month, including any IP addresses that have been involved in more than one incident.", "schema": "CREATE TABLE malicious_activity (id INT, ip VARCHAR(255), activity_date DATE); INSERT INTO malicious_activity (id, ip, activity_date) VALUES (1, '192.168.0.1', '2022-01-01'); INSERT INTO malicious_activity (id, ip, activity_date) VALUES (2, '192.168.0.2', '2022-01-05'); INSERT INTO malicious_activity (id, ip, activity_date) VALUES (3, '192.168.0.1', '2022-01-09');", "sql": "SELECT COUNT(DISTINCT ip) as total_unique_ips FROM malicious_activity WHERE activity_date >= DATEADD(month, -1, GETDATE());", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 123, "num_statements": 1} {"question": "What is the average temperature change in the Arctic per year since 1990?", "schema": "CREATE TABLE weather_data (year INT, location VARCHAR(50), temperature FLOAT);", "sql": "SELECT AVG(temperature_change) FROM (SELECT (temperature - LAG(temperature) OVER (ORDER BY year)) AS temperature_change FROM weather_data WHERE year >= 1990) AS subquery;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_window_function", "is_postgresql_specific": true, "sql_length": 170, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'numeric_big' (example 281).", "schema": null, "sql": "INSERT INTO num_exp_div VALUES (6,4,'.000000000000000000000000008807232244507937251856465017967626593430084223212999583902527587737263981869382895220711835510154989851222501080395520249593128253795609198666884523792646863341248402687314509176781281863891589925961900674092953408613128961234166906173266411035009516545964362406728942021813644419154548354247112601793685146960840364604115937119024575638240439041250900118977183124605578660115160551830946251713350556181960983267689939549506518185340972020820080460565392359379680036788592213479105831301723237102710863182596413567756605711230290883888612188805367801369264231165178487334557824054205160222371548005742602736713668548450400926514169967213301919971189065307721110805424950794015852531342286935114651278691214233054575660712537044810163930633456573860895791198853393107188289695511873068');", "explanation": "DML from PostgreSQL core regression test for Numeric Big.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 851, "num_statements": 1} {"question": "What are the regulatory frameworks in place for smart contracts in the APAC region?", "schema": "CREATE TABLE RegulatoryFramework (FrameworkID INT, FrameworkName VARCHAR(100), FrameworkDescription VARCHAR(255), FrameworkRegion VARCHAR(50)); INSERT INTO RegulatoryFramework (FrameworkID, FrameworkName, FrameworkDescription, FrameworkRegion) VALUES (1, 'FrameworkA', 'DescriptionA', 'APAC'), (2, 'FrameworkB', 'DescriptionB', 'APAC'), (3, 'FrameworkC', 'DescriptionC', 'Europe');", "sql": "SELECT FrameworkName, FrameworkDescription FROM RegulatoryFramework WHERE FrameworkRegion = 'APAC';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 99, "num_statements": 1} {"question": "PostgreSQL regression test 'boolean': Write the SELECT query (example 26).", "schema": null, "sql": "SELECT bool '' AS error;", "explanation": "Regression test for Boolean in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT bool '' AS error) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 24, "num_statements": 1} {"question": "pgTAP test for Resultset (assertion 30).", "schema": null, "sql": "INSERT INTO names (name) VALUES ('David');", "explanation": "SQL assertion from pgTAP test suite for Resultset.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 42, "num_statements": 1} {"question": "Which mobile subscribers have not made a call in the last 30 days in the 'Europe' region?", "schema": "CREATE TABLE mobile_subscribers (subscriber_id INT, region VARCHAR(50), last_call_date DATETIME); INSERT INTO mobile_subscribers (subscriber_id, region, last_call_date) VALUES (1, 'Europe', '2022-02-15'), (2, 'Europe', '2022-03-03'), (3, 'Europe', NULL), (4, 'Europe', '2022-02-20'), (5, 'Europe', '2022-03-05');", "sql": "SELECT subscriber_id FROM mobile_subscribers WHERE region = 'Europe' AND last_call_date IS NULL OR last_call_date < DATE_SUB(CURDATE(), INTERVAL 30 DAY);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 153, "num_statements": 1} {"question": "What is the average donation amount per month?", "schema": "CREATE TABLE Donations (DonationID INT, DonationDate DATE, DonationAmount DECIMAL(10,2)); INSERT INTO Donations (DonationID, DonationDate, DonationAmount) VALUES (1, '2022-01-01', 100.00), (2, '2022-01-15', 200.00), (3, '2022-02-01', 300.00), (4, '2022-02-15', 400.00);", "sql": "SELECT AVG(DonationAmount) OVER (PARTITION BY EXTRACT(MONTH FROM DonationDate) ORDER BY EXTRACT(MONTH FROM DonationDate)) AS AvgDonationPerMonth FROM Donations;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "query_window_function", "is_postgresql_specific": true, "sql_length": 160, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which event had a total of 28?", "schema": "CREATE TABLE table_name_51 (event VARCHAR, total VARCHAR)", "sql": "SELECT event FROM table_name_51 WHERE total = '28';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 51, "num_statements": 1} {"question": "Find the number of days in 'October 2021' with zero water usage for 'industrial' purposes in the 'water_usage' table", "schema": "CREATE TABLE water_usage (id INT, usage FLOAT, purpose VARCHAR(20), date DATE); INSERT INTO water_usage (id, usage, purpose, date) VALUES (1, 0, 'industrial', '2021-10-01'); INSERT INTO water_usage (id, usage, purpose, date) VALUES (2, 150, 'residential', '2021-10-01');", "sql": "SELECT COUNT(*) FROM (SELECT date FROM water_usage WHERE purpose = 'industrial' AND usage = 0 AND date BETWEEN '2021-10-01' AND '2021-10-31' GROUP BY date HAVING COUNT(*) = 1) as zero_days;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 189, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'merge' (example 365).", "schema": null, "sql": "CREATE TABLE part1 PARTITION OF pa_target FOR VALUES IN (1,4)\n WITH (autovacuum_enabled=off);", "explanation": "DDL from PostgreSQL core regression test for Merge.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_table", "is_postgresql_specific": false, "sql_length": 94, "num_statements": 1} {"question": "What is the average trip duration for wildlife enthusiasts from Brazil?", "schema": "CREATE TABLE wildlife_tourists (id INT, name VARCHAR, country VARCHAR, trip_duration FLOAT); INSERT INTO wildlife_tourists (id, name, country, trip_duration) VALUES (1, 'Ana Silva', 'Brazil', 10.3);", "sql": "SELECT AVG(trip_duration) FROM wildlife_tourists WHERE country = 'Brazil';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 74, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many times was incumbent Noble Jones Gregory first elected?", "schema": "CREATE TABLE table_1342218_17 (first_elected VARCHAR, incumbent VARCHAR)", "sql": "SELECT COUNT(first_elected) FROM table_1342218_17 WHERE incumbent = 'Noble Jones Gregory';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 90, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many cards does customer Art Turcotte have?", "schema": "CREATE TABLE Customers_cards (customer_id VARCHAR); CREATE TABLE Customers (customer_id VARCHAR, customer_first_name VARCHAR, customer_last_name VARCHAR)", "sql": "SELECT COUNT(*) FROM Customers_cards AS T1 JOIN Customers AS T2 ON T1.customer_id = T2.customer_id WHERE T2.customer_first_name = 'Art' AND T2.customer_last_name = 'Turcotte';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 175, "num_statements": 1} {"question": "Find the total number of transactions and trading volume for each developer in the 'developers' and 'decentralized_exchanges' tables.", "schema": "CREATE TABLE developers (developer_id INT, developer_name VARCHAR(255)); CREATE TABLE decentralized_exchanges (exchange_name VARCHAR(255), developer_id INT, transaction_count INT, trading_volume DECIMAL(10, 2));", "sql": "SELECT d.developer_name, SUM(de.transaction_count) as total_transactions, SUM(de.trading_volume) as total_volume FROM developers d INNER JOIN decentralized_exchanges de ON d.developer_id = de.developer_id GROUP BY d.developer_name;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 231, "num_statements": 1} {"question": "Update the name of the eco-friendly hotel in Spain to 'Green Hotel Barcelona'.", "schema": "CREATE TABLE hotels (id INT, name TEXT, country TEXT, type TEXT); INSERT INTO hotels (id, name, country, type) VALUES (1, 'Eco Hotel Madrid', 'Spain', 'eco');", "sql": "UPDATE hotels SET name = 'Green Hotel Barcelona' WHERE id = 1 AND country = 'Spain' AND type = 'eco';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 101, "num_statements": 1} {"question": "Write the SQL definition from PostgreSQL contrib extension 'contrib' (item 8).", "schema": null, "sql": "/* Now redefine */\nCREATE FUNCTION pg_stat_statements(IN showtext boolean,\n OUT userid oid,\n OUT dbid oid,\n OUT toplevel bool,\n OUT queryid bigint,\n OUT query text,\n OUT plans int8,\n OUT total_plan_time float8,\n OUT min_plan_time float8,\n OUT max_plan_time float8,\n OUT mean_plan_time float8,\n OUT stddev_plan_time float8,\n OUT calls int8,\n OUT total_exec_time float8,\n OUT min_exec_time float8,\n OUT max_exec_time float8,\n OUT mean_exec_time float8,\n OUT stddev_exec_time float8,\n OUT rows int8,\n OUT shared_blks_hit int8,\n OUT shared_blks_read int8,\n OUT shared_blks_dirtied int8,\n OUT shared_blks_written int8,\n OUT local_blks_hit int8,\n OUT local_blks_read int8,\n OUT local_blks_dirtied int8,\n OUT local_blks_written int8,\n OUT temp_blks_read int8,\n OUT temp_blks_written int8,\n OUT blk_read_time float8,\n OUT blk_write_time float8,\n OUT wal_records int8,\n OUT wal_fpi int8,\n OUT wal_bytes numeric\n)\nRETURNS SETOF record\nAS 'MODULE_PATHNAME', 'pg_stat_statements_1_9'\nLANGUAGE C STRICT VOLATILE PARALLEL SAFE;", "explanation": "SQL definition from the 'contrib' PostgreSQL contrib extension.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 1112, "num_statements": 1} {"question": "What is the total number of animals in 'animal_population' table?", "schema": "CREATE TABLE animal_population (id INT, species VARCHAR(50), population INT);INSERT INTO animal_population (id, species, population) VALUES (1, 'Tiger', 250), (2, 'Elephant', 500);", "sql": "SELECT SUM(population) FROM animal_population;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 46, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the description of the color for most products?", "schema": "CREATE TABLE ref_colors (color_description VARCHAR, color_code VARCHAR); CREATE TABLE products (color_code VARCHAR)", "sql": "SELECT t2.color_description FROM products AS t1 JOIN ref_colors AS t2 ON t1.color_code = t2.color_code GROUP BY t2.color_description ORDER BY COUNT(*) DESC LIMIT 1;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 164, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many losses have corinthians as the team, with an against greater than 26?", "schema": "CREATE TABLE table_name_32 (lost INTEGER, team VARCHAR, against VARCHAR)", "sql": "SELECT SUM(lost) FROM table_name_32 WHERE team = 'corinthians' AND against > 26;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 80, "num_statements": 1} {"question": "Which defense contracts have a total value greater than $50,000,000?", "schema": "CREATE TABLE defense_contracts (contract_id INT, contract_value FLOAT, contract_description TEXT, vendor_name TEXT, agency_name TEXT); INSERT INTO defense_contracts (contract_id, contract_value, contract_description, vendor_name, agency_name) VALUES (1, 75000000, 'Aircraft maintenance services', 'ABC Aerospace', 'US Air Force'); INSERT INTO defense_contracts (contract_id, contract_value, contract_description, vendor_name, agency_name) VALUES (2, 30000000, 'Cybersecurity services', 'DEF Security Solutions', 'US Army');", "sql": "SELECT contract_id, contract_value, contract_description, vendor_name, agency_name FROM defense_contracts WHERE contract_value > 50000000;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 138, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Orbital period of 89 to 128 min has what specific orbital energy?", "schema": "CREATE TABLE table_name_73 (specific_orbital_energy VARCHAR, orbital_period VARCHAR)", "sql": "SELECT specific_orbital_energy FROM table_name_73 WHERE orbital_period = '89 to 128 min';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 89, "num_statements": 1} {"question": "What is the total revenue for vegetarian menu items?", "schema": "CREATE TABLE restaurants (id INT, name VARCHAR(255)); INSERT INTO restaurants (id, name) VALUES (1, 'Restaurant A'), (2, 'Restaurant B'), (3, 'Restaurant C'); CREATE TABLE menu_items (id INT, name VARCHAR(255), vegetarian BOOLEAN, restaurant_id INT); INSERT INTO menu_items (id, name, vegetarian, restaurant_id) VALUES (1, 'Tacos', FALSE, 1), (2, 'Pizza', TRUE, 2), (3, 'Fried Rice', FALSE, 3), (4, 'Burrito', TRUE, 1), (5, 'Spaghetti', FALSE, 2); CREATE TABLE orders (menu_item_id INT, revenue INT); INSERT INTO orders (menu_item_id, revenue) VALUES (1, 500), (2, 700), (3, 600), (4, 800), (5, 900);", "sql": "SELECT SUM(o.revenue) as total_revenue FROM orders o JOIN menu_items mi ON o.menu_item_id = mi.id WHERE mi.vegetarian = TRUE;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 125, "num_statements": 1} {"question": "Determine the total number of volunteers for each gender, from the 'Volunteer_Info' table, grouped by Gender.", "schema": "CREATE TABLE Volunteer_Info (VolunteerID INT, First_Name VARCHAR(50), Last_Name VARCHAR(50), Gender VARCHAR(10));", "sql": "SELECT Gender, COUNT(*) AS Number_Of_Volunteers FROM Volunteer_Info GROUP BY Gender;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 84, "num_statements": 1} {"question": "Delete all posts with comments made by a user from Brazil before 2022-07-01.", "schema": "CREATE TABLE posts (id INT, user_id INT, country VARCHAR(20), title TEXT, created_date DATE); INSERT INTO posts (id, user_id, country, title, created_date) VALUES (1, 12, 'Brazil', 'AI in Society', '2022-06-01'); CREATE TABLE comments (id INT, post_id INT, user_id INT, text TEXT, created_date DATE); INSERT INTO comments (id, post_id, user_id, text, created_date) VALUES (1, 1, 13, 'Interesting thoughts.', '2022-06-15'), (2, 1, 13, 'Good read.', '2022-07-10');", "sql": "DELETE FROM posts WHERE id IN (SELECT post_id FROM comments c JOIN (SELECT post_id, MIN(created_date) as mindate FROM comments WHERE country = 'Brazil' GROUP BY post_id) d ON c.post_id = d.post_id WHERE c.country = 'Brazil' AND c.created_date < d.mindate);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "dml_delete", "is_postgresql_specific": false, "sql_length": 256, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What city is Gwang-Yang stadium in?", "schema": "CREATE TABLE table_name_2 (city VARCHAR, stadium VARCHAR)", "sql": "SELECT city FROM table_name_2 WHERE stadium = 'gwang-yang stadium';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 67, "num_statements": 1} {"question": "Who is the top donor in terms of total donation amount?", "schema": "CREATE TABLE Donors (DonorID INT, Name TEXT, DonationAmount DECIMAL);", "sql": "SELECT Name, SUM(DonationAmount) AS TotalDonation FROM Donors GROUP BY Name ORDER BY TotalDonation DESC LIMIT 1;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 112, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the original air date of 1/2 in season", "schema": "CREATE TABLE table_16951593_1 (original_air_date VARCHAR, no_in_season VARCHAR)", "sql": "SELECT original_air_date FROM table_16951593_1 WHERE no_in_season = '1/2';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 74, "num_statements": 1} {"question": "What is the total sales by ethical material and gender?", "schema": "CREATE TABLE SalesByMaterialAndGender (SaleID INT, Material VARCHAR(50), Gender VARCHAR(50), Sales DECIMAL(5,2)); INSERT INTO SalesByMaterialAndGender (SaleID, Material, Gender, Sales) VALUES (1, 'Organic Cotton', 'Male', 500.50), (2, 'Hemp', 'Female', 250.20), (3, 'Recycled Polyester', 'Non-binary', 156.60), (4, 'Tencel', 'Male', 100.10);", "sql": "SELECT Material, Gender, SUM(Sales) AS TotalSales FROM SalesByMaterialAndGender GROUP BY Material, Gender;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 106, "num_statements": 1} {"question": "What is the total cost of wind projects?", "schema": "CREATE TABLE projects (project_id INT, project_name VARCHAR(50), project_type VARCHAR(50), project_cost FLOAT, project_start_date DATE); INSERT INTO projects (project_id, project_name, project_type, project_cost, project_start_date) VALUES (1, 'Solar Farm A', 'Solar', 5000000, '2020-01-01'), (2, 'Wind Farm B', 'Wind', 7000000, '2019-01-01');", "sql": "SELECT SUM(project_cost) as total_wind_cost FROM projects WHERE project_type = 'Wind';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 86, "num_statements": 1} {"question": "PostgreSQL regression test 'int8': Write the SELECT query (example 18).", "schema": null, "sql": "SELECT * FROM INT8_TBL WHERE q2 >= 4567890123456789;", "explanation": "Regression test for Int8 in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT * FROM INT8_TBL WHERE q2 >= 4567890123456789) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 52, "num_statements": 1} {"question": "Insert new records into the 'inventory' table with the following data: (1001, 'Pre-rolls', 100, 20), (1002, 'Gelato', 50, 15), (1003, 'Sour Diesel', 75, 12)", "schema": "CREATE TABLE inventory (inventory_id INT, product_name VARCHAR(255), quantity INT, price DECIMAL(5,2));", "sql": "INSERT INTO inventory (inventory_id, product_name, quantity, price) VALUES (1001, 'Pre-rolls', 100, 20), (1002, 'Gelato', 50, 15), (1003, 'Sour Diesel', 75, 12);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 161, "num_statements": 1} {"question": "List all defense contracts with vendor companies located in Texas, sorted by contract value in descending order.", "schema": "CREATE TABLE defense_contracts (contract_id INT, contract_value FLOAT, vendor_state VARCHAR(2)); INSERT INTO defense_contracts (contract_id, contract_value, vendor_state) VALUES (1, 1000000, 'TX'), (2, 500000, 'CA'), (3, 750000, 'TX');", "sql": "SELECT * FROM defense_contracts WHERE vendor_state = 'TX' ORDER BY contract_value DESC;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 87, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'hstore' (example 164).", "schema": null, "sql": "insert into testhstore0 values (1, 'foo', 1.2, 3::float8);", "explanation": "Example query from the 'hstore' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 58, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: what is the amount lost when there is 1 draw and 515 points?", "schema": "CREATE TABLE table_name_72 (lost VARCHAR, drawn VARCHAR, points_for VARCHAR)", "sql": "SELECT lost FROM table_name_72 WHERE drawn = '1' AND points_for = '515';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 72, "num_statements": 1} {"question": "List the names and case numbers of cases in 'cases' table that were assigned to attorney_id 5", "schema": "CREATE TABLE cases (case_id INT, case_number VARCHAR(50), client_name VARCHAR(50), attorney_id INT);", "sql": "SELECT cases.case_number, cases.client_name FROM cases WHERE cases.attorney_id = 5;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 83, "num_statements": 1} {"question": "PostgreSQL regression test 'misc_functions': Write the SELECT query (example 128).", "schema": null, "sql": "SELECT segment_number, file_offset\nFROM pg_walfile_name_offset('0/0'::pg_lsn + :segment_size + 1),\n pg_split_walfile_name(file_name);", "explanation": "Regression test for Misc Functions in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT segment_number, file_offset\nFROM pg_walfile_name_offset('0/0'::pg_lsn + :segment_size + 1),\n pg_split_walfile_name(file_name)) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 137, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: When did the north melbourne team play?", "schema": "CREATE TABLE table_name_7 (date VARCHAR, home_team VARCHAR)", "sql": "SELECT date FROM table_name_7 WHERE home_team = 'north melbourne';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "pgTAP test for Hastap (assertion 181).", "schema": null, "sql": "SELECT * FROM check_test(\n has_operator( 'integer', 'pg_catalog', '<=', 'text', 'bool', 'desc' ),\n false,\n 'has_operator( left, schema, name, right, result, desc ) fail',\n 'desc',\n ''\n);", "explanation": "SQL assertion from pgTAP test suite for Hastap.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": true, "sql_length": 191, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is Volume:Issue, when Weeks on Top is less than 5, and when Issue Date(s) are 10 July - 24 July?", "schema": "CREATE TABLE table_name_75 (volume VARCHAR, weeks_on_top VARCHAR, issue_date_s_ VARCHAR)", "sql": "SELECT volume AS :issue FROM table_name_75 WHERE weeks_on_top < 5 AND issue_date_s_ = '10 july - 24 july';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 106, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the lowest Game where Inning is 6th, and the Opposing Pitcher is cliff curtis?", "schema": "CREATE TABLE table_name_14 (game INTEGER, inning VARCHAR, opposing_pitcher VARCHAR)", "sql": "SELECT MIN(game) FROM table_name_14 WHERE inning = '6th' AND opposing_pitcher = 'cliff curtis';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 95, "num_statements": 1} {"question": "What is the percentage change in yield for each crop over the past year?", "schema": "CREATE TABLE farming (id INT, name TEXT, location TEXT, crop TEXT, yield INT, year INT); INSERT INTO farming VALUES (1, 'Smith Farm', 'Colorado', 'Corn', 120, 2020), (2, 'Brown Farm', 'Nebraska', 'Soybeans', 45, 2020), (3, 'Jones Farm', 'Iowa', 'Wheat', 80, 2020), (1, 'Smith Farm', 'Colorado', 'Corn', 130, 2021), (2, 'Brown Farm', 'Nebraska', 'Soybeans', 50, 2021), (3, 'Jones Farm', 'Iowa', 'Wheat', 85, 2021);", "sql": "SELECT crop, ROUND(100.0 * (yield - LAG(yield) OVER (PARTITION BY crop ORDER BY year)) / LAG(yield) OVER (PARTITION BY crop ORDER BY year), 2) as percentage_change FROM farming WHERE year > 2020;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "query_window_function", "is_postgresql_specific": true, "sql_length": 195, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What season has a number less than 90, Mitte as the league and spvgg ruhmannsfelden as the team?", "schema": "CREATE TABLE table_name_95 (season VARCHAR, team VARCHAR, number VARCHAR, league VARCHAR)", "sql": "SELECT season FROM table_name_95 WHERE number < 90 AND league = 'mitte' AND team = 'spvgg ruhmannsfelden';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 106, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What are average launches with 0 failures, rocket of Soyuz, and less than 12 successes?", "schema": "CREATE TABLE table_name_3 (launches INTEGER, successes VARCHAR, failures VARCHAR, rocket VARCHAR)", "sql": "SELECT AVG(launches) FROM table_name_3 WHERE failures = 0 AND rocket = 'soyuz' AND successes < 12;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 98, "num_statements": 1} {"question": "What is the number of infrastructure projects in the Western region of the US for each year since 2015, categorized by type?", "schema": "CREATE TABLE Infrastructure_Projects (Project_ID INT, Project_Name VARCHAR(255), Project_Type VARCHAR(255), Year INT, State VARCHAR(255));", "sql": "SELECT Year, Project_Type, COUNT(*) FROM Infrastructure_Projects WHERE State LIKE '%Western%' AND Year >= 2015 GROUP BY Year, Project_Type;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 139, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: I want the NGC number for leo of irregular galaxy", "schema": "CREATE TABLE table_name_16 (ngc_number VARCHAR, constellation VARCHAR, object_type VARCHAR)", "sql": "SELECT ngc_number FROM table_name_16 WHERE constellation = 'leo' AND object_type = 'irregular galaxy';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 102, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who won the Suntree Seniors Classic?", "schema": "CREATE TABLE table_11622924_1 (winner VARCHAR, tournament VARCHAR)", "sql": "SELECT winner FROM table_11622924_1 WHERE tournament = 'Suntree Seniors Classic';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 81, "num_statements": 1} {"question": "What is the maximum installed capacity of wind energy projects in Spain?", "schema": "CREATE TABLE wind_projects (id INT, country VARCHAR(50), capacity FLOAT); INSERT INTO wind_projects (id, country, capacity) VALUES (1, 'Spain', 234.56), (2, 'Italy', 123.45), (3, 'Spain', 678.90);", "sql": "SELECT MAX(capacity) FROM wind_projects WHERE country = 'Spain';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 64, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'btree_gist' (example 13).", "schema": null, "sql": "SELECT count(*) FROM float8tmp WHERE a = -1890.0::float8;", "explanation": "Example query from the 'btree_gist' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 58, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: When the record was 32-37 what was the score?", "schema": "CREATE TABLE table_name_92 (score VARCHAR, record VARCHAR)", "sql": "SELECT score FROM table_name_92 WHERE record = '32-37';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 55, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the name for deceased spouse being louis malle", "schema": "CREATE TABLE table_24143253_1 (name VARCHAR, deceased_spouse VARCHAR)", "sql": "SELECT name FROM table_24143253_1 WHERE deceased_spouse = 'Louis Malle';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 72, "num_statements": 1} {"question": "PostgreSQL regression test 'jsonb_jsonpath': Write the SELECT query (example 64).", "schema": null, "sql": "select jsonb_path_query('{\"a\": 12, \"b\": {\"a\": 13}}', '$.a');", "explanation": "Regression test for Jsonb Jsonpath in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select jsonb_path_query('{\"a\": 12, \"b\": {\"a\": 13}}', '$.a')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "advanced", "category": "query_select", "is_postgresql_specific": false, "sql_length": 60, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What place was the viewed horse?", "schema": "CREATE TABLE table_name_13 (placing VARCHAR, horse VARCHAR)", "sql": "SELECT placing FROM table_name_13 WHERE horse = 'viewed';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 57, "num_statements": 1} {"question": "How many visual art exhibitions were held in Paris and London in 2020?", "schema": "CREATE TABLE exhibitions (id INT, city VARCHAR(20), year INT, type VARCHAR(10)); INSERT INTO exhibitions (id, city, year, type) VALUES (1, 'Paris', 2020, 'visual art'), (2, 'London', 2019, 'visual art'), (3, 'Paris', 2020, 'performing art'), (4, 'London', 2020, 'visual art'), (5, 'New York', 2020, 'visual art');", "sql": "SELECT COUNT(*) FROM exhibitions WHERE city IN ('Paris', 'London') AND year = 2020 AND type = 'visual art';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "query_select", "is_postgresql_specific": false, "sql_length": 107, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the average episode number where jimmy mulville was the 4th performer?", "schema": "CREATE TABLE table_name_35 (episode INTEGER, performer_4 VARCHAR)", "sql": "SELECT AVG(episode) FROM table_name_35 WHERE performer_4 = 'jimmy mulville';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "advanced", "category": "plpgsql", "is_postgresql_specific": false, "sql_length": 76, "num_statements": 1} {"question": "How many unique users have streamed songs from the country genre?", "schema": "CREATE TABLE songs (id INT, title VARCHAR(255), genre VARCHAR(255), release_year INT); CREATE TABLE streams (stream_id INT, song_id INT, user_id INT, timestamp TIMESTAMP); INSERT INTO songs (id, title, genre, release_year) VALUES (1, 'Song1', 'Country', 2016), (2, 'Song2', 'Rock', 2010), (3, 'Song3', 'Country', 2015); INSERT INTO streams (stream_id, song_id, user_id, timestamp) VALUES (1, 1, 1, '2022-01-01 10:00:00'), (2, 2, 2, '2022-01-02 10:00:00'), (3, 1, 3, '2022-01-03 10:00:00'), (4, 3, 4, '2022-01-04 10:00:00'), (5, 1, 5, '2022-01-05 10:00:00');", "sql": "SELECT COUNT(DISTINCT user_id) FROM streams JOIN songs ON streams.song_id = songs.id WHERE songs.genre = 'Country';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 115, "num_statements": 1} {"question": "pgTAP test for Resultset (assertion 301).", "schema": null, "sql": "UPDATE dubs SET name = 'foo' WHERE pk = 1;", "explanation": "SQL assertion from pgTAP test suite for Resultset.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 42, "num_statements": 1} {"question": "Find the total production quantity (in metric tons) of Yttrium for the years 2014, 2015 and 2016.", "schema": "CREATE TABLE production_data (year INT, element TEXT, production_quantity FLOAT); INSERT INTO production_data (year, element, production_quantity) VALUES (2014, 'Yttrium', 700); INSERT INTO production_data (year, element, production_quantity) VALUES (2015, 'Yttrium', 750); INSERT INTO production_data (year, element, production_quantity) VALUES (2016, 'Yttrium', 800);", "sql": "SELECT SUM(production_quantity) FROM production_data WHERE element = 'Yttrium' AND year BETWEEN 2014 AND 2016;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 110, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which away team that had 48 as a Tie no?", "schema": "CREATE TABLE table_name_46 (away_team VARCHAR, tie_no VARCHAR)", "sql": "SELECT away_team FROM table_name_46 WHERE tie_no = '48';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 56, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which Delegate has a Placement in Miss Universe of fourth runner-up, and a Hometown of makati , rizal?", "schema": "CREATE TABLE table_name_96 (delegate VARCHAR, placement_in_miss_universe VARCHAR, hometown VARCHAR)", "sql": "SELECT delegate FROM table_name_96 WHERE placement_in_miss_universe = 'fourth runner-up' AND hometown = 'makati , rizal';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 121, "num_statements": 1} {"question": "How many marine species are found in the Atlantic basin, grouped by species name?", "schema": "CREATE TABLE marine_species_atlantic (name VARCHAR(255), basin VARCHAR(255)); INSERT INTO marine_species_atlantic (name, basin) VALUES ('Species1', 'Atlantic'), ('Species2', 'Pacific'), ('Species3', 'Indian'), ('Species4', 'Atlantic');", "sql": "SELECT name, COUNT(*) as num_species FROM marine_species_atlantic WHERE basin = 'Atlantic' GROUP BY name;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 108, "num_statements": 1} {"question": "What is the total number of military personnel and their corresponding rank for each branch in the 'military_personnel' table, sorted by the total number of personnel in descending order?", "schema": "CREATE TABLE military_personnel (id INT, branch VARCHAR(255), rank VARCHAR(255), personnel INT);", "sql": "SELECT branch, SUM(personnel) as total_personnel FROM military_personnel GROUP BY branch ORDER BY total_personnel DESC;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 119, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many touchdowns were made where field goals were less than 0?", "schema": "CREATE TABLE table_name_81 (touchdowns INTEGER, field_goals INTEGER)", "sql": "SELECT SUM(touchdowns) FROM table_name_81 WHERE field_goals < 0;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 64, "num_statements": 1} {"question": "Delete all records of suppliers from 'Country F' who do not supply parts to factories with Industry 4.0 practices.", "schema": "CREATE TABLE Suppliers (ID INT, Country VARCHAR(20), Industry_4_0 BOOLEAN); INSERT INTO Suppliers (ID, Country, Industry_4_0) VALUES (1, 'Country A', TRUE), (2, 'Country B', FALSE), (3, 'Country A', FALSE), (4, 'Country F', TRUE), (5, 'Country F', FALSE);", "sql": "DELETE FROM Suppliers WHERE Country = 'Country F' AND NOT EXISTS (SELECT 1 FROM Factories WHERE Suppliers.ID = Factories.Supplier_ID AND Factories.Industry_4_0 = TRUE);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "dml_delete", "is_postgresql_specific": false, "sql_length": 168, "num_statements": 1} {"question": "Calculate the revenue of non-toxic haircare products, excluding those sold in the South region", "schema": "CREATE TABLE sales (product_type VARCHAR(20), region VARCHAR(10), revenue NUMERIC(10,2)); INSERT INTO sales (product_type, region, revenue) VALUES ('shampoo', 'East', 800), ('conditioner', 'East', 900), ('hair serum', 'West', 700), ('hair spray', 'North', 600), ('hair gel', 'South', 500), ('shampoo', 'South', 400), ('conditioner', 'North', 300); CREATE TABLE products (product_type VARCHAR(20), non_toxic BOOLEAN); INSERT INTO products (product_type, non_toxic) VALUES ('shampoo', TRUE), ('conditioner', TRUE), ('hair serum', TRUE), ('hair spray', FALSE), ('hair gel', FALSE);", "sql": "SELECT SUM(revenue) FROM sales INNER JOIN products ON sales.product_type = products.product_type WHERE products.non_toxic = TRUE AND sales.region != 'South' AND sales.product_type = 'haircare';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 193, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the share when the audience was 3.944.000?", "schema": "CREATE TABLE table_name_82 (share VARCHAR, audience VARCHAR)", "sql": "SELECT share FROM table_name_82 WHERE audience = '3.944.000';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 61, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is No. 3, when No. 7 is Abigail, and when No. 4 is Ava?", "schema": "CREATE TABLE table_name_79 (no_3 VARCHAR, no_7 VARCHAR, no_4 VARCHAR)", "sql": "SELECT no_3 FROM table_name_79 WHERE no_7 = 'abigail' AND no_4 = 'ava';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 71, "num_statements": 1} {"question": "PostgreSQL regression test 'jsonb': Write the SELECT query (example 939).", "schema": null, "sql": "select jsonb_to_tsvector('[]'::jsonb, '\"all\"');", "explanation": "Regression test for Jsonb in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select jsonb_to_tsvector('[]'::jsonb, '\"all\"')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "advanced", "category": "query_select", "is_postgresql_specific": true, "sql_length": 47, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: If the time on Wed aug 25 was 20' 09.25 112.324mph, what was the time on sat Aug 28?", "schema": "CREATE TABLE table_26986076_1 (sat_28_aug VARCHAR, wed_25_aug VARCHAR)", "sql": "SELECT sat_28_aug FROM table_26986076_1 WHERE wed_25_aug = '20' 09.25 112.324mph';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 82, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the number of generators where the power capicity is 78.7?", "schema": "CREATE TABLE table_11456251_5 (number_of_generators VARCHAR, power_capacity__gw_ VARCHAR)", "sql": "SELECT COUNT(number_of_generators) FROM table_11456251_5 WHERE power_capacity__gw_ = '78.7';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 92, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'subselect' (example 195).", "schema": null, "sql": "create operator = (procedure=bogus_int8_text_eq, leftarg=int8, rightarg=text);", "explanation": "DDL from PostgreSQL core regression test for Subselect.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "ddl_advanced", "is_postgresql_specific": true, "sql_length": 78, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the result of the game when the competition was a friendly match, and the Score was 1 – 1?", "schema": "CREATE TABLE table_name_65 (result VARCHAR, competition VARCHAR, score VARCHAR)", "sql": "SELECT result FROM table_name_65 WHERE competition = 'friendly match' AND score = '1 – 1';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 90, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the score of the game which featured the gold coast blaze as the away team and the adelaide 36ers as the home team?", "schema": "CREATE TABLE table_name_16 (score VARCHAR, away_team VARCHAR, home_team VARCHAR)", "sql": "SELECT score FROM table_name_16 WHERE away_team = 'gold coast blaze' AND home_team = 'adelaide 36ers';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 102, "num_statements": 1} {"question": "Which product category had the lowest total revenue in Massachusetts in 2021?", "schema": "CREATE TABLE mass_sales (product VARCHAR(20), revenue DECIMAL(10,2), state VARCHAR(20), year INT); INSERT INTO mass_sales (product, revenue, state, year) VALUES ('Flower', 70000, 'Massachusetts', 2021), ('Concentrate', 60000, 'Massachusetts', 2021), ('Edibles', 50000, 'Massachusetts', 2021);", "sql": "SELECT product, MIN(revenue) as min_revenue FROM mass_sales WHERE state = 'Massachusetts' AND year = 2021 GROUP BY product;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 123, "num_statements": 1} {"question": "What is the average safety rating of electric vehicles, compared to non-electric vehicles, per manufacturer?", "schema": "CREATE TABLE VehicleSafetyRatings (id INT, make VARCHAR(20), model VARCHAR(20), is_electric BOOLEAN, safety_rating FLOAT); INSERT INTO VehicleSafetyRatings (id, make, model, is_electric, safety_rating) VALUES (1, 'Tesla', 'Model S', true, 5.2), (2, 'Tesla', 'Model 3', true, 5.4), (3, 'Volvo', 'XC60', false, 5.1), (4, 'Volvo', 'S60', false, 4.9), (5, 'BMW', 'X5', false, 4.8);", "sql": "SELECT make, AVG(safety_rating) FILTER (WHERE is_electric = true) AS avg_electric_safety_rating, AVG(safety_rating) FILTER (WHERE is_electric = false) AS avg_non_electric_safety_rating FROM VehicleSafetyRatings GROUP BY make;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": true, "sql_length": 226, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the mosst finals apps", "schema": "CREATE TABLE table_21220720_1 (finals_apps INTEGER)", "sql": "SELECT MAX(finals_apps) FROM table_21220720_1;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 46, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'generated_stored' (example 17).", "schema": null, "sql": "CREATE TABLE gtest_err_5b (a int PRIMARY KEY, b int GENERATED ALWAYS AS identity GENERATED ALWAYS AS (a * 2) STORED);", "explanation": "DDL from PostgreSQL core regression test for Generated Stored.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_table", "is_postgresql_specific": true, "sql_length": 117, "num_statements": 1} {"question": "How many agricultural extension officers were trained in Kenya in 2019?", "schema": "CREATE TABLE officer_training (id INT, officer_id INT, country VARCHAR(50), training_year INT, trained BOOLEAN); INSERT INTO officer_training (id, officer_id, country, training_year, trained) VALUES (1, 4001, 'Kenya', 2019, true), (2, 4002, 'Kenya', 2018, true);", "sql": "SELECT COUNT(*) FROM officer_training WHERE country = 'Kenya' AND training_year = 2019 AND trained = true;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 106, "num_statements": 1} {"question": "What are the names of the cybersecurity strategies implemented before 2015?", "schema": "CREATE TABLE Cybersecurity_Strategies (Year INT, Strategy VARCHAR(255)); INSERT INTO Cybersecurity_Strategies (Year, Strategy) VALUES (2005, 'Cybersecurity Initiative'), (2010, 'Comprehensive National Cybersecurity Initiative'), (2015, 'Cybersecurity National Action Plan');", "sql": "SELECT Strategy FROM Cybersecurity_Strategies WHERE Year < 2015;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 64, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the second leg that has kk bosna?", "schema": "CREATE TABLE table_name_49 (team__number1 VARCHAR)", "sql": "SELECT 2 AS nd_leg FROM table_name_49 WHERE team__number1 = 'kk bosna';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 71, "num_statements": 1} {"question": "Which union activities in Washington have the highest average duration?", "schema": "CREATE TABLE UnionActivities (id INT, union_member_id INT, activity_type VARCHAR, duration INT); CREATE TABLE UnionMembers (id INT, name VARCHAR, state VARCHAR, union_member BOOLEAN);", "sql": "SELECT ua.activity_type, AVG(ua.duration) as avg_duration FROM UnionActivities ua JOIN UnionMembers um ON ua.union_member_id = um.id WHERE um.state = 'Washington' GROUP BY ua.activity_type ORDER BY avg_duration DESC LIMIT 1;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 224, "num_statements": 1} {"question": "What is the number of cultural competency trainings conducted in California and New York?", "schema": "CREATE TABLE cultural_competency_trainings (training_id INT, location VARCHAR(50), date DATE); INSERT INTO cultural_competency_trainings (training_id, location, date) VALUES (1, 'Los Angeles, CA', '2022-01-01'), (2, 'San Diego, CA', '2022-02-01'), (3, 'San Francisco, CA', '2022-03-01'), (4, 'New York, NY', '2022-02-15');", "sql": "SELECT COUNT(*) FROM cultural_competency_trainings WHERE location IN ('CA', 'NY');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 82, "num_statements": 1} {"question": "Find the bioprocess engineering projects that have a budget between $4M and $6M.", "schema": "CREATE TABLE projects (name TEXT, budget FLOAT); INSERT INTO projects (name, budget) VALUES ('ProjectA', 4500000), ('ProjectB', 5500000), ('ProjectC', 3500000);", "sql": "SELECT name FROM projects WHERE budget BETWEEN 4000000 AND 6000000;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 67, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What NHL team has a player in the position of Left Wing that came from the Toronto Marlboros (omjhl)?", "schema": "CREATE TABLE table_name_81 (nhl_team VARCHAR, position VARCHAR, college_junior_club_team VARCHAR)", "sql": "SELECT nhl_team FROM table_name_81 WHERE position = 'left wing' AND college_junior_club_team = 'toronto marlboros (omjhl)';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 123, "num_statements": 1} {"question": "What is the maximum number of years a traditional craft has been preserved in each province of China?", "schema": "CREATE TABLE provinces (id INT, name TEXT); INSERT INTO provinces (id, name) VALUES (1, 'Yunnan'), (2, 'Guangdong'), (3, 'Sichuan'); CREATE TABLE crafts (id INT, province_id INT, name TEXT, year_preserved INT); INSERT INTO crafts (id, province_id, name, year_preserved) VALUES (1, 1, 'Batik', 800), (2, 1, 'Embroidery', 1000), (3, 2, 'Pottery', 700), (4, 2, 'Woodcarving', 500), (5, 3, 'Papermaking', 1200), (6, 3, 'Bamboo weaving', 900);", "sql": "SELECT p.name, MAX(c.year_preserved) FROM provinces p JOIN crafts c ON p.id = c.province_id GROUP BY p.id;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 106, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'rowsecurity' (example 365).", "schema": null, "sql": "CREATE TABLE b1 (a int, b text);", "explanation": "DDL from PostgreSQL core regression test for Rowsecurity.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_table", "is_postgresql_specific": false, "sql_length": 32, "num_statements": 1} {"question": "What is the maximum fairness score for each AI algorithm in the 'creative_ai' database?", "schema": "CREATE TABLE creative_ai.ai_algorithms (ai_algorithm_id INT PRIMARY KEY, ai_algorithm VARCHAR(255), fairness_score FLOAT); INSERT INTO creative_ai.ai_algorithms (ai_algorithm_id, ai_algorithm, fairness_score) VALUES (1, 'Generative Adversarial Networks', 0.75), (2, 'Transformers', 0.85), (3, 'Deep Reinforcement Learning', 0.65);", "sql": "SELECT ai_algorithm, MAX(fairness_score) as max_fairness_score FROM creative_ai.ai_algorithms GROUP BY ai_algorithm;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 116, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which term had a Democratic representative from district 7?", "schema": "CREATE TABLE table_name_22 (term VARCHAR, district VARCHAR, party VARCHAR)", "sql": "SELECT term FROM table_name_22 WHERE district = '7' AND party = 'democratic';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 77, "num_statements": 1} {"question": "What is the number of concerts for artists who identify as female and are from Africa in 2025?", "schema": "CREATE TABLE concert_events (event_id INT, artist_id INT, event_date DATE, event_location VARCHAR(255), attendance INT, revenue DECIMAL(10,2), country VARCHAR(50)); INSERT INTO concert_events (event_id, artist_id, event_date, event_location, attendance, revenue, country) VALUES (1, 1, '2025-01-01', 'NYC', 15000, 500000.00, 'South Africa'); CREATE TABLE artist_demographics (artist_id INT, artist_name VARCHAR(255), gender VARCHAR(50), ethnicity VARCHAR(50), country VARCHAR(50)); INSERT INTO artist_demographics (artist_id, artist_name, gender, ethnicity, country) VALUES (1, 'Amina Mohamed', 'female', 'African', 'South Africa');", "sql": "SELECT COUNT(*) FROM concert_events ce JOIN artist_demographics ad ON ce.artist_id = ad.artist_id WHERE ad.gender = 'female' AND ad.ethnicity = 'African' AND ce.event_date BETWEEN '2025-01-01' AND '2025-12-31';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 210, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the total number of rounds of the player from lehigh college with an overall less than 152?", "schema": "CREATE TABLE table_name_39 (round VARCHAR, college VARCHAR, overall VARCHAR)", "sql": "SELECT COUNT(round) FROM table_name_39 WHERE college = 'lehigh' AND overall < 152;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 82, "num_statements": 1} {"question": "What's the total value of all digital assets on the Solana network?", "schema": "CREATE TABLE solana_assets (id INT PRIMARY KEY, name VARCHAR(255), network VARCHAR(255), value DECIMAL(10, 2)); INSERT INTO solana_assets (id, name, network, value) VALUES (1, 'Asset1', 'Solana', 100), (2, 'Asset2', 'Solana', 150);", "sql": "SELECT SUM(value) FROM solana_assets WHERE network = 'Solana';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 62, "num_statements": 1} {"question": "List all customer complaints related to mobile services in the Asia region", "schema": "CREATE TABLE asia_complaints (complaint_id INT, subscriber_type VARCHAR(10), country VARCHAR(10), complaint VARCHAR(50));", "sql": "SELECT complaint FROM asia_complaints WHERE subscriber_type = 'mobile' AND country IN (SELECT country FROM country WHERE region = 'Asia');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 138, "num_statements": 1} {"question": "What are the account types for the accounts involved in Fraud?", "schema": "CREATE TABLE Accounts (AccountId INT, AccountType VARCHAR(20)); INSERT INTO Accounts (AccountId, AccountType) VALUES (1, 'Checking'), (2, 'Savings'); CREATE TABLE Fraud (FraudId INT, AccountId INT); INSERT INTO Fraud (FraudId, AccountId) VALUES (1, 1), (2, 3);", "sql": "SELECT A.AccountType FROM Accounts A JOIN Fraud F ON A.AccountId = F.AccountId;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 79, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: When 5.5 is the l-band what is the v-band?", "schema": "CREATE TABLE table_186468_1 (v_band VARCHAR, k_band VARCHAR)", "sql": "SELECT v_band FROM table_186468_1 WHERE k_band = '5.5';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 55, "num_statements": 1} {"question": "What is the total number of AI safety incidents reported in the European Union in Q2 2022?", "schema": "CREATE TABLE safety_incidents (incident_id INT, incident_date DATE, country TEXT); INSERT INTO safety_incidents (incident_id, incident_date, country) VALUES (1, '2022-04-01', 'Germany'), (2, '2022-05-15', 'France'), (3, '2022-06-27', 'Italy');", "sql": "SELECT COUNT(*) as num_incidents FROM safety_incidents WHERE country IN ('Germany', 'France', 'Italy') AND incident_date BETWEEN '2022-04-01' AND '2022-06-30';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 159, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Show the names of phones that have total number of stocks bigger than 2000, in descending order of the total number of stocks.", "schema": "CREATE TABLE phone_market (Phone_ID VARCHAR, Num_of_stock INTEGER); CREATE TABLE phone (Name VARCHAR, Phone_ID VARCHAR)", "sql": "SELECT T2.Name FROM phone_market AS T1 JOIN phone AS T2 ON T1.Phone_ID = T2.Phone_ID GROUP BY T2.Name HAVING SUM(T1.Num_of_stock) >= 2000 ORDER BY SUM(T1.Num_of_stock) DESC;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 173, "num_statements": 1} {"question": "Create a table named 'veteran_employment' to store veteran employment statistics", "schema": "CREATE TABLE veteran_employment (id INT PRIMARY KEY, state VARCHAR(2), year INT, total_veterans INT, employed_veterans INT, unemployed_veterans INT);", "sql": "CREATE TABLE veteran_employment (id INT PRIMARY KEY, state VARCHAR(2), year INT, total_veterans INT, employed_veterans INT, unemployed_veterans INT);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_table", "is_postgresql_specific": false, "sql_length": 149, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'lock' (example 7).", "schema": null, "sql": "CREATE VIEW lock_view3 AS SELECT * from lock_view2;", "explanation": "DDL from PostgreSQL core regression test for Lock.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "ddl_view", "is_postgresql_specific": false, "sql_length": 51, "num_statements": 1} {"question": "What is the average total funding for startups founded by underrepresented minorities in the Tech sector?", "schema": "CREATE TABLE startups(id INT, name TEXT, industry TEXT, total_funding FLOAT, founder TEXT); INSERT INTO startups VALUES(1, 'StartupA', 'Tech', 15000000, 'Asian'); INSERT INTO startups VALUES(2, 'StartupB', 'Tech', 20000000, 'White'); INSERT INTO startups VALUES(3, 'StartupC', 'Healthcare', 12000000, 'Hispanic'); INSERT INTO startups VALUES(4, 'StartupD', 'Finance', 30000000, 'Black'); INSERT INTO startups VALUES(5, 'StartupE', 'Tech', 8000000, 'Underrepresented Minority');", "sql": "SELECT AVG(total_funding) FROM startups WHERE industry = 'Tech' AND founder IN ('Underrepresented Minority', 'African American', 'Hispanic', 'Native American');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 160, "num_statements": 1} {"question": "What is the average GPA of graduate students in each department?", "schema": "CREATE TABLE graduate_students (student_id INT, name TEXT, gpa DECIMAL(3,2), department TEXT);", "sql": "SELECT gs.department, AVG(gs.gpa) FROM graduate_students gs GROUP BY gs.department;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 83, "num_statements": 1} {"question": "Add a new column 'country' to the 'member_demographics' table", "schema": "CREATE TABLE member_demographics (member_id INT, age INT, gender VARCHAR(10), city VARCHAR(50), state VARCHAR(20));", "sql": "ALTER TABLE member_demographics ADD country VARCHAR(50);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "ddl_alter", "is_postgresql_specific": false, "sql_length": 56, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the Date of the game where the Cavaliers have a Record of 15-27?", "schema": "CREATE TABLE table_name_57 (date VARCHAR, record VARCHAR)", "sql": "SELECT date FROM table_name_57 WHERE record = '15-27';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 54, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: when is the opponent lyoto machida?", "schema": "CREATE TABLE table_name_94 (date VARCHAR, opponent VARCHAR)", "sql": "SELECT date FROM table_name_94 WHERE opponent = 'lyoto machida';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 64, "num_statements": 1} {"question": "How many players have played the game 'Puzzle Pioneers' and achieved a score between 500 and 1500?", "schema": "CREATE TABLE Puzzle_Pioneers (player_id INT, player_name VARCHAR(50), score INT); INSERT INTO Puzzle_Pioneers (player_id, player_name, score) VALUES (1, 'Alex Brown', 700), (2, 'Bella Johnson', 1600), (3, 'Charlie Lee', 400);", "sql": "SELECT COUNT(DISTINCT player_id) FROM Puzzle_Pioneers WHERE score BETWEEN 500 AND 1500;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 87, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the sum of Total, when Nation is West Germany, and when Gold is greater than 2?", "schema": "CREATE TABLE table_name_45 (total INTEGER, nation VARCHAR, gold VARCHAR)", "sql": "SELECT SUM(total) FROM table_name_45 WHERE nation = 'west germany' AND gold > 2;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 80, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'jsonb' (example 177).", "schema": null, "sql": "CREATE TEMP TABLE foo (serial_num int, name text, type text);", "explanation": "DDL from PostgreSQL core regression test for Jsonb.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 61, "num_statements": 1} {"question": "Find the average travel speed for each vessel, grouped by vessel type and month.", "schema": "CREATE TABLE vessels (vessel_id INT, vessel_name VARCHAR(50), flag_state VARCHAR(50), vessel_type VARCHAR(50)); CREATE TABLE voyages (id INT, vessel_id INT, start_port_id INT, end_port_id INT, distance FLOAT, travel_time FLOAT, voyage_date DATE);", "sql": "SELECT v.vessel_type, DATE_FORMAT(voyages.voyage_date, '%Y-%m') as time_period, AVG(voyages.distance/voyages.travel_time*24) as avg_speed FROM vessels JOIN voyages ON vessels.vessel_id = voyages.vessel_id GROUP BY v.vessel_type, time_period;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 241, "num_statements": 1} {"question": "What is the average altitude of satellites in the 'satellite_info' table?", "schema": "CREATE TABLE satellite_info (id INT PRIMARY KEY, satellite_name VARCHAR(255), country VARCHAR(255), launch_date DATE, altitude INT);", "sql": "SELECT AVG(altitude) FROM satellite_info;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 41, "num_statements": 1} {"question": "List all the transactions that occurred in the last month in the ethical labor practices retail sector.", "schema": "CREATE TABLE Transactions (transactionID int, transactionDate datetime, retailSector varchar(255)); INSERT INTO Transactions VALUES (1, '2022-01-01', 'ethical labor practices');", "sql": "SELECT * FROM Transactions WHERE transactionDate >= DATE_SUB(NOW(), INTERVAL 1 MONTH) AND retailSector = 'ethical labor practices';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 131, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is Category, when Result is \"Nominated\", when Award is \"Drama Desk Award\", and when Nominee is \"Nathan Lane\"?", "schema": "CREATE TABLE table_name_95 (category VARCHAR, nominee VARCHAR, result VARCHAR, award VARCHAR)", "sql": "SELECT category FROM table_name_95 WHERE result = 'nominated' AND award = 'drama desk award' AND nominee = 'nathan lane';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 121, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What Player had a Score of 70-71=141?", "schema": "CREATE TABLE table_name_80 (player VARCHAR, score VARCHAR)", "sql": "SELECT player FROM table_name_80 WHERE score = 70 - 71 = 141;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 61, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What's the quantity preserved when the fleet number was 700?", "schema": "CREATE TABLE table_name_57 (quantity_preserved VARCHAR, fleet_number_s_ VARCHAR)", "sql": "SELECT quantity_preserved FROM table_name_57 WHERE fleet_number_s_ = '700';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 75, "num_statements": 1} {"question": "What is the maximum risk score of Brown Investment Group's portfolio in the agriculture sector?", "schema": "CREATE TABLE Brown_Investment_Group (id INT, sector VARCHAR(20), risk_score INT); INSERT INTO Brown_Investment_Group (id, sector, risk_score) VALUES (1, 'Agriculture', 50), (2, 'Manufacturing', 40);", "sql": "SELECT MAX(risk_score) FROM Brown_Investment_Group WHERE sector = 'Agriculture';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 80, "num_statements": 1} {"question": "pgTAP test for Moretap (assertion 9).", "schema": null, "sql": "SELECT is(\n (SELECT * FROM finish(NULL) LIMIT 1),\n '# Looks like you failed 1 test of 8',\n 'The output of finish(NULL) should reflect the test failure'\n);", "explanation": "SQL assertion from pgTAP test suite for Moretap.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 163, "num_statements": 1} {"question": "PostgreSQL regression test 'collate.icu.utf8': Write the SELECT query (example 88).", "schema": null, "sql": "SELECT a, lower(nullif(x, 'foo')), lower(nullif(y, 'foo')) FROM collate_test10;", "explanation": "Regression test for Collate.Icu.Utf8 in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT a, lower(nullif(x, 'foo')), lower(nullif(y, 'foo')) FROM collate_test10) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 79, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which player is in the position of Guard/Forward?", "schema": "CREATE TABLE table_name_67 (name VARCHAR, position VARCHAR)", "sql": "SELECT name FROM table_name_67 WHERE position = 'guard/forward';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 64, "num_statements": 1} {"question": "How many artworks are on display in the 'ImpressionistGallery'?", "schema": "CREATE TABLE Artworks (ArtworkID INT, Title VARCHAR(50), Gallery VARCHAR(50)); INSERT INTO Artworks (ArtworkID, Title, Gallery) VALUES (1, 'Starry Night', 'ImpressionistGallery'); INSERT INTO Artworks (ArtworkID, Title, Gallery) VALUES (2, 'Sunflowers', 'ImpressionistGallery');", "sql": "SELECT COUNT(*) FROM Artworks WHERE Gallery = 'ImpressionistGallery';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 69, "num_statements": 1} {"question": "Identify the circular economy initiatives that have a direct impact on waste generation metrics in the waste management industry?", "schema": "CREATE TABLE circular_economy_initiatives (initiative VARCHAR(50), impacted_metrics VARCHAR(50)); INSERT INTO circular_economy_initiatives VALUES ('Product-as-a-service', 'Waste generation, Reuse rate'), ('Waste-to-Energy', 'Waste generation, Landfill capacity'), ('Recycling education programs', 'Recycling rate'), ('Biodegradable plastics', 'Waste generation, Landfill capacity');", "sql": "SELECT initiative, impacted_metrics FROM circular_economy_initiatives;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 70, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the Stuffit ability that has a gzip of yes, ISO/CD images of no and LHA/LZH of no?", "schema": "CREATE TABLE table_name_75 (stuffit VARCHAR, lha_lzh VARCHAR, gzip VARCHAR, iso_cd_image VARCHAR)", "sql": "SELECT stuffit FROM table_name_75 WHERE gzip = 'yes' AND iso_cd_image = 'no' AND lha_lzh = 'no';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 96, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the least lost with points more than 6 and games less than 5", "schema": "CREATE TABLE table_name_4 (lost INTEGER, points VARCHAR, games VARCHAR)", "sql": "SELECT MIN(lost) FROM table_name_4 WHERE points > 6 AND games < 5;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'timestamp' (example 35).", "schema": null, "sql": "INSERT INTO TIMESTAMP_TBL VALUES ('1997-01-02 03:04:05');", "explanation": "DML from PostgreSQL core regression test for Timestamp.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 57, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What's the part 4 for the verb whose part 3 is borgen?", "schema": "CREATE TABLE table_1745843_7 (part_4 VARCHAR, part_3 VARCHAR)", "sql": "SELECT part_4 FROM table_1745843_7 WHERE part_3 = 'borgen';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 59, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: When were the callback auditions for the audition city in the episode aired on February 9, 2011?", "schema": "CREATE TABLE table_27455867_1 (callback_audition_date VARCHAR, episode_air_date VARCHAR)", "sql": "SELECT callback_audition_date FROM table_27455867_1 WHERE episode_air_date = 'February 9, 2011';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 96, "num_statements": 1} {"question": "How many unique donors have there been in each of the last 3 months?", "schema": "CREATE TABLE donation_timeline (id INT, donor_id INT, donation_date DATE);", "sql": "SELECT DATE_FORMAT(donation_date, '%Y-%m') as donation_month, COUNT(DISTINCT donor_id) as unique_donors FROM donation_timeline WHERE donation_date >= DATE_SUB(CURDATE(), INTERVAL 3 MONTH) GROUP BY donation_month;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 212, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the win-loss when 2008 has a value of Q1 at Australian Open?", "schema": "CREATE TABLE table_name_65 (career_win_loss VARCHAR, tournament VARCHAR)", "sql": "SELECT career_win_loss FROM table_name_65 WHERE 2008 = 'q1' AND tournament = 'australian open';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 95, "num_statements": 1} {"question": "What is the maximum income in the \"South\" district?", "schema": "CREATE TABLE district (name VARCHAR(20), income FLOAT); INSERT INTO district (name, income) VALUES ('North', 45000.0), ('East', 50000.0), ('West', 40000.0), ('South', 55000.0), ('South', 58000.0), ('South', 56000.0);", "sql": "SELECT MAX(income) FROM district WHERE name = 'South';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 54, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'create_table' (example 209).", "schema": null, "sql": "create table parted_notnull_inh_test1 partition of parted_notnull_inh_test (a not null, b default 1) for values in (1);", "explanation": "DDL from PostgreSQL core regression test for Create Table.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_table", "is_postgresql_specific": false, "sql_length": 119, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who had the fastest lap(s) when josef newgarden led the most laps at edmonton?", "schema": "CREATE TABLE table_29690363_3 (fastest_lap VARCHAR, most_laps_led VARCHAR, race VARCHAR)", "sql": "SELECT fastest_lap FROM table_29690363_3 WHERE most_laps_led = 'Josef Newgarden' AND race = 'Edmonton';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 103, "num_statements": 1} {"question": "Determine the most efficient garment manufacturing countries in Q1 2022.", "schema": "CREATE TABLE garment_manufacturing (id INT, garment_id INT, country VARCHAR(20), manufacturing_date DATE, workers INT, efficiency DECIMAL(4,2));CREATE VIEW efficient_manufacturers_q1_2022 AS SELECT country, AVG(efficiency) as avg_efficiency FROM garment_manufacturing WHERE manufacturing_date >= '2022-01-01' AND manufacturing_date < '2022-04-01' AND workers > 50 GROUP BY country;", "sql": "SELECT country, avg_efficiency, RANK() OVER (ORDER BY avg_efficiency DESC) as efficiency_rank FROM efficient_manufacturers_q1_2022;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_window_function", "is_postgresql_specific": true, "sql_length": 131, "num_statements": 1} {"question": "What is the maximum number of hours volunteered by a single volunteer in a year?", "schema": "CREATE TABLE volunteer_hours (id INT, volunteer_id INT, year INT, num_hours INT); INSERT INTO volunteer_hours (id, volunteer_id, year, num_hours) VALUES (1, 1, 2019, 100), (2, 1, 2020, 150), (3, 2, 2019, 75), (4, 2, 2020, 200), (5, 3, 2019, 125), (6, 3, 2020, 175);", "sql": "SELECT year, MAX(num_hours) as max_hours FROM volunteer_hours GROUP BY year;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 76, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is To Par, when Country is United States, and when Score is 71-68-73=212?", "schema": "CREATE TABLE table_name_22 (to_par VARCHAR, country VARCHAR, score VARCHAR)", "sql": "SELECT to_par FROM table_name_22 WHERE country = 'united states' AND score = 71 - 68 - 73 = 212;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 96, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: When did Archbishop Albert Daeger vacate the throne?", "schema": "CREATE TABLE table_name_63 (vacated_throne VARCHAR, archbishop VARCHAR)", "sql": "SELECT vacated_throne FROM table_name_63 WHERE archbishop = 'albert daeger';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 76, "num_statements": 1} {"question": "Write the PL/pgSQL object from PostgreSQL regression test 'opr_sanity' (example 75).", "schema": null, "sql": "-- Operators that are commutator pairs should have identical volatility\n-- and leakproofness markings on their implementation functions.\nSELECT o1.oid, o1.oprcode, o2.oid, o2.oprcode\nFROM pg_operator AS o1, pg_operator AS o2, pg_proc AS p1, pg_proc AS p2\nWHERE o1.oprcom = o2.oid AND p1.oid = o1.oprcode AND p2.oid = o2.oprcode AND\n (p1.provolatile != p2.provolatile OR\n p1.proleakproof != p2.proleakproof);", "explanation": "PL/pgSQL object from PostgreSQL core test for Opr Sanity.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 414, "num_statements": 1} {"question": "Which farmers have less than 5 years of experience in the agriculture database?", "schema": "CREATE TABLE Farmers (id INT, name VARCHAR, location VARCHAR, years_of_experience INT); INSERT INTO Farmers (id, name, location, years_of_experience) VALUES (1, 'Nur Afiqah', 'Singapore', 2), (2, 'Max Schmidt', 'Berlin', 4), (3, 'Anastasia Kuznetsova', 'Moscow', 6), (4, 'Jacob Nielsen', 'Oslo', 8), (5, 'Carla Moraes', 'Sao Paulo', 10);", "sql": "SELECT name FROM Farmers WHERE years_of_experience < 5;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 55, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the largest season with a Third Driver of jimmy davies?", "schema": "CREATE TABLE table_name_2 (season INTEGER, third_driver VARCHAR)", "sql": "SELECT MAX(season) FROM table_name_2 WHERE third_driver = 'jimmy davies';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 73, "num_statements": 1} {"question": "What is the minimum yield of corn crops by country?", "schema": "CREATE TABLE Country (id INT, name VARCHAR(255)); INSERT INTO Country (id, name) VALUES (1, 'Canada'), (2, 'Mexico'), (3, 'Brazil'); CREATE TABLE Crop (id INT, name VARCHAR(255), country_id INT, yield INT); INSERT INTO Crop (id, name, country_id, yield) VALUES (1, 'Corn', 1, 20), (2, 'Soybean', 2, 30), (3, 'Corn', 3, 10);", "sql": "SELECT MIN(Crop.yield) FROM Crop INNER JOIN Country ON Crop.country_id = Country.id WHERE Crop.name = 'Corn';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 109, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Luis Salom had the fastest lap on which circuits?", "schema": "CREATE TABLE table_18303274_1 (circuit VARCHAR, fastest_lap VARCHAR)", "sql": "SELECT circuit FROM table_18303274_1 WHERE fastest_lap = 'Luis Salom';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 70, "num_statements": 1} {"question": "What is the total number of students who participated in lifelong learning programs in 'Suburb B' and 'City C'?", "schema": "CREATE TABLE SuburbBLifelong (studentID INT, suburb VARCHAR(50), program VARCHAR(50)); INSERT INTO SuburbBLifelong (studentID, suburb, program) VALUES (1, 'Suburb B', 'lifelong learning'), (2, 'City C', 'lifelong learning'); CREATE TABLE CityCLifelong (studentID INT, city VARCHAR(50), program VARCHAR(50)); INSERT INTO CityCLifelong (studentID, city, program) VALUES (3, 'City C', 'lifelong learning');", "sql": "SELECT COUNT(DISTINCT studentID) FROM SuburbBLifelong WHERE suburb IN ('Suburb B', 'City C') AND program = 'lifelong learning' UNION ALL SELECT COUNT(DISTINCT studentID) FROM CityCLifelong WHERE city IN ('Suburb B', 'City C') AND program = 'lifelong learning';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 260, "num_statements": 1} {"question": "pgTAP test for Throwtap (assertion 34).", "schema": null, "sql": "SELECT * FROM check_test(\n throws_imatching(\n 'SELECT 1',\n 'HUH.+',\n 'whatever'\n ),\n false,\n 'throws_imatching(valid sql, regex, desc)',\n 'whatever',\n ' no exception thrown'\n);", "explanation": "SQL assertion from pgTAP test suite for Throwtap.", "validation_query": null, "source": "pgtap_tests", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 218, "num_statements": 1} {"question": "How many workers are employed in each company's 'geological survey' department?", "schema": "CREATE TABLE companies (company_id INT, company_name VARCHAR(50));CREATE TABLE workers (worker_id INT, company_id INT, department VARCHAR(20), num_employees INT);", "sql": "SELECT c.company_name, w.department, SUM(w.num_employees) AS total_employees FROM workers w INNER JOIN companies c ON w.company_id = c.company_id GROUP BY w.department, c.company_name HAVING department = 'geological survey';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 224, "num_statements": 1} {"question": "What is the total number of green buildings in the UK with a gold rating?", "schema": "CREATE TABLE green_buildings (building_id INT, building_name VARCHAR(255), country VARCHAR(255), rating VARCHAR(255));", "sql": "SELECT COUNT(*) FROM green_buildings WHERE country = 'UK' AND rating = 'gold';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 78, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'insert_conflict' (example 80).", "schema": null, "sql": "insert into insertconflicttest values (23, 'Blackberry') on conflict (fruit) do update set fruit = excluded.fruit;", "explanation": "DML from PostgreSQL core regression test for Insert Conflict.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "dml_insert", "is_postgresql_specific": true, "sql_length": 114, "num_statements": 1} {"question": "pgTAP test for Unique (assertion 11).", "schema": null, "sql": "/****************************************************************************/\n-- Test col_is_unique().\n\nSELECT * FROM check_test(\n col_is_unique( 'public', 'sometab', 'name', 'public.sometab.name should be unique' ),\n true,\n 'col_is_unique( schema, table, column, description )',\n 'public.sometab.name should be unique',\n ''\n);", "explanation": "SQL assertion from pgTAP test suite for Unique.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 343, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'alter_table' (example 171).", "schema": null, "sql": "INSERT INTO attmp2 values (2);", "explanation": "DML from PostgreSQL core regression test for Alter Table.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 30, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What were the results for the incumbent from Iowa 2 district?", "schema": "CREATE TABLE table_name_12 (results VARCHAR, district VARCHAR)", "sql": "SELECT results FROM table_name_12 WHERE district = 'iowa 2';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 60, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Tell me the call sign for frequency of 0 97.3 fm", "schema": "CREATE TABLE table_name_67 (call_sign VARCHAR, frequency VARCHAR)", "sql": "SELECT call_sign FROM table_name_67 WHERE frequency = '0 97.3 fm';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "How many marine protected areas are there in the Caribbean Sea?", "schema": "CREATE TABLE marine_protected_areas (id INT, area_name VARCHAR(255), ocean VARCHAR(255)); INSERT INTO marine_protected_areas (id, area_name, ocean) VALUES (1, 'Area 1', 'Caribbean'), (2, 'Area 2', 'Caribbean'), (3, 'Area 3', 'Mediterranean'), (4, 'Area 4', 'Pacific');", "sql": "SELECT COUNT(*) FROM marine_protected_areas WHERE ocean = 'Caribbean';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 70, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which player is from Sweden?", "schema": "CREATE TABLE table_name_3 (player VARCHAR, country VARCHAR)", "sql": "SELECT player FROM table_name_3 WHERE country = 'sweden';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 57, "num_statements": 1} {"question": "What is the total amount donated by recurring donors for dance performances in San Francisco?", "schema": "CREATE TABLE donors (donor_id INT, name VARCHAR(50), is_recurring BOOLEAN, city VARCHAR(50)); CREATE TABLE donations (donation_id INT, donor_id INT, amount DECIMAL(10,2), event_type VARCHAR(50)); INSERT INTO donors (donor_id, name, is_recurring, city) VALUES (1, 'Jane Smith', TRUE, 'San Francisco'); INSERT INTO donations (donation_id, donor_id, amount, event_type) VALUES (1, 1, 100.00, 'Dance');", "sql": "SELECT SUM(d.amount) AS total_donated FROM donations d JOIN donors don ON d.donor_id = don.donor_id WHERE don.city = 'San Francisco' AND don.is_recurring = TRUE AND d.event_type = 'Dance';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 188, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which country is Jay Haas from when he had a to par of E?", "schema": "CREATE TABLE table_name_92 (country VARCHAR, to_par VARCHAR, player VARCHAR)", "sql": "SELECT country FROM table_name_92 WHERE to_par = 'e' AND player = 'jay haas';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 77, "num_statements": 1} {"question": "What is the name of the graduate student who received the largest research grant in the past year?", "schema": "CREATE TABLE students (student_id INT, name TEXT); INSERT INTO students (student_id, name) VALUES (1, 'Alice Johnson'), (2, 'Bob Brown'); CREATE TABLE grants (grant_id INT, student_id INT, year INT, amount INT); INSERT INTO grants (grant_id, student_id, year, amount) VALUES (1, 1, 2021, 10000), (2, 2, 2022, 20000);", "sql": "SELECT s.name FROM students s INNER JOIN (SELECT student_id, MAX(amount) as max_amount FROM grants WHERE year = 2022 GROUP BY student_id) g ON s.student_id = g.student_id WHERE g.max_amount = (SELECT MAX(amount) FROM grants WHERE year = 2022);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 243, "num_statements": 1} {"question": "Insert a new habitat named Australia with a size of 220000 into the 'habitats' table.", "schema": "CREATE TABLE habitats (id INT PRIMARY KEY, name VARCHAR(50), size INT);", "sql": "INSERT INTO habitats (name, size) VALUES ('Australia', 220000);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 63, "num_statements": 1} {"question": "What is the average production cost of organic cotton t-shirts?", "schema": "CREATE TABLE OrganicCottonTShirts (id INT, production_cost DECIMAL);", "sql": "SELECT AVG(production_cost) FROM OrganicCottonTShirts;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 54, "num_statements": 1} {"question": "Determine geopolitical risk assessments with a risk_level of 'High' and their assessment_date.", "schema": "CREATE SCHEMA IF NOT EXISTS geopolitical_risk;CREATE TABLE IF NOT EXISTS geopolitical_risk_assessments (assessment_id INT, assessment_date DATE, risk_level VARCHAR(10));INSERT INTO geopolitical_risk_assessments (assessment_id, assessment_date, risk_level) VALUES (1, '2021-01-01', 'Medium'), (2, '2021-02-01', 'High'), (3, '2021-03-01', 'Low');", "sql": "SELECT assessment_date, risk_level FROM geopolitical_risk_assessments WHERE risk_level = 'High';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 96, "num_statements": 1} {"question": "What is the total budget and number of departments for each manager, grouped by manager gender?", "schema": "CREATE TABLE Manager (id INT, Name VARCHAR(50), Gender VARCHAR(10)); INSERT INTO Manager (id, Name, Gender) VALUES (101, 'Manager1', 'Male'); INSERT INTO Manager (id, Name, Gender) VALUES (102, 'Manager2', 'Female'); CREATE TABLE Department (id INT, Name VARCHAR(50), ManagerID INT, Budget FLOAT); INSERT INTO Department (id, Name, ManagerID, Budget) VALUES (1, 'Department1', 101, 500000); INSERT INTO Department (id, Name, ManagerID, Budget) VALUES (2, 'Department2', 101, 750000); INSERT INTO Department (id, Name, ManagerID, Budget) VALUES (3, 'Department3', 102, 600000);", "sql": "SELECT m.Gender, SUM(d.Budget) AS TotalBudget, COUNT(d.id) AS NumberOfDepartments FROM Manager m JOIN Department d ON m.id = d.ManagerID GROUP BY m.Gender;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 155, "num_statements": 1} {"question": "List all space missions that have lasted more than 300 days", "schema": "CREATE TABLE SpaceMissions (id INT, mission_name VARCHAR(30), duration INT); INSERT INTO SpaceMissions (id, mission_name, duration) VALUES (1, 'Mars Exploration', 400); INSERT INTO SpaceMissions (id, mission_name, duration) VALUES (2, 'Asteroid Survey', 250);", "sql": "SELECT mission_name FROM SpaceMissions WHERE duration > 300;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 60, "num_statements": 1} {"question": "What is the number of members who have extended their membership in the last 3 months?", "schema": "CREATE TABLE gym_memberships (id INT, member_name VARCHAR(50), start_date DATE, end_date DATE, membership_type VARCHAR(50), price DECIMAL(5,2));", "sql": "SELECT COUNT(DISTINCT member_name) AS extended_members FROM gym_memberships WHERE end_date < DATE_SUB(CURDATE(), INTERVAL 3 MONTH) AND DATE_ADD(end_date, INTERVAL 1 MONTH) > CURDATE();", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 184, "num_statements": 1} {"question": "Show a SQL definition from the timescaledb project (partitioned_hypertable, item 38).", "schema": null, "sql": "-- FOREIGN KEY from hypertable to hypertable\nCREATE TABLE ref_ht(\n time TIMESTAMPTZ NOT NULL ,\n id INT,\n CONSTRAINT ref_ht_pkey PRIMARY KEY (time, id)\n) WITH (timescaledb.hypertable, timescaledb.partition_column='time');", "explanation": "SQL definition from the open-source timescaledb PostgreSQL project.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "ddl_table", "is_postgresql_specific": false, "sql_length": 229, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who is the player with a 77-68=145 score?", "schema": "CREATE TABLE table_name_56 (player VARCHAR, score VARCHAR)", "sql": "SELECT player FROM table_name_56 WHERE score = 77 - 68 = 145;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 61, "num_statements": 1} {"question": "Which aquatic species have health metrics above the average?", "schema": "CREATE TABLE health_metrics (id INT, species VARCHAR(50), metric FLOAT); INSERT INTO health_metrics (id, species, metric) VALUES (1, 'Tilapia', 75.0), (2, 'Catfish', 80.0), (3, 'Salmon', 60.0);", "sql": "SELECT species FROM health_metrics WHERE metric > (SELECT AVG(metric) FROM health_metrics);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 91, "num_statements": 1} {"question": "What is the average playtime for players who have played games with a price greater than 50?", "schema": "CREATE TABLE Players (PlayerID INT, Age INT, Gender VARCHAR(10), Country VARCHAR(50)); INSERT INTO Players (PlayerID, Age, Gender, Country) VALUES (1, 25, 'Male', 'USA'), (2, 30, 'Female', 'Canada'), (3, 22, 'Male', 'Mexico'); CREATE TABLE GamePlay (PlayerID INT, Playtime INT, GamePrice DECIMAL(5, 2)); INSERT INTO GamePlay (PlayerID, Playtime, GamePrice) VALUES (1, 120, 60.00), (2, 90, 45.00), (3, 150, 55.00);", "sql": "SELECT AVG(GamePlay.Playtime) FROM GamePlay INNER JOIN Players ON GamePlay.PlayerID = Players.PlayerID WHERE GamePrice > 50;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 124, "num_statements": 1} {"question": "What is the total quantity of items with type 'I' in warehouse S and warehouse T?", "schema": "CREATE TABLE warehouse_s(item_id INT, item_type VARCHAR(10), quantity INT);CREATE TABLE warehouse_t(item_id INT, item_type VARCHAR(10), quantity INT);INSERT INTO warehouse_s(item_id, item_type, quantity) VALUES (1, 'I', 200), (2, 'I', 300), (3, 'I', 50);INSERT INTO warehouse_t(item_id, item_type, quantity) VALUES (1, 'I', 150), (2, 'I', 250), (3, 'I', 40);", "sql": "SELECT quantity FROM warehouse_s WHERE item_type = 'I' UNION ALL SELECT quantity FROM warehouse_t WHERE item_type = 'I';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 120, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the type for hobart college", "schema": "CREATE TABLE table_1974482_1 (type VARCHAR, institution VARCHAR)", "sql": "SELECT type FROM table_1974482_1 WHERE institution = 'Hobart College';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 70, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which district had first elected earlier than 2006 for representation of Broken Arrow, Tulsa?", "schema": "CREATE TABLE table_name_7 (district VARCHAR, first_elected VARCHAR, towns_represented VARCHAR)", "sql": "SELECT district FROM table_name_7 WHERE first_elected < 2006 AND towns_represented = 'broken arrow, tulsa';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 107, "num_statements": 1} {"question": "Which cruelty-free cosmetic brands are most popular in the US?", "schema": "CREATE TABLE brands (brand_id INT, brand_name TEXT, is_cruelty_free BOOLEAN); INSERT INTO brands (brand_id, brand_name, is_cruelty_free) VALUES (1, 'Flawless Faced', true), (2, 'Naturally Radiant', true), (3, 'Cruelty Free Cosmetics', true), (4, 'Animal Tested Brands', false); CREATE TABLE sales (sale_id INT, brand_id INT, sale_quantity INT, sale_country TEXT); INSERT INTO sales (sale_id, brand_id, sale_quantity, sale_country) VALUES (1, 1, 500, 'US'), (2, 2, 700, 'US'), (3, 3, 300, 'US'), (4, 4, 800, 'US'), (5, 1, 600, 'CA'), (6, 2, 400, 'CA');", "sql": "SELECT b.brand_name, SUM(s.sale_quantity) as total_sales_quantity FROM sales s JOIN brands b ON s.brand_id = b.brand_id WHERE b.is_cruelty_free = true AND s.sale_country = 'US' GROUP BY b.brand_name ORDER BY total_sales_quantity DESC;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 234, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What happened in Mazar-i-sharif?", "schema": "CREATE TABLE table_name_3 (circumstances VARCHAR, location VARCHAR)", "sql": "SELECT circumstances FROM table_name_3 WHERE location = 'mazar-i-sharif';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 73, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: When 22 is the number what is the episode title?", "schema": "CREATE TABLE table_29475589_3 (episode_title VARCHAR, no VARCHAR)", "sql": "SELECT episode_title FROM table_29475589_3 WHERE no = 22;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 57, "num_statements": 1} {"question": "What is the average wind speed and solar radiation for each city by month?", "schema": "CREATE TABLE sensors (id INT, city VARCHAR(255), type VARCHAR(255), value FLOAT, timestamp TIMESTAMP); INSERT INTO sensors (id, city, type, value, timestamp) VALUES (1, 'EcoCity', 'Wind Speed', 7.2, '2022-04-01 10:00:00'), (2, 'EcoCity', 'Solar Radiation', 500, '2022-04-01 10:00:00');", "sql": "SELECT city, type, AVG(value) as avg_value, DATE_FORMAT(timestamp, '%%Y-%%m') as month FROM sensors GROUP BY city, type, month;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 127, "num_statements": 1} {"question": "Show all workers who have more than 5 years of experience", "schema": "CREATE TABLE experience (id INT, worker VARCHAR(50), years_of_experience INT); INSERT INTO experience (id, worker, years_of_experience) VALUES (1, 'John Doe', 3), (2, 'Jane Smith', 7), (3, 'Alice Johnson', 6);", "sql": "SELECT worker FROM experience WHERE years_of_experience > 5;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 60, "num_statements": 1} {"question": "Find the total volume of timber harvested in 'Africa' from 'coniferous' type", "schema": "CREATE TABLE forest_types (id INT, type VARCHAR(20)); INSERT INTO forest_types (id, type) VALUES (3, 'coniferous');", "sql": "SELECT SUM(volume) FROM timber_harvest t JOIN forest_types ft ON t.forest_type_id = ft.id WHERE t.harvest_location = 'Africa' AND ft.type = 'coniferous';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 153, "num_statements": 1} {"question": "Find the names of graduate students who have never received a research grant.", "schema": "CREATE TABLE grad_students (id INT, name VARCHAR(50));CREATE TABLE research_grants (id INT, grant_id INT, student_id INT);", "sql": "SELECT DISTINCT gs.name FROM grad_students gs LEFT JOIN research_grants rg ON gs.id = rg.student_id WHERE rg.id IS NULL;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 120, "num_statements": 1} {"question": "Insert a new species into the marine_species table with a conservation_status of 'Near Threatened' and a primary productivity value of 100.", "schema": "CREATE TABLE marine_species (id INT PRIMARY KEY, name VARCHAR(255), conservation_status VARCHAR(255)); INSERT INTO marine_species (id, name, conservation_status) VALUES (1, 'Humpback Whale', 'Vulnerable'); CREATE TABLE oceanography (id INT PRIMARY KEY, species_id INT, primary_productivity INT); INSERT INTO oceanography (id, species_id, primary_productivity) VALUES (1, 1, 50);", "sql": "INSERT INTO marine_species (id, name, conservation_status) VALUES (2, 'Narwhal', 'Near Threatened'); INSERT INTO oceanography (id, species_id, primary_productivity) VALUES (2, 2, 100);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 184, "num_statements": 2} {"question": "Write the SQL definition from PostgreSQL contrib extension 'contrib' (item 6).", "schema": null, "sql": "CREATE FUNCTION regexp_matches( citext, citext, text ) RETURNS SETOF TEXT[] AS $$\n SELECT pg_catalog.regexp_matches( $1::pg_catalog.text, $2::pg_catalog.text, CASE WHEN pg_catalog.strpos($3, 'c') = 0 THEN $3 || 'i' ELSE $3 END );\n$$ LANGUAGE SQL IMMUTABLE STRICT ROWS 10;", "explanation": "SQL definition from the 'contrib' PostgreSQL contrib extension.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": true, "sql_length": 275, "num_statements": 2} {"question": "Find the total quantity of seafood products in the inventory, grouped by supplier country.", "schema": "CREATE TABLE inventory(id INT PRIMARY KEY, product VARCHAR(50), quantity INT, supplier_id INT, supplier_country VARCHAR(50)); INSERT INTO inventory(id, product, quantity, supplier_id, supplier_country) VALUES (1, 'salmon', 100, 1, 'Canada'), (2, 'shrimp', 200, 2, 'Thailand'), (3, 'tuna', 150, 3, 'Spain'), (4, 'crab', 250, 2, 'Thailand'), (5, 'lobster', 300, 4, 'Mexico'); CREATE TABLE suppliers(id INT PRIMARY KEY, country VARCHAR(50)); INSERT INTO suppliers(id, country) VALUES (1, 'Canada'), (2, 'Thailand'), (3, 'Spain'), (4, 'Mexico'); CREATE TABLE product_types(id INT PRIMARY KEY, product VARCHAR(50), type VARCHAR(50)); INSERT INTO product_types(id, product, type) VALUES (1, 'salmon', 'seafood'), (2, 'shrimp', 'seafood'), (3, 'tuna', 'seafood'), (4, 'crab', 'seafood'), (5, 'lobster', 'seafood'), (6, 'beef', 'meat'), (7, 'chicken', 'meat');", "sql": "SELECT s.country AS supplier_country, pt.type, SUM(i.quantity) FROM inventory i JOIN suppliers s ON i.supplier_id = s.id JOIN product_types pt ON i.product = pt.product WHERE pt.type = 'seafood' GROUP BY s.country, pt.type;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 223, "num_statements": 1} {"question": "What is the distribution of mental health scores for students in private and public schools in the US and Canada?", "schema": "CREATE TABLE student_mental_health (student_id INT, school_type VARCHAR(50), country VARCHAR(50), score INT); INSERT INTO student_mental_health (student_id, school_type, country, score) VALUES (1, 'Private', 'USA', 75), (2, 'Public', 'Canada', 80), (3, 'Private', 'USA', 70), (4, 'Public', 'Canada', 75), (5, 'Private', 'USA', 85), (6, 'Public', 'Canada', 80);", "sql": "SELECT school_type, country, AVG(score) as avg_score, STDDEV(score) as stddev_score FROM student_mental_health GROUP BY school_type, country;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 141, "num_statements": 1} {"question": "What is the minimum depth of any underwater mountain in the Pacific ocean?", "schema": "CREATE TABLE underwater_mountains (mountain_name TEXT, location TEXT, min_depth FLOAT); INSERT INTO underwater_mountains (mountain_name, location, min_depth) VALUES ('Mountain 1', 'Pacific Ocean', 1200.0), ('Mountain 2', 'Atlantic Ocean', 900.0), ('Mountain 3', 'Pacific Ocean', 1500.0);", "sql": "SELECT MIN(min_depth) FROM underwater_mountains WHERE location = 'Pacific Ocean';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 81, "num_statements": 1} {"question": "What is the total number of dams in India and China combined?", "schema": "CREATE TABLE Dams (DamID INT, Name TEXT, Height FLOAT, Location TEXT, Country TEXT); INSERT INTO Dams (DamID, Name, Height, Location, Country) VALUES (1, 'Tehri Dam', 260.5, 'Tehri, India', 'India'); INSERT INTO Dams (DamID, Name, Height, Location, Country) VALUES (2, 'Xiaowan Dam', 292.0, 'Luxi, China', 'China');", "sql": "SELECT COUNT(*) FROM Dams WHERE Dams.Country = 'India' OR Dams.Country = 'China';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 81, "num_statements": 1} {"question": "pgTAP test for Check (assertion 12).", "schema": null, "sql": "SELECT * FROM check_test(\n col_has_check( 'sometab', 'name', 'sometab.name should have a check' ),\n true,\n 'col_has_check( tab, col, desc )',\n 'sometab.name should have a check',\n ''\n);", "explanation": "SQL assertion from pgTAP test suite for Check.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 200, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'rules' (example 310).", "schema": null, "sql": "INSERT INTO shoelace_data VALUES ('sl7', 7, 'brown', 60 , 'cm');", "explanation": "DML from PostgreSQL core regression test for Rules.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 64, "num_statements": 1} {"question": "What's the average budget of TV shows in the 'Comedy' genre?", "schema": "CREATE TABLE tv_show_budgets (id INT, title VARCHAR(255), genre VARCHAR(255), budget INT); INSERT INTO tv_show_budgets (id, title, genre, budget) VALUES (1, 'The Big Bang Theory', 'Comedy', 8000000), (2, 'Friends', 'Comedy', 10000000), (3, 'Breaking Bad', 'Drama', 3000000);", "sql": "SELECT AVG(budget) FROM tv_show_budgets WHERE genre = 'Comedy';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 63, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many blockings occured in the game with 198 rebounds?", "schema": "CREATE TABLE table_22993636_5 (blocks INTEGER, rebounds VARCHAR)", "sql": "SELECT MAX(blocks) FROM table_22993636_5 WHERE rebounds = 198;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 62, "num_statements": 1} {"question": "How many countries have launched objects into space?", "schema": "CREATE TABLE space_objects (object_name TEXT, launch_country TEXT); INSERT INTO space_objects (object_name, launch_country) VALUES ('Sputnik 1', 'USSR'), ('Explorer 1', 'USA');", "sql": "SELECT launch_country, COUNT(DISTINCT launch_country) as country_count FROM space_objects GROUP BY launch_country HAVING COUNT(DISTINCT launch_country) > 1;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 156, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What were the starts when the points dropped 18?", "schema": "CREATE TABLE table_24937583_1 (races__starts_ VARCHAR, points__dropped_points_ VARCHAR)", "sql": "SELECT races__starts_ FROM table_24937583_1 WHERE points__dropped_points_ = '18';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 81, "num_statements": 1} {"question": "What is the average data usage for postpaid mobile customers in the city of Seattle?", "schema": "CREATE TABLE mobile_subscribers (subscriber_id INT, data_usage FLOAT, city VARCHAR(20)); CREATE TABLE postpaid_subscribers (subscriber_id INT, plan_type VARCHAR(10)); INSERT INTO mobile_subscribers (subscriber_id, data_usage, city) VALUES (1, 5.6, 'Seattle'), (2, 7.8, 'New York'), (3, 3.4, 'Seattle'); INSERT INTO postpaid_subscribers (subscriber_id, plan_type) VALUES (1, 'postpaid'), (2, 'prepaid'), (3, 'postpaid');", "sql": "SELECT AVG(data_usage) FROM mobile_subscribers INNER JOIN postpaid_subscribers ON mobile_subscribers.subscriber_id = postpaid_subscribers.subscriber_id WHERE city = 'Seattle';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 175, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: what is the mascot for the enrollment 08-09 of 320?", "schema": "CREATE TABLE table_name_73 (mascot VARCHAR, enrollment_08_09 VARCHAR)", "sql": "SELECT mascot FROM table_name_73 WHERE enrollment_08_09 = 320;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 62, "num_statements": 1} {"question": "How many members are there in the 'solidarity_union' union?", "schema": "CREATE TABLE solidarity_union (member_id INT, name VARCHAR(50), union_joined_date DATE); INSERT INTO solidarity_union (member_id, name, union_joined_date) VALUES (29, 'Jayden Thompson', '2021-09-03'), (30, 'Katie Wright', '2021-10-12'), (31, 'Lila Jackson', '2021-11-27');", "sql": "SELECT COUNT(*) FROM solidarity_union;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 38, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What country won in 1993?", "schema": "CREATE TABLE table_name_28 (country VARCHAR, year_s__won VARCHAR)", "sql": "SELECT country FROM table_name_28 WHERE year_s__won = '1993';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 61, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: If Tuesday 1 June is 21' 05.27 107.351mph, what is the rider total number?", "schema": "CREATE TABLE table_25220821_3 (rider VARCHAR, tues_1_june VARCHAR)", "sql": "SELECT COUNT(rider) FROM table_25220821_3 WHERE tues_1_june = '21' 05.27 107.351mph';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 85, "num_statements": 1} {"question": "Delete the graduate student with id 4 from the 'students' table.", "schema": "CREATE TABLE students (id INT, region TEXT, start_year INT); INSERT INTO students (id, region, start_year) VALUES (1, 'India', 2019), (2, 'China', 2020), (3, 'Japan', 2018), (4, 'Mexico', 2021);", "sql": "DELETE FROM students WHERE id = 4;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_delete", "is_postgresql_specific": false, "sql_length": 34, "num_statements": 1} {"question": "PL/pgSQL test: Plpython Setof (example 25).", "schema": null, "sql": "-- set-returning function that modifies its parameters\nCREATE OR REPLACE FUNCTION ugly(x int, lim int) RETURNS SETOF int AS $$\nglobal x\nwhile x <= lim:\n yield x\n x = x + 1\n$$ LANGUAGE plpython3u;", "explanation": "PL/pgSQL example from PostgreSQL source test for Plpython Setof.", "validation_query": null, "source": "plpgsql_source", "difficulty": "advanced", "category": "plpgsql_function", "is_postgresql_specific": true, "sql_length": 201, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Find the total amount claimed in the most recently created document.", "schema": "CREATE TABLE claim_headers (amount_claimed INTEGER, claim_header_id VARCHAR); CREATE TABLE claims_documents (claim_id VARCHAR, created_date VARCHAR); CREATE TABLE claims_documents (created_date VARCHAR)", "sql": "SELECT SUM(t1.amount_claimed) FROM claim_headers AS t1 JOIN claims_documents AS t2 ON t1.claim_header_id = t2.claim_id WHERE t2.created_date = (SELECT created_date FROM claims_documents ORDER BY created_date LIMIT 1);", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 217, "num_statements": 1} {"question": "List open pedagogy courses that are not yet completed by any student.", "schema": "CREATE TABLE Courses (id INT, name VARCHAR(20), completed BOOLEAN); INSERT INTO Courses (id, name, completed) VALUES (1, 'Introduction to Open Pedagogy', FALSE), (2, 'Advanced Open Pedagogy', FALSE), (3, 'SQL for Open Pedagogy', TRUE);", "sql": "SELECT * FROM Courses WHERE completed = FALSE;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 46, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What date has a Game site of bye?", "schema": "CREATE TABLE table_name_77 (date VARCHAR, game_site VARCHAR)", "sql": "SELECT date FROM table_name_77 WHERE game_site = 'bye';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 55, "num_statements": 1} {"question": "PostgreSQL regression test 'strings': Write the SELECT query (example 247).", "schema": null, "sql": "SELECT OVERLAY('yabadoo' PLACING 'daba' FROM 5 FOR 0) AS \"yabadabadoo\";", "explanation": "Regression test for Strings in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT OVERLAY('yabadoo' PLACING 'daba' FROM 5 FOR 0) AS \"yabadabadoo\") AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 71, "num_statements": 1} {"question": "What is the total budget for rural infrastructure projects in Africa that were completed in the past 3 years, grouped by country?", "schema": "CREATE TABLE rural_infrastructure (id INT, project_budget INT, project_status TEXT, completion_date DATE, country TEXT); INSERT INTO rural_infrastructure (id, project_budget, project_status, completion_date, country) VALUES (1, 50000, 'completed', '2019-01-01', 'Kenya'), (2, 75000, 'in_progress', '2021-05-15', 'Nigeria'), (3, 60000, 'completed', '2020-12-31', 'South Africa');", "sql": "SELECT country, SUM(project_budget) FROM rural_infrastructure WHERE project_status = 'completed' AND completion_date >= DATE_SUB(NOW(), INTERVAL 3 YEAR) GROUP BY country;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 170, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the nominating festival for director of 2004", "schema": "CREATE TABLE table_name_1 (nominating_festival VARCHAR, director_s_ VARCHAR)", "sql": "SELECT nominating_festival FROM table_name_1 WHERE director_s_ = '2004';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 72, "num_statements": 1} {"question": "Delete records with a membership older than 5 years in the members table", "schema": "CREATE TABLE members (id INT, name VARCHAR(50), membership_start_date DATE);", "sql": "DELETE FROM members WHERE membership_start_date <= DATE_SUB(CURRENT_DATE, INTERVAL 5 YEAR);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_delete", "is_postgresql_specific": false, "sql_length": 91, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'time' (example 8).", "schema": null, "sql": "INSERT INTO TIME_TBL VALUES ('23:59');", "explanation": "DML from PostgreSQL core regression test for Time.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 38, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which driver has a Time/Retired of +6 laps?", "schema": "CREATE TABLE table_name_21 (driver VARCHAR, time_retired VARCHAR)", "sql": "SELECT driver FROM table_name_21 WHERE time_retired = '+6 laps';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 64, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: who is the away team when the tie no is more than 13 and the home team is team bath?", "schema": "CREATE TABLE table_name_30 (away_team VARCHAR, tie_no VARCHAR, home_team VARCHAR)", "sql": "SELECT away_team FROM table_name_30 WHERE tie_no > 13 AND home_team = 'team bath';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 82, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What team was Paolo Quinteros on?", "schema": "CREATE TABLE table_name_59 (team VARCHAR, name VARCHAR)", "sql": "SELECT team FROM table_name_59 WHERE name = 'paolo quinteros';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 62, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who is the alternate for the team for which Monika Wagner is the third?", "schema": "CREATE TABLE table_name_60 (alternate VARCHAR, third VARCHAR)", "sql": "SELECT alternate FROM table_name_60 WHERE third = 'monika wagner';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "What is the maximum depth recorded for any underwater canyon?", "schema": "CREATE TABLE underwater_canyons (canyon_name TEXT, max_depth_m INT); INSERT INTO underwater_canyons (canyon_name, max_depth_m) VALUES ('Milwaukee Deep', 8380), ('Sirena Deep', 9816), ('Tonga Trench', 10882);", "sql": "SELECT MAX(max_depth_m) FROM underwater_canyons;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 48, "num_statements": 1} {"question": "Write the SQL definition from PostgreSQL contrib extension 'contrib' (item 26).", "schema": null, "sql": "CREATE FUNCTION _int_contained_sel(internal, oid, internal, integer)\nRETURNS float8\nAS 'MODULE_PATHNAME'\nLANGUAGE C STRICT STABLE PARALLEL SAFE;", "explanation": "SQL definition from the 'contrib' PostgreSQL contrib extension.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 144, "num_statements": 1} {"question": "PostgreSQL regression test 'macaddr8': Write the SELECT query (example 29).", "schema": null, "sql": "SELECT * FROM macaddr8_data ORDER BY 1;", "explanation": "Regression test for Macaddr8 in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT * FROM macaddr8_data ORDER BY 1) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 39, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who had the highest assists on the November 13 game?", "schema": "CREATE TABLE table_name_56 (high_assists VARCHAR, date VARCHAR)", "sql": "SELECT high_assists FROM table_name_56 WHERE date = 'november 13';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "List all events with attendance greater than 100", "schema": "CREATE TABLE EventAttendance (EventID INT PRIMARY KEY, EventName VARCHAR(100), Date DATE, TotalAttendance INT); INSERT INTO EventAttendance (EventID, EventName, Date, TotalAttendance) VALUES (1, 'Theater Performance', '2022-02-20', 150), (2, 'Art Exhibit', '2022-01-15', 125), (3, 'Music Concert', '2021-12-10', 200);", "sql": "SELECT * FROM EventAttendance WHERE TotalAttendance > 100;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "query_select", "is_postgresql_specific": false, "sql_length": 58, "num_statements": 1} {"question": "How many indigenous languages are endangered in 'Pacific Islands'?", "schema": "CREATE TABLE IndigenousLanguages (LanguageID INT PRIMARY KEY, LanguageName VARCHAR(50), Status VARCHAR(50), Location VARCHAR(50)); INSERT INTO IndigenousLanguages (LanguageID, LanguageName, Status, Location) VALUES (1, 'Rapa Nui', 'Endangered', 'Easter Island'), (2, 'Palauan', 'Vulnerable', 'Palau');", "sql": "SELECT COUNT(*) FROM IndigenousLanguages WHERE Location LIKE '%Pacific Islands%' AND Status = 'Endangered';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 107, "num_statements": 1} {"question": "What is the average safety rating for explainable AI models?", "schema": "CREATE TABLE explainable_ai (model_name TEXT, safety_rating INTEGER); INSERT INTO explainable_ai (model_name, safety_rating) VALUES ('Model D', 7), ('Model E', 9), ('Model F', 8);", "sql": "SELECT AVG(safety_rating) FROM explainable_ai WHERE model_name LIKE '%explainable%';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 84, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: what is the least silver when overall is less than 67, team is saami and gold is less than 5?", "schema": "CREATE TABLE table_name_2 (silver INTEGER, gold VARCHAR, overall VARCHAR, team VARCHAR)", "sql": "SELECT MIN(silver) FROM table_name_2 WHERE overall < 67 AND team = 'saami' AND gold < 5;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 88, "num_statements": 1} {"question": "List all peacekeeping operations with their corresponding operation start and end dates.", "schema": "CREATE TABLE peacekeeping_operations (id INT, name TEXT, start_date DATE, end_date DATE); INSERT INTO peacekeeping_operations (id, name, start_date, end_date) VALUES (1, 'Operation1', '2010-01-01', '2015-01-01'), (2, 'Operation2', '2015-01-01', '2020-01-01'), (3, 'Operation3', '2020-01-01', '2022-01-01');", "sql": "SELECT name, start_date, end_date FROM peacekeeping_operations;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 63, "num_statements": 1} {"question": "How many clinical trials were conducted for drug 'LMN-789'?", "schema": "CREATE TABLE clinical_trials (trial_id INTEGER, drug_name TEXT, year INTEGER);", "sql": "SELECT COUNT(*) FROM clinical_trials WHERE drug_name = 'LMN-789';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 65, "num_statements": 1} {"question": "Provide the average environmental impact score for gold mines in the Western region.", "schema": "CREATE TABLE environmental_impact (mine_type VARCHAR(20), region VARCHAR(20), score FLOAT); INSERT INTO environmental_impact (mine_type, region, score) VALUES ('Gold', 'West', 70.5), ('Gold', 'West', 71.2), ('Gold', 'East', 75.0), ('Gold', 'East', 73.1), ('Gold', 'North', 68.8), ('Gold', 'South', 72.4);", "sql": "SELECT AVG(score) FROM environmental_impact WHERE mine_type = 'Gold' AND region = 'West';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 89, "num_statements": 1} {"question": "Update the temperature sensor readings to 25 degrees Celsius for sensor_id 12", "schema": "CREATE TABLE temperature_sensor_data (sensor_id INT, temperature FLOAT, timestamp TIMESTAMP); INSERT INTO temperature_sensor_data (sensor_id, temperature, timestamp) VALUES (12, 23.6, '2022-05-21 10:00:00');", "sql": "WITH updated_data AS (UPDATE temperature_sensor_data SET temperature = 25 WHERE sensor_id = 12 RETURNING *) SELECT * FROM updated_data;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "dml_update", "is_postgresql_specific": true, "sql_length": 135, "num_statements": 1} {"question": "What is the total revenue generated by hotel bookings in the APAC region in Q1 2022?", "schema": "CREATE TABLE bookings (booking_id INT, booking_date DATE, region VARCHAR(255), revenue DECIMAL(10,2)); INSERT INTO bookings (booking_id, booking_date, region, revenue) VALUES (1, '2022-01-01', 'APAC', 100), (2, '2022-02-01', 'APAC', 200), (3, '2022-03-01', 'APAC', 300);", "sql": "SELECT SUM(revenue) FROM bookings WHERE region = 'APAC' AND booking_date >= '2022-01-01' AND booking_date < '2022-04-01';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 121, "num_statements": 1} {"question": "What is the total number of shelter units constructed in Iraq and Lebanon, grouped by construction company?", "schema": "CREATE TABLE shelter_info (id INT, company VARCHAR(255), country VARCHAR(255), units_constructed INT); INSERT INTO shelter_info (id, company, country, units_constructed) VALUES ('1', 'Alpha Construction', 'Iraq', '10'), ('2', 'Beta Construction', 'Iraq', '15'), ('3', 'Gamma Construction', 'Lebanon', '20'), ('4', 'Delta Construction', 'Lebanon', '25'), ('5', 'Epsilon Construction', 'Iraq', '30'), ('6', 'Zeta Construction', 'Lebanon', '35');", "sql": "SELECT company, SUM(units_constructed) as total_units FROM shelter_info WHERE country IN ('Iraq', 'Lebanon') GROUP BY company;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 126, "num_statements": 1} {"question": "What is the distribution of cases by case type, in each district, in the last month?", "schema": "CREATE TABLE cases (case_id INT, district_id INT, case_type VARCHAR(20), open_date DATE); INSERT INTO cases (case_id, district_id, case_type, open_date) VALUES (1, 1, 'Civil', '2022-01-05'), (2, 2, 'Criminal', '2022-03-10'), (3, 1, 'Civil', '2022-04-01');", "sql": "SELECT cases.district_id, cases.case_type, COUNT(*) as num_cases FROM cases WHERE open_date >= DATE_SUB(CURRENT_DATE, INTERVAL 1 MONTH) GROUP BY cases.district_id, cases.case_type;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 180, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who is listed under mens singles when womens has wang nan zhang yining?", "schema": "CREATE TABLE table_28138035_27 (mens_singles VARCHAR, womens_doubles VARCHAR)", "sql": "SELECT mens_singles FROM table_28138035_27 WHERE womens_doubles = 'Wang Nan Zhang Yining';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 90, "num_statements": 1} {"question": "Which shrimp farms in Ecuador have experienced water temperatures above 30°C in the last 30 days?", "schema": "CREATE TABLE shrimp_farms (id INT, name TEXT, country TEXT, latitude DECIMAL(9,6), longitude DECIMAL(9,6)); INSERT INTO shrimp_farms (id, name, country, latitude, longitude) VALUES (1, 'Farm C', 'Ecuador', -2.123456, -79.123456); INSERT INTO shrimp_farms (id, name, country, latitude, longitude) VALUES (2, 'Farm D', 'Ecuador', -1.123456, -78.123456); CREATE TABLE shrimp_temperature_data (id INT, farm_id INT, timestamp TIMESTAMP, temperature DECIMAL(5,2)); INSERT INTO shrimp_temperature_data (id, farm_id, timestamp, temperature) VALUES (1, 1, '2022-05-01 00:00:00', 28.5); INSERT INTO shrimp_temperature_data (id, farm_id, timestamp, temperature) VALUES (2, 1, '2022-05-02 00:00:00', 31.2); INSERT INTO shrimp_temperature_data (id, farm_id, timestamp, temperature) VALUES (3, 2, '2022-05-01 00:00:00', 29.8); INSERT INTO shrimp_temperature_data (id, farm_id, timestamp, temperature) VALUES (4, 2, '2022-05-02 00:00:00', 27.6);", "sql": "SELECT st.farm_id, sf.name FROM shrimp_temperature_data st JOIN shrimp_farms sf ON st.farm_id = sf.id WHERE sf.country = 'Ecuador' AND st.temperature > 30 AND st.timestamp >= NOW() - INTERVAL 30 DAY;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 199, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: When was someone first elected in the district with incumbent john smith?", "schema": "CREATE TABLE table_2668378_18 (first_elected VARCHAR, incumbent VARCHAR)", "sql": "SELECT first_elected FROM table_2668378_18 WHERE incumbent = 'John Smith';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 74, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the Country that has a Player of Scott Hoch?", "schema": "CREATE TABLE table_name_31 (country VARCHAR, player VARCHAR)", "sql": "SELECT country FROM table_name_31 WHERE player = 'scott hoch';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 62, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What's the highest Year for the Venue of Santiago De Chile and the Event of 800 m?", "schema": "CREATE TABLE table_name_67 (year INTEGER, event VARCHAR, venue VARCHAR)", "sql": "SELECT MAX(year) FROM table_name_67 WHERE event = '800 m' AND venue = 'santiago de chile';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 90, "num_statements": 1} {"question": "pgTAP test for Aretap (assertion 81).", "schema": null, "sql": "SELECT * FROM check_test(\n functions_are( array_append(___myfunk('check_test'), '__booyah__'), 'whatever' ),\n false,\n 'functions_are(functions, desc) + extra & missing',\n 'whatever',\n ' Extra functions:\n check_test\n Missing functions:\n __booyah__'\n);", "explanation": "SQL assertion from pgTAP test suite for Aretap.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 285, "num_statements": 1} {"question": "PostgreSQL regression test 'pg_ndistinct': Write the SELECT query (example 55).", "schema": null, "sql": "SELECT * FROM pg_input_error_info('[{\"attributes\" : [2,\"a\"], \"ndistinct\" : 4}]', 'pg_ndistinct');", "explanation": "Regression test for Pg Ndistinct in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT * FROM pg_input_error_info('[{\"attributes\" : [2,\"a\"], \"ndistinct\" : 4}]', 'pg_ndistinct')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 97, "num_statements": 1} {"question": "Write the SQL definition from PostgreSQL contrib extension 'contrib' (item 8).", "schema": null, "sql": "CREATE OPERATOR ~<=~ (\n LEFTARG = CITEXT,\n RIGHTARG = CITEXT,\n NEGATOR = ~>~,\n COMMUTATOR = ~>=~,\n PROCEDURE = citext_pattern_le,\n RESTRICT = scalarltsel,\n JOIN = scalarltjoinsel\n);", "explanation": "SQL definition from the 'contrib' PostgreSQL contrib extension.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "ddl_advanced", "is_postgresql_specific": true, "sql_length": 219, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is Position, when Laps is 156?", "schema": "CREATE TABLE table_name_90 (position VARCHAR, laps VARCHAR)", "sql": "SELECT position FROM table_name_90 WHERE laps = 156;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 52, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which tournament was held on 9 Jun 2002?", "schema": "CREATE TABLE table_name_57 (tournament VARCHAR, date VARCHAR)", "sql": "SELECT tournament FROM table_name_57 WHERE date = '9 jun 2002';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 63, "num_statements": 1} {"question": "How many astronauts are from each country?", "schema": "CREATE TABLE astronauts (id INT PRIMARY KEY, astronaut_name VARCHAR(50), country VARCHAR(50));", "sql": "SELECT country, COUNT(*) as num_astronauts FROM astronauts GROUP BY country;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 76, "num_statements": 1} {"question": "Total number of therapy sessions conducted in each country.", "schema": "CREATE TABLE therapy_sessions (session_id INT, patient_id INT, country VARCHAR(50), session_count INT); INSERT INTO therapy_sessions (session_id, patient_id, country, session_count) VALUES (1, 1, 'USA', 5), (2, 2, 'Canada', 3), (3, 3, 'Japan', 4);", "sql": "SELECT country, COUNT(DISTINCT patient_id) AS number_of_patients, SUM(session_count) AS total_sessions FROM therapy_sessions GROUP BY country;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 142, "num_statements": 1} {"question": "What is the total biomass of all the trees in the Trees table, if each tree has a biomass of 0.022 pounds per year per inch of age?", "schema": "CREATE TABLE Trees (id INT, species VARCHAR(255), age INT); INSERT INTO Trees (id, species, age) VALUES (1, 'Oak', 50), (2, 'Pine', 30), (3, 'Maple', 40);", "sql": "SELECT SUM(age * 0.022) FROM Trees;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 35, "num_statements": 1} {"question": "PostgreSQL regression test 'jsonb_jsonpath': Write the SELECT query (example 279).", "schema": null, "sql": "select jsonb_path_query('\"-inf\"', '$.double()', silent => true);", "explanation": "Regression test for Jsonb Jsonpath in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select jsonb_path_query('\"-inf\"', '$.double()', silent => true)) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "advanced", "category": "query_select", "is_postgresql_specific": false, "sql_length": 64, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many appelations are in Napa Country?", "schema": "CREATE TABLE APPELLATIONS (County VARCHAR)", "sql": "SELECT COUNT(*) FROM APPELLATIONS WHERE County = 'Napa';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 56, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'merge' (example 493).", "schema": null, "sql": "CREATE TABLE measurement_y2006m02 (\n CHECK ( logdate >= DATE '2006-02-01' AND logdate < DATE '2006-03-01' )\n) INHERITS (measurement) WITH (autovacuum_enabled=off);", "explanation": "DDL from PostgreSQL core regression test for Merge.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_table", "is_postgresql_specific": true, "sql_length": 166, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who was the player with a score of 70-69=139?", "schema": "CREATE TABLE table_name_37 (player VARCHAR, score VARCHAR)", "sql": "SELECT player FROM table_name_37 WHERE score = 70 - 69 = 139;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 61, "num_statements": 1} {"question": "What is the total budget for all education programs?", "schema": "CREATE VIEW Education_Programs AS SELECT 'Wildlife_Ambassadors' AS program, 15000 AS budget UNION SELECT 'Conservation_Champions', 20000;", "sql": "SELECT SUM(budget) FROM Education_Programs;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 43, "num_statements": 1} {"question": "PostgreSQL regression test 'regproc': Write the SELECT query (example 132).", "schema": null, "sql": "SELECT * FROM pg_input_error_info('Nonexistent', 'regdatabase');", "explanation": "Regression test for Regproc in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT * FROM pg_input_error_info('Nonexistent', 'regdatabase')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 64, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which owner owns the most dogs? List the owner id, first name and last name.", "schema": "CREATE TABLE Owners (first_name VARCHAR, last_name VARCHAR, owner_id VARCHAR); CREATE TABLE Dogs (owner_id VARCHAR)", "sql": "SELECT T1.owner_id, T2.first_name, T2.last_name FROM Dogs AS T1 JOIN Owners AS T2 ON T1.owner_id = T2.owner_id GROUP BY T1.owner_id ORDER BY COUNT(*) DESC LIMIT 1;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 163, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the number of people in attendance when the decision was brodeur and the home was ottawa?", "schema": "CREATE TABLE table_name_60 (attendance VARCHAR, decision VARCHAR, home VARCHAR)", "sql": "SELECT attendance FROM table_name_60 WHERE decision = 'brodeur' AND home = 'ottawa';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 84, "num_statements": 1} {"question": "PostgreSQL regression test 'pg_ndistinct': Write the SELECT query (example 22).", "schema": null, "sql": "SELECT * FROM pg_input_error_info('[{\"ndistinct\" : 4}]', 'pg_ndistinct');", "explanation": "Regression test for Pg Ndistinct in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT * FROM pg_input_error_info('[{\"ndistinct\" : 4}]', 'pg_ndistinct')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 73, "num_statements": 1} {"question": "What was the total amount donated by individual donors from the US in Q1 2021?", "schema": "CREATE TABLE donors (id INT, name TEXT, country TEXT, donation FLOAT); INSERT INTO donors (id, name, country, donation) VALUES (1, 'John Doe', 'USA', 500.00), (2, 'Jane Smith', 'Canada', 300.00);", "sql": "SELECT SUM(donation) FROM donors WHERE country = 'USA' AND donation_date BETWEEN '2021-01-01' AND '2021-03-31';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 111, "num_statements": 1} {"question": "What is the name of the community health worker who conducted the least number of mental health parity consultations in New York?", "schema": "CREATE TABLE community_health_workers (id INT, name TEXT, zip TEXT, consultations INT); INSERT INTO community_health_workers (id, name, zip, consultations) VALUES (1, 'John Doe', '10001', 10), (2, 'Jane Smith', '11223', 20); CREATE VIEW ny_workers AS SELECT * FROM community_health_workers WHERE zip BETWEEN '10001' AND '11999';", "sql": "SELECT name FROM ny_workers WHERE consultations = (SELECT MIN(consultations) FROM ny_workers);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 94, "num_statements": 1} {"question": "Determine the total revenue for each strain, including those with no sales.", "schema": "CREATE TABLE Strains (id INT, name TEXT, price DECIMAL); INSERT INTO Strains (id, name, price) VALUES (1, 'Purple Haze', 12.50), (2, 'Blue Dream', 14.25), (3, 'Girl Scout Cookies', 15.00), (4, 'OG Kush', 16.00), (5, 'Sour Diesel', 13.75); CREATE TABLE Sales (id INT, strain_id INT, quantity INT, sale_date DATE); INSERT INTO Sales (id, strain_id, quantity, sale_date) VALUES (1, 1, 50, '2022-01-01'), (2, 1, 75, '2022-01-05'), (3, 2, 60, '2022-01-03'), (4, 2, 40, '2022-01-07'), (5, 4, 100, '2022-01-06');", "sql": "SELECT Strains.name, COALESCE(SUM(Strains.price * Sales.quantity), 0) as total_revenue FROM Strains LEFT JOIN Sales ON Strains.id = Sales.strain_id GROUP BY Strains.name;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 170, "num_statements": 1} {"question": "Which space telescope has the largest primary mirror?", "schema": "CREATE TABLE space_telescopes (name TEXT, primary_mirror_m INTEGER); INSERT INTO space_telescopes (name, primary_mirror_m) VALUES ('Hubble Space Telescope', 2400), ('James Webb Space Telescope', 6500), ('Spitzer Space Telescope', 850);", "sql": "SELECT name FROM space_telescopes ORDER BY primary_mirror_m DESC LIMIT 1;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 73, "num_statements": 1} {"question": "What is the total cost of ingredients for vegan dishes, updated to reflect the current inventory levels?", "schema": "CREATE TABLE ingredients (id INT, ingredient_name TEXT, unit_price DECIMAL, quantity INT);", "sql": "SELECT SUM(unit_price * quantity) FROM ingredients WHERE ingredient_name IN (SELECT ingredient_name FROM menu_items WHERE is_vegan = TRUE);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 139, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the least amount for Goals Olimpia?", "schema": "CREATE TABLE table_13688489_1 (goals_olimpia INTEGER)", "sql": "SELECT MIN(goals_olimpia) FROM table_13688489_1;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 48, "num_statements": 1} {"question": "PostgreSQL regression test 'collate.utf8': Write the SELECT query (example 22).", "schema": null, "sql": "SELECT 'xAb' ~* '[W-Y]' COLLATE PG_C_UTF8;", "explanation": "Regression test for Collate.Utf8 in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT 'xAb' ~* '[W-Y]' COLLATE PG_C_UTF8) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 42, "num_statements": 1} {"question": "Which dams are located in 'Texas' and their respective construction costs from the 'dams' and 'dam_construction_costs' tables?", "schema": "CREATE TABLE dams (id INT, name VARCHAR(255), location VARCHAR(255)); CREATE TABLE dam_construction_costs (dam_id INT, cost DECIMAL(10, 2));", "sql": "SELECT d.name, dcc.cost as construction_cost FROM dams d INNER JOIN dam_construction_costs dcc ON d.id = dcc.dam_id WHERE d.location = 'Texas';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 143, "num_statements": 1} {"question": "What is the maximum salary paid to a worker in the workforce development sector?", "schema": "CREATE TABLE workers (worker_id INT, sector VARCHAR(255), salary DECIMAL(10,2)); INSERT INTO workers (worker_id, sector, salary) VALUES (1, 'Workforce Development', 80000.00), (2, 'Workforce Development', 85000.00), (3, 'Workforce Development', 70000.00);", "sql": "SELECT MAX(salary) FROM workers WHERE sector = 'Workforce Development';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 71, "num_statements": 1} {"question": "Show an example of PostgreSQL CREATE TRIGGER (example 4).", "schema": null, "sql": "CREATE TRIGGER log_update AFTER UPDATE ON accounts FOR EACH ROW WHEN (OLD.* IS DISTINCT FROM NEW.*) EXECUTE FUNCTION log_account_update();", "explanation": "PostgreSQL CREATE TRIGGER command.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "dml_update", "is_postgresql_specific": true, "sql_length": 138, "num_statements": 1} {"question": "Add a new department to the \"departments\" table", "schema": "CREATE TABLE departments (id INT, department VARCHAR(50));", "sql": "INSERT INTO departments (id, department) VALUES (5, 'Diversity & Inclusion');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 77, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which Goals have Games smaller than 41, and a Player of mark amos?", "schema": "CREATE TABLE table_name_57 (goals INTEGER, games VARCHAR, player VARCHAR)", "sql": "SELECT MIN(goals) FROM table_name_57 WHERE games < 41 AND player = 'mark amos';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 79, "num_statements": 1} {"question": "List all community groups that Officer 007 has interacted with in the last month", "schema": "CREATE TABLE community_groups (id SERIAL PRIMARY KEY, name VARCHAR(255)); CREATE TABLE officer_details (id SERIAL PRIMARY KEY, name VARCHAR(255), badge_number INTEGER); CREATE TABLE community_interaction (id SERIAL PRIMARY KEY, community_group_id INTEGER, officer_id INTEGER, interaction_type VARCHAR(255), interaction_timestamp TIMESTAMP); INSERT INTO community_groups (name) VALUES ('Neighborhood Watch'), ('Youth Council'), ('Senior Citizens Club'); INSERT INTO officer_details (name, badge_number) VALUES ('John Doe', 7), ('Jane Smith', 8); INSERT INTO community_interaction (community_group_id, officer_id, interaction_type, interaction_timestamp) VALUES (1, 7, 'Meeting', '2023-03-15 10:00:00'), (2, 8, 'Presentation', '2023-03-20 14:00:00');", "sql": "SELECT cg.name FROM community_groups cg JOIN community_interaction ci ON cg.id = ci.community_group_id JOIN officer_details od ON od.id = ci.officer_id WHERE od.badge_number = 7 AND ci.interaction_timestamp >= (CURRENT_DATE - INTERVAL '1 month');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": true, "sql_length": 246, "num_statements": 1} {"question": "Determine the AI safety algorithms that have at least one transaction but are not included in any AI bias mitigation transaction lists.", "schema": "CREATE TABLE ai_safety_algorithms_2 (id INT, algorithm_name VARCHAR(30)); INSERT INTO ai_safety_algorithms_2 (id, algorithm_name) VALUES (1, 'SafeAI 1.1'); INSERT INTO ai_safety_algorithms_2 (id, algorithm_name) VALUES (2, 'SafeAI 2.1'); INSERT INTO ai_safety_algorithms_2 (id, algorithm_name) VALUES (3, 'SafeAI 3.1'); INSERT INTO ai_safety_algorithms_2 (id, algorithm_name) VALUES (4, 'SafeAI 4.1'); CREATE TABLE ai_bias_mitigation_transaction_lists (algorithm_id INT); INSERT INTO ai_bias_mitigation_transaction_lists (algorithm_id) VALUES (1); INSERT INTO ai_bias_mitigation_transaction_lists (algorithm_id) VALUES (3);", "sql": "SELECT algorithm_name FROM ai_safety_algorithms_2 WHERE id IN (SELECT id FROM transactions) AND id NOT IN (SELECT algorithm_id FROM ai_bias_mitigation_transaction_lists);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 170, "num_statements": 1} {"question": "What is the change in CO2 emissions for each mine over time?", "schema": "CREATE TABLE environmental_impact (id INT, mine_id INT, impact_type VARCHAR(50), value INT, PRIMARY KEY (id), FOREIGN KEY (mine_id) REFERENCES mines(id)); INSERT INTO environmental_impact (id, mine_id, impact_type, value) VALUES (1, 1, 'CO2 Emissions', 1200); INSERT INTO environmental_impact (id, mine_id, impact_type, value) VALUES (2, 1, 'CO2 Emissions', 1250); INSERT INTO environmental_impact (id, mine_id, impact_type, value) VALUES (3, 2, 'CO2 Emissions', 2000); INSERT INTO environmental_impact (id, mine_id, impact_type, value) VALUES (4, 2, 'CO2 Emissions', 2100); CREATE TABLE mines (id INT, name VARCHAR(50), location VARCHAR(50), annual_production INT, PRIMARY KEY (id)); INSERT INTO mines (id, name, location, annual_production) VALUES (1, 'Golden Mine', 'California', 15000); INSERT INTO mines (id, name, location, annual_production) VALUES (2, 'Silver Mine', 'Nevada', 22000);", "sql": "SELECT mine_id, impact_type, value, LAG(value) OVER (PARTITION BY mine_id ORDER BY id) as previous_value FROM environmental_impact;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "query_window_function", "is_postgresql_specific": true, "sql_length": 131, "num_statements": 1} {"question": "What is the total number of legal service providers and the number of providers offering services in Spanish, by state?", "schema": "CREATE TABLE legal_service_providers (id INT, state VARCHAR(50), offers_spanish_services BOOLEAN);", "sql": "SELECT state, COUNT(*) total_providers, SUM(CASE WHEN offers_spanish_services = TRUE THEN 1 ELSE 0 END) AS providers_spanish_services FROM legal_service_providers GROUP BY state;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 178, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the sum of golds for ranks of 6 and totals over 3?", "schema": "CREATE TABLE table_name_36 (gold INTEGER, rank VARCHAR, total VARCHAR)", "sql": "SELECT SUM(gold) FROM table_name_36 WHERE rank = '6' AND total > 3;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 67, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What high school did Jeff Malm attend?", "schema": "CREATE TABLE table_11677100_15 (school VARCHAR, player VARCHAR)", "sql": "SELECT school FROM table_11677100_15 WHERE player = 'Jeff Malm';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 64, "num_statements": 1} {"question": "What is the average carbon footprint of travelers from each continent visiting sustainable destinations?", "schema": "CREATE TABLE continents (continent_name VARCHAR(50), country_code CHAR(3));CREATE TABLE travelers (traveler_id INT, country_code CHAR(3), carbon_footprint INT);CREATE TABLE destinations (destination_id INT, destination_name VARCHAR(100), is_sustainable BOOLEAN);CREATE TABLE visits (visit_id INT, traveler_id INT, destination_id INT);INSERT INTO continents (continent_name, country_code) VALUES ('North America', 'USA'), ('North America', 'CAN'), ('Europe', 'DEU'), ('Europe', 'GBR'), ('Asia', 'CHN'), ('Africa', 'ZAF'), ('Oceania', 'AUS');", "sql": "SELECT c.continent_name, AVG(t.carbon_footprint) FROM continents c JOIN (SELECT traveler_id, carbon_footprint FROM travelers t JOIN visits v ON t.traveler_id = v.traveler_id WHERE v.destination_id IN (SELECT destination_id FROM destinations d WHERE d.is_sustainable = TRUE)) subquery ON c.country_code = subquery.country_code GROUP BY c.continent_name;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 352, "num_statements": 1} {"question": "How many properties in each borough have a SustainabilityRating of at least 2?", "schema": "CREATE TABLE Properties (PropertyID int, Price int, Borough varchar(255), SustainabilityRating int); INSERT INTO Properties (PropertyID, Price, Borough, SustainabilityRating) VALUES (1, 350000, 'Manhattan', 2);", "sql": "SELECT Borough, COUNT(*) as PropertyCount FROM Properties WHERE SustainabilityRating >= 2 GROUP BY Borough;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 107, "num_statements": 1} {"question": "PostgreSQL regression test 'cluster': Write the SELECT query (example 117).", "schema": null, "sql": "select * from clstr_temp;", "explanation": "Regression test for Cluster in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select * from clstr_temp) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 25, "num_statements": 1} {"question": "Delete a product from the \"products\" table", "schema": "CREATE TABLE products (product_id INT, name VARCHAR(255), category VARCHAR(255), price DECIMAL(5,2));", "sql": "DELETE FROM products WHERE product_id = 1001;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_delete", "is_postgresql_specific": false, "sql_length": 45, "num_statements": 1} {"question": "What is the name of the disability support programs offered in a specific state?", "schema": "CREATE TABLE States (StateID INT, State VARCHAR(50)); INSERT INTO States (StateID, State) VALUES (1, 'California'); INSERT INTO States (StateID, State) VALUES (2, 'Texas');", "sql": "SELECT SupportPrograms.ProgramName FROM SupportPrograms INNER JOIN States ON SupportPrograms.StateID = States.StateID WHERE States.State = 'California';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 152, "num_statements": 1} {"question": "Add sales data for the month of January 2022", "schema": "CREATE TABLE sales (id INT PRIMARY KEY, product VARCHAR(255), quantity INT, sale_date DATE); INSERT INTO sales (id, product, quantity, sale_date) VALUES (1, 'Sativa Flower', 25, '2022-01-01');", "sql": "INSERT INTO sales (id, product, quantity, sale_date) VALUES (2, 'Indica Flower', 30, '2022-01-05'); INSERT INTO sales (id, product, quantity, sale_date) VALUES (3, 'Hybrid Flower', 35, '2022-01-10');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 199, "num_statements": 2} {"question": "How many tree species have a total volume greater than 1000 cubic meters in the boreal forest?", "schema": "CREATE TABLE biomes (biome_id INT PRIMARY KEY, name VARCHAR(50), area_km2 FLOAT); INSERT INTO biomes (biome_id, name, area_km2) VALUES (1, 'Tropical Rainforest', 15000000.0), (2, 'Temperate Rainforest', 250000.0), (3, 'Boreal Forest', 12000000.0); CREATE TABLE trees (tree_id INT PRIMARY KEY, species VARCHAR(50), biome_id INT, volume FLOAT, FOREIGN KEY (biome_id) REFERENCES biomes(biome_id)); INSERT INTO trees (tree_id, species, biome_id, volume) VALUES (1, 'Norway Spruce', 3, 500.0), (2, 'Scots Pine', 3, 750.0), (3, 'Birch', 3, 450.0);", "sql": "SELECT COUNT(DISTINCT species) FROM trees JOIN biomes ON trees.biome_id = biomes.biome_id GROUP BY biomes.name HAVING SUM(trees.volume) > 1000;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 143, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'create_view' (example 212).", "schema": null, "sql": "create view vv5 as select x,y,z from tt9 join tt10 using(x);", "explanation": "DDL from PostgreSQL core regression test for Create View.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "ddl_view", "is_postgresql_specific": false, "sql_length": 60, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: In the game resulting in a 5-11 record, who scored the high rebounds?", "schema": "CREATE TABLE table_17118657_8 (high_rebounds VARCHAR, record VARCHAR)", "sql": "SELECT high_rebounds FROM table_17118657_8 WHERE record = '5-11';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 65, "num_statements": 1} {"question": "What is the percentage of community health workers who identify as Hispanic or Latino by state?", "schema": "CREATE TABLE States (state_id INT, state_name TEXT); CREATE TABLE CommunityHealthWorkers (worker_id INT, worker_ethnicity TEXT, state_id INT);", "sql": "SELECT COUNT(*) FILTER (WHERE worker_ethnicity = 'Hispanic or Latino') * 100.0 / COUNT(*) as pct_hispanic_workers, s.state_name FROM CommunityHealthWorkers chw JOIN States s ON chw.state_id = s.state_id GROUP BY s.state_id;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": true, "sql_length": 223, "num_statements": 1} {"question": "List all unions with their corresponding headquarters' addresses.", "schema": "CREATE TABLE if not exists unions (union_id INT, union_name TEXT, headquarters_address TEXT); INSERT INTO unions (union_id, union_name, headquarters_address) VALUES (1, 'United Steelworkers', '60 Boulevard of the Allies, Pittsburgh, PA 15222'), (2, 'Teamsters', '25 Louisiana Ave NW, Washington, DC 20001'), (3, 'UAW', '8000 E Jefferson Ave, Detroit, MI 48214');", "sql": "SELECT * FROM unions;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 21, "num_statements": 1} {"question": "PostgreSQL regression test 'arrays': Write the SELECT query (example 420).", "schema": null, "sql": "select unnest(array(select '11 22 33'::int2vector from generate_series(1,5)));", "explanation": "Regression test for Arrays in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select unnest(array(select '11 22 33'::int2vector from generate_series(1,5)))) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": true, "sql_length": 78, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the average round of the match in the Hero's 9 event with a time of 5:00?", "schema": "CREATE TABLE table_name_7 (round INTEGER, time VARCHAR, event VARCHAR)", "sql": "SELECT AVG(round) FROM table_name_7 WHERE time = '5:00' AND event = 'hero's 9';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 79, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Count the number of trips that did not end in San Francisco city.", "schema": "CREATE TABLE trip (end_station_id VARCHAR); CREATE TABLE station (id VARCHAR, city VARCHAR)", "sql": "SELECT COUNT(*) FROM trip AS T1 JOIN station AS T2 ON T1.end_station_id = T2.id WHERE T2.city <> \"San Francisco\";", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 113, "num_statements": 1} {"question": "pgTAP test for Ruletap (assertion 16).", "schema": null, "sql": "SELECT * FROM check_test(\n has_rule( 'sometab', 'ins_me', 'whatever' ),\n true,\n 'has_rule(table, rule, desc)',\n 'whatever',\n ''\n);", "explanation": "SQL assertion from pgTAP test suite for Ruletap.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 145, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the overall score when the set 2 score is 25–18?", "schema": "CREATE TABLE table_name_57 (score VARCHAR, set_2 VARCHAR)", "sql": "SELECT score FROM table_name_57 WHERE set_2 = '25–18';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 54, "num_statements": 1} {"question": "What are the earliest and latest departure times for trams in the suburbs?", "schema": "CREATE TABLE schedules (route_id INT, vehicle_id INT, departure_time TIME); INSERT INTO schedules VALUES (1, 1, '06:00:00'), (1, 2, '06:15:00'), (1, 3, '06:30:00'), (2, 4, '07:00:00'), (2, 5, '07:15:00'); CREATE TABLE routes (route_id INT, city VARCHAR(50), type VARCHAR(50)); INSERT INTO routes VALUES (1, 'City Center', 'Bus'), (2, 'Suburbs', 'Tram');", "sql": "SELECT MIN(departure_time) AS earliest, MAX(departure_time) AS latest FROM schedules JOIN routes ON schedules.route_id = routes.route_id WHERE routes.city = 'Suburbs' AND routes.type = 'Tram';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 192, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the gold for silver of 39", "schema": "CREATE TABLE table_name_56 (gold VARCHAR, silver VARCHAR)", "sql": "SELECT gold FROM table_name_56 WHERE silver = 39;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 49, "num_statements": 1} {"question": "What is the total number of hours played by all players in Valorant?", "schema": "CREATE TABLE players (id INT, name VARCHAR(50), age INT, game VARCHAR(50), hours_played INT); INSERT INTO players (id, name, age, game, hours_played) VALUES (1, 'John Doe', 25, 'Valorant', 50);", "sql": "SELECT SUM(hours_played) AS total_hours FROM players WHERE game = 'Valorant';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 77, "num_statements": 1} {"question": "What is the number of public libraries in Seattle, Washington that offer free Wi-Fi access?", "schema": "CREATE TABLE public_libraries (library_id INT, library_name TEXT, city TEXT, state TEXT, wi_fi_access BOOLEAN); INSERT INTO public_libraries (library_id, library_name, city, state, wi_fi_access) VALUES (1, 'Seattle Central Library', 'Seattle', 'Washington', TRUE); INSERT INTO public_libraries (library_id, library_name, city, state, wi_fi_access) VALUES (2, 'The Seattle Public Library - Ballard Branch', 'Seattle', 'Washington', TRUE); INSERT INTO public_libraries (library_id, library_name, city, state, wi_fi_access) VALUES (3, 'The Seattle Public Library - Green Lake Branch', 'Seattle', 'Washington', FALSE);", "sql": "SELECT COUNT(*) FROM public_libraries WHERE city = 'Seattle' AND state = 'Washington' AND wi_fi_access = TRUE;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 110, "num_statements": 1} {"question": "What is the total yield of crops by country in 'crop_distribution' table?", "schema": "CREATE TABLE crop_distribution (country VARCHAR(50), crop VARCHAR(50), yield INT); INSERT INTO crop_distribution (country, crop, yield) VALUES ('Canada', 'corn', 1000), ('Canada', 'wheat', 2000), ('USA', 'corn', 3000), ('USA', 'wheat', 4000), ('Mexico', 'corn', 2500), ('Mexico', 'wheat', 1500); CREATE TABLE country_total (country VARCHAR(50), total INT); INSERT INTO country_total (country, total) SELECT country, SUM(yield) FROM crop_distribution GROUP BY country;", "sql": "SELECT ct.country, ct.total FROM country_total ct;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 50, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: with delivery date 1997 1997 what is the owns?", "schema": "CREATE TABLE table_name_65 (owns VARCHAR, delivery_date VARCHAR)", "sql": "SELECT owns FROM table_name_65 WHERE delivery_date = '1997 1997';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 65, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which Marriage has a Became Duchess of 12 december 1666?", "schema": "CREATE TABLE table_name_68 (marriage VARCHAR, became_duchess VARCHAR)", "sql": "SELECT marriage FROM table_name_68 WHERE became_duchess = '12 december 1666';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 77, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the highest number of points the cosworth straight-4 engine scored?", "schema": "CREATE TABLE table_name_51 (pts INTEGER, engine VARCHAR)", "sql": "SELECT MAX(pts) FROM table_name_51 WHERE engine = 'cosworth straight-4';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 72, "num_statements": 1} {"question": "PostgreSQL regression test 'portals': Write the SELECT query (example 288).", "schema": null, "sql": "SELECT * FROM current_check;", "explanation": "Regression test for Portals in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT * FROM current_check) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 28, "num_statements": 1} {"question": "Update the mobile_plans table to add a new plan with a name \"Unlimited Data Family Plan\" and monthly_cost 80.00", "schema": "CREATE TABLE mobile_plans (plan_id INT, plan_name VARCHAR(50), monthly_cost DECIMAL(5,2));", "sql": "UPDATE mobile_plans SET plan_name = 'Unlimited Data Family Plan', monthly_cost = 80.00 WHERE plan_id = (SELECT MAX(plan_id) FROM mobile_plans) + 1;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 147, "num_statements": 1} {"question": "What is the average design pressure for all dams in the database?", "schema": "CREATE TABLE Dams (id INT, name VARCHAR(100), design_pressure FLOAT); INSERT INTO Dams (id, name, design_pressure) VALUES (1, 'Hoover Dam', 4500), (2, 'Glen Canyon Dam', 2000), (3, 'Oroville Dam', 3500);", "sql": "SELECT AVG(design_pressure) FROM Dams;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 38, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: what is the average points when played is less than 16?", "schema": "CREATE TABLE table_name_55 (points INTEGER, played INTEGER)", "sql": "SELECT AVG(points) FROM table_name_55 WHERE played < 16;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 56, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Please show the categories of the music festivals and the count.", "schema": "CREATE TABLE music_festival (Category VARCHAR)", "sql": "SELECT Category, COUNT(*) FROM music_festival GROUP BY Category;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 64, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who had high assists in game number 9?", "schema": "CREATE TABLE table_27712702_7 (high_assists VARCHAR, game VARCHAR)", "sql": "SELECT high_assists FROM table_27712702_7 WHERE game = 9;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 57, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: what's the cover date where character(s) is sandman (wesley dodds)", "schema": "CREATE TABLE table_1217448_1 (cover_date VARCHAR, character_s_ VARCHAR)", "sql": "SELECT cover_date FROM table_1217448_1 WHERE character_s_ = 'Sandman (Wesley Dodds)';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 85, "num_statements": 1} {"question": "What is the number of unique users who have streamed a specific song?", "schema": "CREATE TABLE StreamingData (StreamID INT, UserID INT, SongID INT, StreamDate DATE); INSERT INTO StreamingData VALUES (1, 1, 1001, '2022-01-01'); INSERT INTO StreamingData VALUES (2, 2, 1002, '2022-01-02'); CREATE TABLE Songs (SongID INT, SongName VARCHAR(100), ArtistID INT); INSERT INTO Songs VALUES (1001, 'Shake It Off', 1); INSERT INTO Songs VALUES (1002, 'Dynamite', 1); CREATE TABLE Users (UserID INT, UserName VARCHAR(50)); INSERT INTO Users VALUES (1, 'Alice'); INSERT INTO Users VALUES (2, 'Bob');", "sql": "SELECT COUNT(DISTINCT UserID) FROM StreamingData JOIN Songs ON StreamingData.SongID = Songs.SongID WHERE Songs.SongName = 'Shake It Off';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 137, "num_statements": 1} {"question": "Insert a new row into the 'vehicle_sales' table with the following values: 'Mahindra', 'Mumbai', 45", "schema": "CREATE TABLE vehicle_sales (vehicle_make VARCHAR(255), city VARCHAR(255), sales_count INT);", "sql": "INSERT INTO vehicle_sales (vehicle_make, city, sales_count) VALUES ('Mahindra', 'Mumbai', 45);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 94, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the sum of number of bearers in 2009 for a rank above 1, a type of patronymic, an etymology meaning son of Christian, and the number of bearers in 1971 greater than 45.984?", "schema": "CREATE TABLE table_name_3 (number_of_bearers_2009 INTEGER, number_of_bearers_1971 VARCHAR, etymology VARCHAR, rank VARCHAR, type VARCHAR)", "sql": "SELECT SUM(number_of_bearers_2009) FROM table_name_3 WHERE rank > 1 AND type = 'patronymic' AND etymology = 'son of christian' AND number_of_bearers_1971 > 45.984;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 163, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which tournament had a score of 6–3, 7–6?", "schema": "CREATE TABLE table_name_75 (tournament VARCHAR, score VARCHAR)", "sql": "SELECT tournament FROM table_name_75 WHERE score = '6–3, 7–6';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 62, "num_statements": 1} {"question": "Delete retailers that have not made a purchase in the last year", "schema": "CREATE TABLE retailers(retailer_id INT, name TEXT, last_purchase_date DATE); INSERT INTO retailers(retailer_id, name, last_purchase_date) VALUES (101, 'Retailer A', '2021-12-01'), (102, 'Retailer B', '2022-02-15'), (103, 'Retailer C', NULL), (104, 'Retailer D', '2022-03-01');", "sql": "DELETE FROM retailers WHERE last_purchase_date < (CURRENT_DATE - INTERVAL '1 year');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_delete", "is_postgresql_specific": false, "sql_length": 84, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'numeric_big' (example 246).", "schema": null, "sql": "INSERT INTO num_exp_add VALUES (5,6,'-652755630.38762364608541718463145771120672223443489913059334543712856431450577465795351472116052777583325262472505543620695003436531392789029513380101663750625024853263344909355177280161504414335005574882649025508632900995595004153086358670541462762210415346958050909878501048483523600711486406055424807840429541335391538322886495085448421556770991545781035298449067051916630343957356635391594362639819978677032855590055900561501350354631803808000307050416047072513406855040715556454205065332997338225626635780147287003130754254277103928406089109802521803537038957372612837169223905290912251006321930223154562110264217937');", "explanation": "DML from PostgreSQL core regression test for Numeric Big.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 662, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the release date for the standard cd release format in north america?", "schema": "CREATE TABLE table_name_18 (release_date VARCHAR, release_format VARCHAR, country VARCHAR)", "sql": "SELECT release_date FROM table_name_18 WHERE release_format = 'standard cd' AND country = 'north america';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 106, "num_statements": 1} {"question": "What is the total number of ads served to users in the United States and Canada, broken down by day, for the past week?", "schema": "CREATE TABLE ads_served (user_id INT, ad_id INT, country VARCHAR(2), serving_date DATE); INSERT INTO ads_served (user_id, ad_id, country, serving_date) VALUES (1, 1001, 'US', '2022-03-01'), (2, 1002, 'CA', '2022-03-02');", "sql": "SELECT serving_date, COUNT(*) as total_ads FROM ads_served WHERE country IN ('US', 'CA') GROUP BY serving_date ORDER BY serving_date DESC LIMIT 7;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 146, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: I want the name for rank less than 6 and year more than 1974", "schema": "CREATE TABLE table_name_75 (name VARCHAR, rank VARCHAR, year VARCHAR)", "sql": "SELECT name FROM table_name_75 WHERE rank < 6 AND year > 1974;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 62, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many regions had an hdi of 0.896?", "schema": "CREATE TABLE table_25042332_33 (combined_gross_enrollment_ratio__2009_ VARCHAR, hdi VARCHAR)", "sql": "SELECT COUNT(combined_gross_enrollment_ratio__2009_) FROM table_25042332_33 WHERE hdi = '0.896';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 96, "num_statements": 1} {"question": "What is the total billing amount for cases handled by attorneys who identify as female?", "schema": "CREATE TABLE Attorneys (AttorneyID INT, Gender VARCHAR(255)); INSERT INTO Attorneys (AttorneyID, Gender) VALUES (1, 'Female'), (2, 'Male'), (3, 'Female'), (4, 'Non-binary'); CREATE TABLE Cases (CaseID INT, AttorneyID INT, BillingAmount DECIMAL(10,2)); INSERT INTO Cases (CaseID, AttorneyID, BillingAmount) VALUES (1, 1, 5000.00), (2, 1, 7000.00), (3, 2, 3000.00), (4, 3, 8000.00), (5, 3, 9000.00);", "sql": "SELECT SUM(BillingAmount) FROM Cases JOIN Attorneys ON Cases.AttorneyID = Attorneys.AttorneyID WHERE Gender = 'Female';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 119, "num_statements": 1} {"question": "What is the average age of patients who had positive outcomes after CBT treatment?", "schema": "CREATE TABLE patients (patient_id INT, age INT, treatment_outcome VARCHAR(10)); INSERT INTO patients (patient_id, age, treatment_outcome) VALUES (1, 30, 'positive'), (2, 45, 'negative'), (3, 50, 'positive'); CREATE TABLE treatments (treatment_id INT, treatment_name VARCHAR(10), patient_id INT); INSERT INTO treatments (treatment_id, treatment_name, patient_id) VALUES (1, 'CBT', 1), (2, 'CBT', 2), (3, 'CBT', 3);", "sql": "SELECT AVG(patients.age) FROM patients JOIN treatments ON patients.patient_id = treatments.patient_id WHERE treatments.treatment_name = 'CBT' AND patients.treatment_outcome = 'positive';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 186, "num_statements": 1} {"question": "Update the name of the character with id 3 to 'New Character Name' in the 'characters' table", "schema": "CREATE TABLE characters (id INT, name TEXT, show_id INT);", "sql": "UPDATE characters SET name = 'New Character Name' WHERE id = 3;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 63, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: When the number of gold medals is greater than 59, and the rank is total, what is the average bronze medals?", "schema": "CREATE TABLE table_name_89 (bronze INTEGER, rank VARCHAR, gold VARCHAR)", "sql": "SELECT AVG(bronze) FROM table_name_89 WHERE rank = 'total' AND gold > 59;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 73, "num_statements": 1} {"question": "Delete the \"hotel_review_summary\" view", "schema": "CREATE TABLE hotel_reviews (hotel_id INT, review_date DATE, review_score INT); CREATE VIEW hotel_review_summary AS SELECT hotel_id, COUNT(*), AVG(review_score) FROM hotel_reviews GROUP BY hotel_id;", "sql": "DROP VIEW hotel_review_summary;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "other", "is_postgresql_specific": false, "sql_length": 31, "num_statements": 1} {"question": "Delete workouts that have a duration longer than 2 hours for members with a Basic membership tier in the last month.", "schema": "CREATE TABLE workouts (workout_id INT, member_id INT, duration INT, date DATE); INSERT INTO workouts VALUES (10,14,130,'2022-03-05'); INSERT INTO workouts VALUES (11,15,150,'2022-03-10'); CREATE TABLE members (member_id INT, tier VARCHAR(10)); INSERT INTO members VALUES (14,'Basic'); INSERT INTO members VALUES (15,'Premium');", "sql": "DELETE FROM workouts WHERE workouts.duration > 120 AND workouts.date >= DATEADD(month, -1, GETDATE()) AND EXISTS (SELECT 1 FROM members WHERE members.member_id = workouts.member_id AND members.tier = 'Basic');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "dml_delete", "is_postgresql_specific": false, "sql_length": 209, "num_statements": 1} {"question": "Write the SQL definition from PostgreSQL contrib extension 'contrib' (item 4).", "schema": null, "sql": "CREATE FUNCTION gbt_macad8_penalty(internal,internal,internal)\nRETURNS internal\nAS 'MODULE_PATHNAME'\nLANGUAGE C IMMUTABLE STRICT;", "explanation": "SQL definition from the 'contrib' PostgreSQL contrib extension.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 129, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Tell me the goals for score of 38-28", "schema": "CREATE TABLE table_name_54 (goals VARCHAR, score VARCHAR)", "sql": "SELECT goals FROM table_name_54 WHERE score = '38-28';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 54, "num_statements": 1} {"question": "List all financial capability programs in the Caribbean and Central America with a budget greater than $15,000.", "schema": "CREATE TABLE capability_programs (id INT, region VARCHAR(20), budget DECIMAL(10,2)); INSERT INTO capability_programs (id, region, budget) VALUES (1, 'Caribbean', 12000.00), (2, 'Central America', 18000.00), (3, 'Europe', 9000.00);", "sql": "SELECT * FROM capability_programs WHERE region IN ('Caribbean', 'Central America') AND budget > 15000.00;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 105, "num_statements": 1} {"question": "Find articles with the word 'investigation' in the 'category' column.", "schema": "CREATE TABLE articles (id INT, title VARCHAR(100), content TEXT, category VARCHAR(50), publication_date DATE); INSERT INTO articles (id, title, content, category, publication_date) VALUES (1, 'Article...', '...', 'investigation', '2022-03-01');", "sql": "SELECT * FROM articles WHERE category LIKE '%investigation%';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 61, "num_statements": 1} {"question": "What is the total number of electric ferries in Oslo and their total passenger capacity?", "schema": "CREATE TABLE electric_ferries (ferry_id INT, passenger_capacity INT, city VARCHAR(50));", "sql": "SELECT COUNT(*), SUM(passenger_capacity) FROM electric_ferries WHERE city = 'Oslo';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 83, "num_statements": 1} {"question": "pgTAP test for Ownership (assertion 92).", "schema": null, "sql": "SELECT * FROM check_test(\n function_owner_is('test_fdw', '{}'::NAME[], current_user),\n\ttrue,\n 'function_owner_is(function, args[], user)',\n 'Function test_fdw() should be owned by ' || quote_ident(current_user),\n ''\n);", "explanation": "SQL assertion from pgTAP test suite for Ownership.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 230, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'earthdistance' (example 28).", "schema": null, "sql": "SELECT cube_ll_coord(ll_to_earth(0,-180),1)::numeric(20,5),\n cube_ll_coord(ll_to_earth(0,-180),2)::numeric(20,5),\n cube_ll_coord(ll_to_earth(0,-180),3)::numeric(20,5);", "explanation": "Example query from the 'earthdistance' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "query_select", "is_postgresql_specific": false, "sql_length": 167, "num_statements": 1} {"question": "PL/pgSQL test: Pltclu--1.0 (example 2).", "schema": null, "sql": "CREATE LANGUAGE pltclu\n HANDLER pltclu_call_handler;", "explanation": "PL/pgSQL example from PostgreSQL source test for Pltclu--1.0.", "validation_query": null, "source": "plpgsql_source", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 53, "num_statements": 1} {"question": "How many defense contracts were awarded each month in 2020?", "schema": "CREATE TABLE ContractMonths (ContractID INT, ContractDate DATE); INSERT INTO ContractMonths (ContractID, ContractDate) VALUES (1, '2020-01-15'), (2, '2020-02-10'), (3, '2020-03-20'), (4, '2020-04-25'), (5, '2020-05-10'), (6, '2020-06-18'), (7, '2020-07-05'), (8, '2020-08-12'), (9, '2020-09-20'), (10, '2020-10-30'), (11, '2020-11-15'), (12, '2020-12-28');", "sql": "SELECT EXTRACT(MONTH FROM ContractDate) AS Month, COUNT(*) FROM ContractMonths WHERE ContractDate BETWEEN '2020-01-01' AND '2020-12-31' GROUP BY Month ORDER BY Month;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 166, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is total number of wins for England?", "schema": "CREATE TABLE table_name_59 (total INTEGER, nation VARCHAR)", "sql": "SELECT SUM(total) FROM table_name_59 WHERE nation = 'england';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 62, "num_statements": 1} {"question": "Update the 'battery_range' column for the 'Tesla_Model_S' row to 405 miles", "schema": "CREATE TABLE vehicle_stats (vehicle_make VARCHAR(255), vehicle_model VARCHAR(255), battery_range FLOAT);", "sql": "UPDATE vehicle_stats SET battery_range = 405 WHERE vehicle_make = 'Tesla' AND vehicle_model = 'Model S';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 104, "num_statements": 1} {"question": "What is the average population of marine species in the Indian Ocean?", "schema": "CREATE TABLE marine_species_indian (name VARCHAR(255), region VARCHAR(255), population INT); INSERT INTO marine_species_indian (name, region, population) VALUES ('Manta Ray', 'Indian', 500), ('Whale Shark', 'Indian', 1000);", "sql": "SELECT AVG(population) FROM marine_species_indian WHERE region = 'Indian';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 74, "num_statements": 1} {"question": "Insert a new record into the menu_items table for item_name 'Tofu Stir Fry' with price set to 11.99", "schema": "CREATE TABLE menu_items (item_name VARCHAR(255), price DECIMAL(5,2));", "sql": "INSERT INTO menu_items (item_name, price) VALUES ('Tofu Stir Fry', 11.99);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 74, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many totals have 1 gold and a rank smaller than 2?", "schema": "CREATE TABLE table_name_32 (total VARCHAR, gold VARCHAR, rank VARCHAR)", "sql": "SELECT COUNT(total) FROM table_name_32 WHERE gold = 1 AND rank < 2;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 67, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which terminations correspond with headphone model SR80I?", "schema": "CREATE TABLE table_1601027_1 (termination VARCHAR, headphone_model VARCHAR)", "sql": "SELECT termination FROM table_1601027_1 WHERE headphone_model = 'SR80i';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 72, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'cube' (example 137).", "schema": null, "sql": "SELECT cube_distance('(2,3,4)'::cube,'(2,3,4)'::cube);", "explanation": "Example query from the 'cube' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "query_select", "is_postgresql_specific": true, "sql_length": 54, "num_statements": 1} {"question": "What are the names and balances of all digital assets that are of type 'ERC20' or 'ERC721'?", "schema": "CREATE TABLE digital_assets (name TEXT, balance INTEGER, type TEXT); INSERT INTO digital_assets (name, balance, type) VALUES ('Asset1', 100, 'ERC20'), ('Asset2', 200, 'ERC721');", "sql": "SELECT name, balance FROM digital_assets WHERE type IN ('ERC20', 'ERC721');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 75, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What date has a Decision of osgood, and a Score of 3 – 4?", "schema": "CREATE TABLE table_name_63 (date VARCHAR, decision VARCHAR, score VARCHAR)", "sql": "SELECT date FROM table_name_63 WHERE decision = 'osgood' AND score = '3 – 4';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 77, "num_statements": 1} {"question": "What is the price of the most expensive garment in the 'Fall 2022' collection?", "schema": "CREATE TABLE garment_prices (collection VARCHAR(20), garment_name VARCHAR(30), price INT); INSERT INTO garment_prices (collection, garment_name, price) VALUES ('Fall 2022', 'Cashmere Sweater', 300);", "sql": "SELECT collection, MAX(price) FROM garment_prices GROUP BY collection;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 70, "num_statements": 1} {"question": "What is the total number of farmers who received training in the last 2 years from the 'Capacity Building' program in the 'ruraldev' schema?", "schema": "CREATE TABLE ruraldev.farmers (id INT, name VARCHAR(50), training_program VARCHAR(50), training_year INT); INSERT INTO ruraldev.farmers (id, name, training_program, training_year) VALUES (1, 'John Doe', 'Capacity Building', 2020), (2, 'Jane Smith', 'Soil Conservation', 2019), (3, 'Raj Patel', 'Capacity Building', 2021);", "sql": "SELECT COUNT(*) FROM ruraldev.farmers WHERE training_program = 'Capacity Building' AND training_year >= 2020;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 109, "num_statements": 1} {"question": "What is the average age of students who have taken a course on lifelong learning?", "schema": "CREATE TABLE Students (StudentID INT, Age INT, Gender VARCHAR(10), CoursesTaken VARCHAR(20)); INSERT INTO Students (StudentID, Age, Gender, CoursesTaken) VALUES (1, 22, 'Male', 'Lifelong Learning'); INSERT INTO Students (StudentID, Age, Gender, CoursesTaken) VALUES (2, 20, 'Female', 'Open Pedagogy'); INSERT INTO Students (StudentID, Age, Gender, CoursesTaken) VALUES (3, 25, 'Male', 'Lifelong Learning'); INSERT INTO Students (StudentID, Age, Gender, CoursesTaken) VALUES (4, 23, 'Female', 'Lifelong Learning');", "sql": "SELECT AVG(Age) FROM Students WHERE CoursesTaken = 'Lifelong Learning';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 71, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: WHAT IS THE WORST SCORE WHEN THE BEST SCORE WAS 8.8 AND AVERAGE WAS LARGER THAN 6.2?", "schema": "CREATE TABLE table_name_72 (worst_score VARCHAR, best_score VARCHAR, average VARCHAR)", "sql": "SELECT COUNT(worst_score) FROM table_name_72 WHERE best_score = 8.8 AND average > 6.2;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 86, "num_statements": 1} {"question": "PostgreSQL regression test 'int8': Write the SELECT query (example 11).", "schema": null, "sql": "SELECT pg_input_is_valid('10000000000000000000', 'int8');", "explanation": "Regression test for Int8 in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT pg_input_is_valid('10000000000000000000', 'int8')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 57, "num_statements": 1} {"question": "List all pollution control initiatives and their respective budgets.", "schema": "CREATE TABLE pollution_control_initiatives (initiative_id INT, name TEXT, budget INT); INSERT INTO pollution_control_initiatives (initiative_id, name, budget) VALUES (1, 'Project A', 100000), (2, 'Project B', 250000), (3, 'Project C', 180000);", "sql": "SELECT name, budget FROM pollution_control_initiatives;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 55, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What AP increase has an AP duration of 0.75?", "schema": "CREATE TABLE table_name_68 (ap_increase__mv_ VARCHAR, ap_duration__ms_ VARCHAR)", "sql": "SELECT ap_increase__mv_ FROM table_name_68 WHERE ap_duration__ms_ = '0.75';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 75, "num_statements": 1} {"question": "Insert new donor records for the first quarter of 2023.", "schema": "CREATE TABLE Donors (DonorID int, FirstName varchar(50), LastName varchar(50)); INSERT INTO Donors (DonorID, FirstName, LastName) VALUES (1, 'John', 'Doe'), (2, 'Jane', 'Doe');", "sql": "INSERT INTO Donors (DonorID, FirstName, LastName) VALUES (3, 'Mohammad', 'Ali'), (4, 'Hanako', 'Yamada'), (5, 'Leonel', 'Messi');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 129, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the height of player James Stanton?", "schema": "CREATE TABLE table_name_63 (height VARCHAR, name VARCHAR)", "sql": "SELECT height FROM table_name_63 WHERE name = 'james stanton';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 62, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: give me names of all compatible browsers and accelerators in the descending order of compatible year", "schema": "CREATE TABLE accelerator_compatible_browser (browser_id VARCHAR, accelerator_id VARCHAR, compatible_since_year VARCHAR); CREATE TABLE web_client_accelerator (name VARCHAR, id VARCHAR); CREATE TABLE browser (name VARCHAR, id VARCHAR)", "sql": "SELECT T2.name, T3.name FROM accelerator_compatible_browser AS T1 JOIN browser AS T2 ON T1.browser_id = T2.id JOIN web_client_accelerator AS T3 ON T1.accelerator_id = T3.id ORDER BY T1.compatible_since_year DESC;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 212, "num_statements": 1} {"question": "How many animals were admitted to the rescue center in the 'Forest' region?;", "schema": "CREATE TABLE rescue_center (id INT, animal_name VARCHAR(50), date_admitted DATE, region VARCHAR(20)); INSERT INTO rescue_center (id, animal_name, date_admitted, region) VALUES (1, 'Fox', '2021-01-05', 'Mountain'); INSERT INTO rescue_center (id, animal_name, date_admitted, region) VALUES (2, 'Eagle', '2021-06-10', 'Forest'); INSERT INTO rescue_center (id, animal_name, date_admitted, region) VALUES (3, 'Bear', '2021-07-15', 'Mountain');", "sql": "SELECT COUNT(animal_name) FROM rescue_center WHERE region = 'Forest';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 69, "num_statements": 1} {"question": "List all network towers in the state of New York that have experienced outages in the past month.", "schema": "CREATE TABLE network_towers (tower_id INT, location VARCHAR(50), last_outage DATE); INSERT INTO network_towers (tower_id, location, last_outage) VALUES (1, 'New York City', '2022-01-15'); INSERT INTO network_towers (tower_id, location, last_outage) VALUES (2, 'Buffalo', '2022-02-03');", "sql": "SELECT * FROM network_towers WHERE last_outage >= DATE_SUB(CURRENT_DATE, INTERVAL 1 MONTH) AND state = 'New York';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 114, "num_statements": 1} {"question": "Insert new records into the 'climate_finance' table with the following details: (1, 'Bilateral', 'Kenya', 25000)", "schema": "CREATE TABLE climate_finance (id INT, funding_source VARCHAR(255), country VARCHAR(255), amount FLOAT);", "sql": "INSERT INTO climate_finance (id, funding_source, country, amount) VALUES (1, 'Bilateral', 'Kenya', 25000);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 106, "num_statements": 1} {"question": "Display the number of marine protected areas and their total size in the Arctic region.", "schema": "CREATE TABLE marine_protected_areas (id INT, name VARCHAR(50), size FLOAT, ocean VARCHAR(20)); INSERT INTO marine_protected_areas (id, name, size, ocean) VALUES (1, 'Northwest Passage', 123000, 'Arctic'); INSERT INTO marine_protected_areas (id, name, size, ocean) VALUES (2, 'Arctic National Wildlife Refuge', 780000, 'Arctic');", "sql": "SELECT COUNT(*), SUM(size) FROM marine_protected_areas WHERE ocean = 'Arctic';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 78, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is Release Date, when Format is CD, and when Title is Tsar Wars?", "schema": "CREATE TABLE table_name_84 (release_date VARCHAR, format VARCHAR, title VARCHAR)", "sql": "SELECT release_date FROM table_name_84 WHERE format = 'cd' AND title = 'tsar wars';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 83, "num_statements": 1} {"question": "pgTAP test for Inheritance (assertion 18).", "schema": null, "sql": "SELECT * FROM check_test(\n has_inherited_tables( 'nonesuch', 'Gimme more' ),\n false,\n 'has_inherited_tables(nonesuch, desc)',\n 'Gimme more',\n ''\n);", "explanation": "SQL assertion from pgTAP test suite for Inheritance.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 162, "num_statements": 1} {"question": "Which landfills have a capacity over 100000 tons in 'landfill' table?", "schema": "CREATE TABLE landfill (name VARCHAR(50), capacity INT);", "sql": "SELECT * FROM landfill WHERE capacity > 100000;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 47, "num_statements": 1} {"question": "What is the maximum number of games played by users from Canada?", "schema": "CREATE TABLE Players (PlayerID INT, PlayerName VARCHAR(50), Country VARCHAR(50), GamesPlayed INT); INSERT INTO Players (PlayerID, PlayerName, Country, GamesPlayed) VALUES (1, 'John Doe', 'USA', 100), (2, 'Jane Smith', 'Canada', 80), (3, 'Taro Yamada', 'Japan', 70), (4, 'Hana Nakamura', 'Japan', 60);", "sql": "SELECT MAX(GamesPlayed) FROM Players WHERE Country = 'Canada';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 62, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'cube' (example 216).", "schema": null, "sql": "SELECT cube(array[10,20,30])->3;", "explanation": "Example query from the 'cube' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "query_select", "is_postgresql_specific": true, "sql_length": 32, "num_statements": 1} {"question": "What is the top donor by total donation amount?", "schema": "CREATE TABLE donors (id INT, donor_name VARCHAR(50), total_donations DECIMAL(10, 2)); INSERT INTO donors (id, donor_name, total_donations) VALUES (1, 'John Doe', 500.00); INSERT INTO donors (id, donor_name, total_donations) VALUES (2, 'Jane Smith', 300.00);", "sql": "SELECT donor_name, total_donations FROM donors ORDER BY total_donations DESC LIMIT 1;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 85, "num_statements": 1} {"question": "What is the total number of games played by FC Barcelona?", "schema": "CREATE TABLE teams (id INT, name TEXT, city TEXT, league TEXT); INSERT INTO teams (id, name, city, league) VALUES (3, 'FC Barcelona', 'Barcelona', 'La Liga'); CREATE TABLE games (id INT, home_team_id INT, away_team_id INT);", "sql": "SELECT COUNT(*) FROM games WHERE home_team_id = (SELECT id FROM teams WHERE name = 'FC Barcelona' AND city = 'Barcelona') OR away_team_id = (SELECT id FROM teams WHERE name = 'FC Barcelona' AND city = 'Barcelona');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 214, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many schools have some students playing in goalie and mid positions.", "schema": "CREATE TABLE tryout (cName VARCHAR, pPos VARCHAR)", "sql": "SELECT COUNT(*) FROM (SELECT cName FROM tryout WHERE pPos = 'goalie' INTERSECT SELECT cName FROM tryout WHERE pPos = 'mid');", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 124, "num_statements": 1} {"question": "List all the co-owned properties in Oakland, CA and their owners.", "schema": "CREATE TABLE properties (id INT, city VARCHAR(50), price INT); CREATE TABLE co_owners (property_id INT, owner_name VARCHAR(50)); INSERT INTO properties (id, city, price) VALUES (1, 'Oakland', 500000), (2, 'San Francisco', 700000); INSERT INTO co_owners (property_id, owner_name) VALUES (1, 'Alice'), (1, 'Bob'), (2, 'Charlie');", "sql": "SELECT properties.city, co_owners.owner_name FROM properties INNER JOIN co_owners ON properties.id = co_owners.property_id WHERE properties.city = 'Oakland';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 157, "num_statements": 1} {"question": "What is the number of donations made by first-time donors from the United Kingdom?", "schema": "CREATE TABLE Donations (DonationID int, DonorID int, DonationDate date);", "sql": "SELECT COUNT(*) FROM Donations D INNER JOIN (SELECT DISTINCT DonorID FROM Donations WHERE YEAR(DonationDate) = YEAR(CURRENT_DATE) - 1) FD ON D.DonorID = FD.DonorID WHERE Country = 'UK';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 185, "num_statements": 1} {"question": "PostgreSQL regression test 'regex': Write the SELECT query (example 24).", "schema": null, "sql": "select regexp_matches('a', 'a(?!b)b*');", "explanation": "Regression test for Regex in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select regexp_matches('a', 'a(?!b)b*')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 39, "num_statements": 1} {"question": "What is the average age of all animals in the 'animal_profiles' table?", "schema": "CREATE TABLE animal_profiles (id INT, animal_name VARCHAR(50), age INT, species_id INT); INSERT INTO animal_profiles (id, animal_name, age, species_id) VALUES (1, 'Tiger', 5, 1001), (2, 'Polar Bear', 12, 1002), (3, 'Giant Panda', 8, 1003);", "sql": "SELECT AVG(age) FROM animal_profiles;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 37, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the venue where the home team is Footscray?", "schema": "CREATE TABLE table_name_57 (venue VARCHAR, home_team VARCHAR)", "sql": "SELECT venue FROM table_name_57 WHERE home_team = 'footscray';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 62, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the percentage where males equal 99?", "schema": "CREATE TABLE table_name_35 (percentage___percentage_ VARCHAR, males VARCHAR)", "sql": "SELECT percentage___percentage_ FROM table_name_35 WHERE males = '99';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 70, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which Laps have Points of 29?", "schema": "CREATE TABLE table_name_23 (laps VARCHAR, points VARCHAR)", "sql": "SELECT laps FROM table_name_23 WHERE points = 29;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 49, "num_statements": 1} {"question": "List all countries involved in satellite launches?", "schema": "CREATE TABLE SatelliteLaunches (id INT, launch_country VARCHAR(50), launch_site VARCHAR(50)); CREATE TABLE Countries (id INT, name VARCHAR(50), code VARCHAR(3));", "sql": "SELECT DISTINCT SatelliteLaunches.launch_country FROM SatelliteLaunches JOIN Countries ON SatelliteLaunches.launch_country = Countries.name;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 142, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: In what year was 1.35% the average vote per candidate?", "schema": "CREATE TABLE table_28819393_1 (year VARCHAR, average__percentage_of_vote_per_candidate VARCHAR)", "sql": "SELECT year FROM table_28819393_1 WHERE average__percentage_of_vote_per_candidate = '1.35';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 91, "num_statements": 1} {"question": "What is the average yield of crops grown by female farmers?", "schema": "CREATE TABLE Farmers (id INT, name VARCHAR(50), age INT, gender VARCHAR(10), location VARCHAR(50)); INSERT INTO Farmers (id, name, age, gender, location) VALUES (1, 'John Doe', 35, 'Male', 'USA'); INSERT INTO Farmers (id, name, age, gender, location) VALUES (2, 'Jane Smith', 40, 'Female', 'Canada'); CREATE TABLE Crops (id INT, farmer_id INT, crop_name VARCHAR(50), yield INT, price FLOAT); INSERT INTO Crops (id, farmer_id, crop_name, yield, price) VALUES (1, 1, 'Corn', 120, 2.5); INSERT INTO Crops (id, farmer_id, crop_name, yield, price) VALUES (2, 2, 'Wheat', 150, 3.2);", "sql": "SELECT AVG(c.yield) AS average_yield FROM Crops c JOIN Farmers f ON c.farmer_id = f.id WHERE f.gender = 'Female';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 113, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What nation are carolina hermann / daniel hermann from?", "schema": "CREATE TABLE table_name_64 (nation VARCHAR, name VARCHAR)", "sql": "SELECT nation FROM table_name_64 WHERE name = 'carolina hermann / daniel hermann';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 82, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What's the date in the United Kingdom having a catalog of reveal50cd/lp?", "schema": "CREATE TABLE table_name_3 (date VARCHAR, catalog VARCHAR, region VARCHAR)", "sql": "SELECT date FROM table_name_3 WHERE catalog = 'reveal50cd/lp' AND region = 'united kingdom';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 92, "num_statements": 1} {"question": "Which dishes have a higher price than their average rating?", "schema": "CREATE TABLE dishes (id INT, name TEXT, type TEXT, cost FLOAT); INSERT INTO dishes (id, name, type, cost) VALUES (1, 'Quinoa Salad', 'vegetarian', 7.50), (2, 'Chickpea Curry', 'vegetarian', 9.25), (3, 'Beef Stew', 'non-vegetarian', 12.00); CREATE TABLE orders (id INT, dish_id INT, customer_id INT, rating INT); INSERT INTO orders (id, dish_id, customer_id, rating) VALUES (1, 1, 101, 8), (2, 2, 102, 9), (3, 3, 103, 7), (4, 1, 104, 10), (5, 2, 105, 6);", "sql": "SELECT d.name, d.cost, o.rating FROM dishes d INNER JOIN orders o ON d.id = o.dish_id WHERE d.cost > o.rating;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 110, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What Name has a Time of 01:56:52?", "schema": "CREATE TABLE table_name_46 (name VARCHAR, time VARCHAR)", "sql": "SELECT name FROM table_name_46 WHERE time = '01:56:52';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 55, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What date was the Competition of europe/africa zone, group i, round robin, with a result of loss, in Antalya, and a Score of 1–2?", "schema": "CREATE TABLE table_name_19 (date VARCHAR, score VARCHAR, location VARCHAR, competition VARCHAR, result VARCHAR)", "sql": "SELECT date FROM table_name_19 WHERE competition = 'europe/africa zone, group i, round robin' AND result = 'loss' AND location = 'antalya' AND score = '1–2';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 157, "num_statements": 1} {"question": "Write the PL/pgSQL object from PostgreSQL regression test 'plpgsql' (example 33).", "schema": null, "sql": "-- ************************************************************\n-- * AFTER UPDATE on PField\n-- *\t- Let PSlots of this field follow\n-- ************************************************************\ncreate function tg_pfield_au() returns trigger as '\nbegin\n if new.name != old.name then\n update PSlot set pfname = new.name where pfname = old.name;", "explanation": "PL/pgSQL object from PostgreSQL core test for Plpgsql.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "plpgsql_trigger", "is_postgresql_specific": true, "sql_length": 353, "num_statements": 1} {"question": "Identify players who experienced a decrease in performance", "schema": "CREATE TABLE playerperformances (player_id INT, game_id INT, match_date DATE, kills INT, deaths INT); INSERT INTO playerperformances (player_id, game_id, match_date, kills, deaths) VALUES (1, 1001, '2022-01-01', 25, 10);", "sql": "SELECT player_id, LAG(kills, 1) OVER (PARTITION BY player_id ORDER BY match_date) as prev_kills, deaths, prev_kills - deaths as performance_change FROM playerperformances;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "plpgsql", "is_postgresql_specific": true, "sql_length": 171, "num_statements": 1} {"question": "What is the earliest therapy session date?", "schema": "CREATE TABLE therapy_sessions (id INT, patient_id INT, session_date DATE);", "sql": "SELECT MIN(session_date) FROM therapy_sessions;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 47, "num_statements": 1} {"question": "Update the name column in the programs table to set the value to 'Housing Assistance' for the record with id = 2.", "schema": "CREATE TABLE programs (id INT, name VARCHAR(50)); INSERT INTO programs (id, name) VALUES (1, 'Education Support'), (2, 'Food Assistance'), (3, 'Health Services');", "sql": "UPDATE programs SET name = 'Housing Assistance' WHERE id = 2;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 61, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the highest attendance for the opponents Club Brugge in Venue A?", "schema": "CREATE TABLE table_name_64 (attendance INTEGER, opponent VARCHAR, venue VARCHAR)", "sql": "SELECT MAX(attendance) FROM table_name_64 WHERE opponent = 'club brugge' AND venue = 'a';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 89, "num_statements": 1} {"question": "List all healthcare facilities in rural Montana, along with the number of doctors and nurses employed at each facility.", "schema": "CREATE TABLE healthcare_facilities (id INT, name VARCHAR(100), location VARCHAR(50), num_doctors INT, num_nurses INT); INSERT INTO healthcare_facilities (id, name, location, num_doctors, num_nurses) VALUES (1, 'Rural Clinic', 'Montana', 5, 10);", "sql": "SELECT healthcare_facilities.name, healthcare_facilities.num_doctors, healthcare_facilities.num_nurses FROM healthcare_facilities WHERE healthcare_facilities.location = 'Montana';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 179, "num_statements": 1} {"question": "PostgreSQL regression test 'multirangetypes': Write the SELECT query (example 421).", "schema": null, "sql": "select count(*) from test_multirange_gist where mr >> int4range(100,500);", "explanation": "Regression test for Multirangetypes in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select count(*) from test_multirange_gist where mr >> int4range(100,500)) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 73, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What Player's To par is −6?", "schema": "CREATE TABLE table_name_54 (player VARCHAR, to_par VARCHAR)", "sql": "SELECT player FROM table_name_54 WHERE to_par = '−6';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 53, "num_statements": 1} {"question": "What is the minimum age of patients diagnosed with anxiety in Germany?", "schema": "CREATE TABLE patient_diagnosis (patient_id INT, age INT, condition VARCHAR(255), country VARCHAR(255)); INSERT INTO patient_diagnosis (patient_id, age, condition, country) VALUES (1, 25, 'Anxiety', 'Germany'); INSERT INTO patient_diagnosis (patient_id, age, condition, country) VALUES (2, 30, 'Depression', 'Germany');", "sql": "SELECT MIN(age) FROM patient_diagnosis WHERE condition = 'Anxiety' AND country = 'Germany';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 91, "num_statements": 1} {"question": "Which public transportation system has the highest ridership in 'New York'?", "schema": "CREATE TABLE public.public_transit_ridership(id serial PRIMARY KEY, system varchar(255), location varchar(255), ridership int);", "sql": "SELECT system, MAX(ridership) FROM public.public_transit_ridership WHERE location = 'New York' GROUP BY system;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": true, "sql_length": 111, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the population in 1931 for lubelskie", "schema": "CREATE TABLE table_14245_3 (population__1931__in_1 VARCHAR, voivodeship_or_city VARCHAR)", "sql": "SELECT population__1931__in_1, 000 AS s FROM table_14245_3 WHERE voivodeship_or_city = 'lubelskie';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 99, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'numeric' (example 768).", "schema": null, "sql": "INSERT INTO num_input_test(n1) VALUES ('555.50');", "explanation": "DML from PostgreSQL core regression test for Numeric.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 49, "num_statements": 1} {"question": "What is the wastewater treatment capacity for each state?", "schema": "CREATE TABLE wastewater_treatment(state VARCHAR(20), treatment_capacity INT); INSERT INTO wastewater_treatment VALUES('New York', 50000), ('Texas', 40000);", "sql": "SELECT state, treatment_capacity FROM wastewater_treatment;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 59, "num_statements": 1} {"question": "Add a new marine life research station named 'Station C' in the Indian region.", "schema": "CREATE TABLE marine_life_research_stations (station_id INT, station_name TEXT, region TEXT);", "sql": "INSERT INTO marine_life_research_stations (station_id, station_name, region) VALUES ((SELECT MAX(station_id) + 1 FROM marine_life_research_stations), 'Station C', 'Indian');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 173, "num_statements": 1} {"question": "pgTAP test for Check (assertion 16).", "schema": null, "sql": "SELECT * FROM check_test(\n col_has_check( 'public', 'sometab', 'id', 'public.sometab.id should have a check' ),\n false,\n 'col_has_check( sch, tab, col, desc ) fail',\n 'public.sometab.id should have a check',\n ' have: {name}\n {numb,myint}\n want: {id}'\n);", "explanation": "SQL assertion from pgTAP test suite for Check.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 295, "num_statements": 1} {"question": "Obtain maintenance activities on specific equipment types", "schema": "CREATE TABLE equipment_maintenance (equipment_id INT, equipment_type VARCHAR(50), maintenance_date DATE, vendor_name VARCHAR(100), maintenance_type VARCHAR(50)); INSERT INTO equipment_maintenance (equipment_id, equipment_type, maintenance_date, vendor_name, maintenance_type) VALUES (1001, 'Fighter Jet', '2019-12-05', 'DEF Contractors', 'Preventive'); INSERT INTO equipment_maintenance (equipment_id, equipment_type, maintenance_date, vendor_name, maintenance_type) VALUES (1002, 'Tank', '2020-03-20', 'GHI Services', 'Corrective');", "sql": "SELECT * FROM equipment_maintenance WHERE equipment_type IN ('Fighter Jet', 'Tank');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 84, "num_statements": 1} {"question": "PostgreSQL regression test 'multirangetypes': Write the SELECT query (example 210).", "schema": null, "sql": "SELECT nummultirange(numrange(8,9)) <@ numrange(1,5);", "explanation": "Regression test for Multirangetypes in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT nummultirange(numrange(8,9)) <@ numrange(1,5)) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 53, "num_statements": 1} {"question": "What is the ranking of customer preferences for each cosmetic product?", "schema": "CREATE TABLE customer_preferences (customer_id INT, product_id INT, preference_score INT); INSERT INTO customer_preferences (customer_id, product_id, preference_score) VALUES (1, 1, 90), (1, 2, 70), (2, 1, 80), (2, 2, 85), (3, 1, 50), (3, 2, 95), (4, 1, 90), (4, 2, 80), (5, 1, 60), (5, 2, 90);", "sql": "SELECT customer_id, product_id, preference_score, RANK() OVER (PARTITION BY product_id ORDER BY preference_score DESC) as preference_rank FROM customer_preferences;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "query_window_function", "is_postgresql_specific": true, "sql_length": 164, "num_statements": 1} {"question": "Which chemical compounds have been used in safety protocols since 2020-01-01?", "schema": "CREATE TABLE safety_protocols (id INT, compound_name VARCHAR(255), last_use DATE); INSERT INTO safety_protocols (id, compound_name, last_use) VALUES (1, 'Compound A', '2019-12-31'), (2, 'Compound B', '2020-03-05'), (3, 'Compound C', '2018-11-17');", "sql": "SELECT compound_name FROM safety_protocols WHERE last_use >= '2020-01-01';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 74, "num_statements": 1} {"question": "How many wells were drilled in 'Nigeria' in the year 2020?", "schema": "CREATE TABLE OilWells (WellID VARCHAR(10), DrillYear INT, Location VARCHAR(50));", "sql": "SELECT COUNT(*) FROM OilWells WHERE Location = 'Nigeria' AND DrillYear = 2020;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 78, "num_statements": 1} {"question": "What is the minimum oxygen level for each depth in the 'oxygen_level' table?", "schema": "CREATE TABLE oxygen_level (depth INT, level FLOAT); INSERT INTO oxygen_level (depth, level) VALUES (100, 5.5), (200, 4.8);", "sql": "SELECT depth, MIN(level) as min_level FROM oxygen_level GROUP BY depth;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 71, "num_statements": 1} {"question": "What is the average population of the regions?", "schema": "CREATE TABLE regions (id INT PRIMARY KEY, region VARCHAR(50), population INT); INSERT INTO regions (id, region, population) VALUES (1, 'Middle East', 2000), (2, 'Europe', 1500), (3, 'Asia', 800);", "sql": "SELECT region, AVG(population) as avg_population FROM regions GROUP BY region;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 78, "num_statements": 1} {"question": "What is the average project timeline for residential buildings in New York?", "schema": "CREATE TABLE project_timeline (timeline_id INT, project_id INT, building_type VARCHAR(20), city VARCHAR(20), days INT); INSERT INTO project_timeline (timeline_id, project_id, building_type, city, days) VALUES (1, 301, 'Commercial', 'Chicago', 90), (2, 302, 'Residential', 'Chicago', 60), (3, 303, 'Commercial', 'New York', 120), (4, 304, 'Residential', 'New York', 75);", "sql": "SELECT AVG(days) FROM project_timeline WHERE building_type = 'Residential' AND city = 'New York';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 97, "num_statements": 1} {"question": "How many explainable AI research papers have been published by each country in the past year, and what is the average number of papers published per country?", "schema": "CREATE TABLE ExplainableAIPapers (id INT, paper_title VARCHAR(50), country VARCHAR(50), publication_date DATE); INSERT INTO ExplainableAIPapers (id, paper_title, country, publication_date) VALUES (1, 'LIME: A Unified Approach for Explaining Classifier Decisions', 'USA', '2023-01-01'), (2, 'SHAP: A Game Theoretic Approach to Explaining the Predictions of Any Machine Learning Model', 'Canada', '2023-02-01'), (3, 'Anchors: High-Precision Model-Agnostic Explanations', 'Germany', '2023-03-01'), (4, 'TreeExplainer: An Efficient Exact Algorithm for Model Agnostic Explanations', 'France', '2023-04-01'), (5, 'DeepLIFT: A Comprehensible Framework for Model-Agnostic Explanation', 'UK', '2023-05-01');", "sql": "SELECT country, COUNT(*) as paper_count FROM ExplainableAIPapers WHERE publication_date >= '2022-01-01' GROUP BY country; SELECT AVG(paper_count) as avg_paper_count FROM (SELECT country, COUNT(*) as paper_count FROM ExplainableAIPapers WHERE publication_date >= '2022-01-01' GROUP BY country) as subquery;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 305, "num_statements": 2} {"question": "What is the number of autonomous driving research papers published each year since 2015 in the 'research_papers' table?", "schema": "CREATE TABLE research_papers (title VARCHAR(100), author_country VARCHAR(50), publication_year INT);", "sql": "SELECT publication_year, COUNT(*) FROM research_papers WHERE publication_year >= 2015 GROUP BY publication_year;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 112, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the day of vacancy for vitória de guimarães?", "schema": "CREATE TABLE table_name_78 (date_of_vacancy VARCHAR, team VARCHAR)", "sql": "SELECT date_of_vacancy FROM table_name_78 WHERE team = 'vitória de guimarães';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 78, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which Opponent has a Score of 3–6, 5–7?", "schema": "CREATE TABLE table_name_52 (opponent VARCHAR, score VARCHAR)", "sql": "SELECT opponent FROM table_name_52 WHERE score = '3–6, 5–7';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 60, "num_statements": 1} {"question": "Retrieve the number of machines of each type", "schema": "CREATE TABLE machines (id INT PRIMARY KEY, name VARCHAR(255), type VARCHAR(255), status VARCHAR(255)); INSERT INTO machines (id, name, type, status) VALUES (1, 'Machine A', 'CNC', 'Operational'), (2, 'Machine B', 'Robotic Arm', 'Under Maintenance'), (3, 'Machine C', 'CNC', 'Operational'), (4, 'Machine D', 'Robotic Arm', 'Operational'), (5, 'Machine E', 'Conveyor Belt', 'Operational');", "sql": "SELECT type, COUNT(*) FROM machines GROUP BY type;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 50, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the low score for the jaguar r3 chasis?", "schema": "CREATE TABLE table_name_90 (points INTEGER, chassis VARCHAR)", "sql": "SELECT MIN(points) FROM table_name_90 WHERE chassis = 'jaguar r3';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "How many marine conservation organizations were founded in the year 2000?", "schema": "CREATE TABLE marine_conservation_orgs (org_name TEXT, country TEXT, year_founded INTEGER); INSERT INTO marine_conservation_orgs (org_name, country, year_founded) VALUES ('Ocean Conservancy', 'USA', 1972), ('Marine Conservation Society', 'UK', 1983), ('Coral Reef Alliance', 'USA', 1994);", "sql": "SELECT COUNT(*) FROM marine_conservation_orgs WHERE year_founded = 2000;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 72, "num_statements": 1} {"question": "What was the production trend for all countries from 2019 to 2020?", "schema": "CREATE TABLE production (country VARCHAR(255), year INT, amount INT); INSERT INTO production (country, year, amount) VALUES ('China', 2019, 120000), ('China', 2020, 140000), ('USA', 2019, 36000), ('USA', 2020, 38000), ('Australia', 2019, 18000), ('Australia', 2020, 20000), ('India', 2019, 4000), ('India', 2020, 5000);", "sql": "SELECT country, year, amount, LAG(amount, 1) OVER (PARTITION BY country ORDER BY year) AS previous_year_production FROM production;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "query_window_function", "is_postgresql_specific": true, "sql_length": 131, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'numeric' (example 588).", "schema": null, "sql": "INSERT INTO fract_only VALUES (8, '0.00017');", "explanation": "DML from PostgreSQL core regression test for Numeric.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 45, "num_statements": 1} {"question": "Delete records of customers who have not paid their bills in the last 60 days from the billing table", "schema": "CREATE TABLE billing (customer_id INT, bill_amount DECIMAL(5,2), payment_date DATE);", "sql": "DELETE FROM billing WHERE payment_date < (CURRENT_DATE - INTERVAL '60' DAY);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_delete", "is_postgresql_specific": false, "sql_length": 76, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What year(s) was Duisburg runner-up?", "schema": "CREATE TABLE table_name_81 (years_runner_up VARCHAR, team VARCHAR)", "sql": "SELECT years_runner_up FROM table_name_81 WHERE team = 'duisburg';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the round for marcus howard?", "schema": "CREATE TABLE table_name_32 (round INTEGER, name VARCHAR)", "sql": "SELECT SUM(round) FROM table_name_32 WHERE name = 'marcus howard';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: what amount of stations have station code is awy?", "schema": "CREATE TABLE table_14688744_2 (station VARCHAR, station_code VARCHAR)", "sql": "SELECT COUNT(station) FROM table_14688744_2 WHERE station_code = 'AWY';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 71, "num_statements": 1} {"question": "What is the total donation amount by cause in Q1 2022, presented in a pivot table?", "schema": "CREATE TABLE donations (donor_id INT, donation_amount DECIMAL(10,2), donation_date DATE, cause VARCHAR(255)); INSERT INTO donations (donor_id, donation_amount, donation_date, cause) VALUES (1, 500, '2022-01-01', 'Education'); INSERT INTO donations (donor_id, donation_amount, donation_date, cause) VALUES (2, 250, '2022-01-15', 'Health'); INSERT INTO donations (donor_id, donation_amount, donation_date, cause) VALUES (3, 750, '2022-03-01', 'Environment');", "sql": "SELECT cause, SUM(donation_amount) AS total_donation_amount FROM donations WHERE donation_date BETWEEN '2022-01-01' AND '2022-03-31' GROUP BY cause WITH ROLLUP;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "query_aggregation", "is_postgresql_specific": true, "sql_length": 160, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which incumbent's democratic candidate was mike carroll?", "schema": "CREATE TABLE table_name_21 (incumbent VARCHAR, democratic VARCHAR)", "sql": "SELECT incumbent FROM table_name_21 WHERE democratic = 'mike carroll';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 70, "num_statements": 1} {"question": "What is the average cargo handling time for 'Port of Santos'?", "schema": "CREATE TABLE ports (id INT, name TEXT, handling_time INT); INSERT INTO ports (id, name, handling_time) VALUES (1, 'Port of Santos', 120), (2, 'Port of Oakland', 90), (3, 'Port of Singapore', 100);", "sql": "SELECT AVG(handling_time) FROM ports WHERE name = 'Port of Santos';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 67, "num_statements": 1} {"question": "What is the total number of digital divide initiatives in Africa and South America?", "schema": "CREATE TABLE digital_divide_initiatives (id INT, initiative_name VARCHAR(255), location VARCHAR(255), initiative_type VARCHAR(255));", "sql": "SELECT COUNT(*) FROM digital_divide_initiatives WHERE location IN ('Africa', 'South America') AND initiative_type = 'digital divide';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 133, "num_statements": 1} {"question": "What are the total expenses for community development programs in Africa?", "schema": "CREATE TABLE programs (id INT, program_name VARCHAR(50), program_type VARCHAR(20), org_id INT, start_date DATE, end_date DATE, budget DECIMAL(10,2));", "sql": "SELECT SUM(budget) FROM programs WHERE program_type = 'Community Development' AND country_code = 'AF';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 102, "num_statements": 1} {"question": "Which spacecraft has the longest duration in space?", "schema": "CREATE TABLE spacecraft (id INT, name VARCHAR(255), total_days_in_space INT); INSERT INTO spacecraft (id, name, total_days_in_space) VALUES (1, 'Voyager 1', 43902), (2, 'Voyager 2', 41484), (3, 'Cassini', 13303);", "sql": "SELECT name, total_days_in_space FROM spacecraft ORDER BY total_days_in_space DESC LIMIT 1;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 91, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the highest result for Jankovic at Indian Wells and round of SF?", "schema": "CREATE TABLE table_name_68 (jankovic INTEGER, round VARCHAR, tournament VARCHAR)", "sql": "SELECT MAX(jankovic) FROM table_name_68 WHERE round = 'sf' AND tournament = 'indian wells';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 91, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'subselect' (example 167).", "schema": null, "sql": "insert into upsert values(1, 'val') on conflict (key) do update set val = 'seen with subselect ' || (select f1 from int4_tbl where f1 != 0 limit 1)::text;", "explanation": "DML from PostgreSQL core regression test for Subselect.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "dml_insert", "is_postgresql_specific": true, "sql_length": 154, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which country had an interview score of 9.40 and average of 9.44?", "schema": "CREATE TABLE table_11884814_3 (country VARCHAR, interview VARCHAR, average VARCHAR)", "sql": "SELECT country FROM table_11884814_3 WHERE interview = '9.40' AND average = '9.44';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 83, "num_statements": 1} {"question": "Create a table for employee diversity metrics and insert data on gender, race, and veteran status", "schema": "CREATE TABLE diversity_metrics (id INT, employee_id INT, gender VARCHAR(50), race VARCHAR(50), veteran_status VARCHAR(50));", "sql": "CREATE TABLE diversity_metrics (id INT, employee_id INT, gender VARCHAR(50), race VARCHAR(50), veteran_status VARCHAR(50)) AS SELECT * FROM (VALUES (1, 123, 'Female', 'Asian', 'No'), (2, 234, 'Male', 'Black', 'Yes'), (3, 345, 'Non-binary', 'White', 'No'), (4, 456, 'Female', 'Latinx', 'No')) AS t(id, employee_id, gender, race, veteran_status);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_table", "is_postgresql_specific": false, "sql_length": 344, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the method of the match in the Road fc 12 at 5:00 with less than 3 rounds, and a loss result?", "schema": "CREATE TABLE table_name_72 (method VARCHAR, event VARCHAR, res VARCHAR, time VARCHAR, round VARCHAR)", "sql": "SELECT method FROM table_name_72 WHERE time = '5:00' AND round < 3 AND res = 'loss' AND event = 'road fc 12';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 109, "num_statements": 1} {"question": "Update the title of the painting with PaintingID 1 to 'The Starry Night'.", "schema": "CREATE TABLE Paintings (PaintingID INT, Title VARCHAR(50), ArtistID INT, YearCreated INT); INSERT INTO Paintings (PaintingID, Title, ArtistID, YearCreated) VALUES (1, 'Starry Night Sketch', 1, 1889);", "sql": "UPDATE Paintings SET Title = 'The Starry Night' WHERE PaintingID = 1;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 69, "num_statements": 1} {"question": "What's the total budget for programs in arts and environment?", "schema": "CREATE TABLE Programs (ProgramID INT, ProgramName TEXT, Budget DECIMAL(10,2), Category TEXT); INSERT INTO Programs (ProgramID, ProgramName, Budget, Category) VALUES (1, 'Eco Warriors', 8000.00, 'Environment');", "sql": "SELECT SUM(Budget) FROM Programs WHERE Category IN ('Arts', 'Environment');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 75, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the attendance for the game where the away team was Stockport County?", "schema": "CREATE TABLE table_name_1 (attendance VARCHAR, away_team VARCHAR)", "sql": "SELECT attendance FROM table_name_1 WHERE away_team = 'stockport county';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 73, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What Venue held the Olympic games in 2012?", "schema": "CREATE TABLE table_name_12 (venue VARCHAR, tournament VARCHAR, year VARCHAR)", "sql": "SELECT venue FROM table_name_12 WHERE tournament = 'olympic games' AND year = 2012;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 83, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: If seed number is 2, what is the maximum amount of points?", "schema": "CREATE TABLE table_23501776_18 (points INTEGER, seed VARCHAR)", "sql": "SELECT MAX(points) FROM table_23501776_18 WHERE seed = 2;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 57, "num_statements": 1} {"question": "PostgreSQL regression test 'explain': Write the SELECT query (example 22).", "schema": null, "sql": "select explain_filter_to_json('explain (settings, format json) select * from int8_tbl i8') #> '{0,Settings,plan_cache_mode}';", "explanation": "Regression test for Explain in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select explain_filter_to_json('explain (settings, format json) select * from int8_tbl i8') #> '{0,Settings,plan_cache_mode}') AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "admin_maintenance", "is_postgresql_specific": true, "sql_length": 125, "num_statements": 1} {"question": "What is the minimum assets value for customers in 'North America'?", "schema": "CREATE TABLE customers (id INT, name VARCHAR(50), region VARCHAR(20), assets DECIMAL(10,2)); INSERT INTO customers (id, name, region, assets) VALUES (1, 'John Doe', 'Southwest', 50000.00), (2, 'Jane Smith', 'Northeast', 75000.00), (3, 'Michael Johnson', 'North America', 30000.00), (4, 'Sarah Lee', 'North America', 40000.00);", "sql": "SELECT MIN(assets) FROM customers WHERE region = 'North America';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 65, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the outcome of the final with a partner of Brett Steven?", "schema": "CREATE TABLE table_name_62 (outcome VARCHAR, partner VARCHAR)", "sql": "SELECT outcome FROM table_name_62 WHERE partner = 'brett steven';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 65, "num_statements": 1} {"question": "PostgreSQL regression test 'jsonb_jsonpath': Write the SELECT query (example 226).", "schema": null, "sql": "select jsonb_path_match('[{\"a\": 1}, {\"a\": 2}, 3]', 'lax exists($[*].a)', silent => false);", "explanation": "Regression test for Jsonb Jsonpath in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select jsonb_path_match('[{\"a\": 1}, {\"a\": 2}, 3]', 'lax exists($[*].a)', silent => false)) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 90, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which record has a time of 0:13?", "schema": "CREATE TABLE table_name_87 (record VARCHAR, time VARCHAR)", "sql": "SELECT record FROM table_name_87 WHERE time = '0:13';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 53, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which bridge was completed in 1928?", "schema": "CREATE TABLE table_name_57 (name VARCHAR, completed VARCHAR)", "sql": "SELECT name FROM table_name_57 WHERE completed = '1928';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 56, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Tell me the competition for half marathon", "schema": "CREATE TABLE table_name_70 (competition VARCHAR, notes VARCHAR)", "sql": "SELECT competition FROM table_name_70 WHERE notes = 'half marathon';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 68, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Show the different nationalities and the number of journalists of each nationality.", "schema": "CREATE TABLE journalist (Nationality VARCHAR)", "sql": "SELECT Nationality, COUNT(*) FROM journalist GROUP BY Nationality;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "How many volunteers are there in total?", "schema": "CREATE TABLE volunteers( id INT PRIMARY KEY NOT NULL, name VARCHAR(50), age INT, city VARCHAR(30), country VARCHAR(30) ); INSERT INTO volunteers (id, name, age, city, country) VALUES (1, 'John Doe', 25, 'New York', 'USA'); INSERT INTO volunteers (id, name, age, city, country) VALUES (2, 'Jane Doe', 30, 'Los Angeles', 'USA');", "sql": "SELECT COUNT(*) FROM volunteers;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 32, "num_statements": 1} {"question": "Who are the traditional artists in Europe?", "schema": "CREATE TABLE TraditionalArtists (id INT, name VARCHAR(50), country VARCHAR(50), art_form VARCHAR(50)); INSERT INTO TraditionalArtists (id, name, country, art_form) VALUES (1, 'Pablo Picasso', 'Spain', 'Cubism'); INSERT INTO TraditionalArtists (id, name, country, art_form) VALUES (2, 'Vincent van Gogh', 'Netherlands', 'Post-Impressionism');", "sql": "SELECT TraditionalArtists.name FROM TraditionalArtists WHERE TraditionalArtists.country IN ('Albania', 'Andorra', 'Austria', 'Belarus', 'Belgium', 'Bosnia and Herzegovina', 'Bulgaria', 'Croatia', 'Cyprus', 'Czech Republic', 'Denmark', 'Estonia', 'Finland', 'France', 'Germany', 'Greece', 'Hungary', 'Iceland', 'Ireland', 'Italy', 'Kosovo', 'Latvia', 'Liechtenstein', 'Lithuania', 'Luxembourg', 'Malta', 'Moldova', 'Monaco', 'Montenegro', 'North Macedonia', 'Norway', 'Poland', 'Portugal', 'Romania', 'San Marino', 'Serbia', 'Slovakia', 'Slovenia', 'Spain', 'Sweden', 'Switzerland', 'The Netherlands', 'Turkey', 'Ukraine', 'United Kingdom', 'Vatican City');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 656, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the tonnage on 12 september 1942?", "schema": "CREATE TABLE table_name_91 (tonnage VARCHAR, date VARCHAR)", "sql": "SELECT tonnage FROM table_name_91 WHERE date = '12 september 1942';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 67, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: WHAT IS THE ATTENDANCE WITH A READING AWAY TEAM?", "schema": "CREATE TABLE table_name_24 (attendance VARCHAR, away_team VARCHAR)", "sql": "SELECT attendance FROM table_name_24 WHERE away_team = 'reading';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 65, "num_statements": 1} {"question": "Update light intensity records for sensor 012 on 2023-03-06 to 10000 lux", "schema": "CREATE TABLE LightIntensityData (date DATE, intensity INT, sensor_id INT, FOREIGN KEY (sensor_id) REFERENCES SensorData(sensor_id));", "sql": "UPDATE LightIntensityData SET intensity = 10000 WHERE sensor_id = 12 AND date = '2023-03-06';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 93, "num_statements": 1} {"question": "What is the total revenue generated from ticket sales for each sport in the 'sales' table?", "schema": "CREATE TABLE sales (sale_id INT, event VARCHAR(50), sport VARCHAR(20), price DECIMAL(5,2), quantity INT); INSERT INTO sales (sale_id, event, sport, price, quantity) VALUES (1, 'Game 1', 'Basketball', 100.00, 500); INSERT INTO sales (sale_id, event, sport, price, quantity) VALUES (2, 'Game 2', 'Soccer', 75.00, 750);", "sql": "SELECT sport, SUM(price * quantity) as total_revenue FROM sales GROUP BY sport;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 79, "num_statements": 1} {"question": "What is the total number of clinical trials conducted by BigPharma Inc. in the United States?", "schema": "CREATE TABLE clinical_trials (id INT, company VARCHAR(255), country VARCHAR(255), phase VARCHAR(255)); INSERT INTO clinical_trials (id, company, country, phase) VALUES (1, 'BigPharma Inc.', 'United States', 'Phase 3');", "sql": "SELECT COUNT(*) FROM clinical_trials WHERE company = 'BigPharma Inc.' AND country = 'United States';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 100, "num_statements": 1} {"question": "What is the total funding received by each program type in Q1 and Q2 for the past two years, grouped by year?", "schema": "CREATE TABLE programs (program_id INT, program_name VARCHAR(50), program_type VARCHAR(20)); CREATE TABLE funding (funding_id INT, program_id INT, funding_amount DECIMAL(10,2), funding_date DATE); INSERT INTO programs (program_id, program_name, program_type) VALUES (1, 'Art Education', 'Education'), (2, 'Music Education', 'Education'), (3, 'Theater Performance', 'Performance'); INSERT INTO funding (funding_id, program_id, funding_amount, funding_date) VALUES (1, 1, 5000, '2022-01-01'), (2, 2, 3000, '2021-12-01'), (3, 3, 8000, '2020-05-01');", "sql": "SELECT DATEPART(yy, funding_date) AS year, program_type, SUM(funding_amount) AS total_funding FROM funding f INNER JOIN programs p ON f.program_id = p.program_id WHERE funding_date BETWEEN DATEADD(year, -2, '2022-01-01') AND '2022-06-30' GROUP BY DATEPART(yy, funding_date), program_type;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 288, "num_statements": 1} {"question": "What is the total amount of resources depleted by each mining site, and which sites have depleted more than 90% of their total resources?", "schema": "CREATE TABLE mining_sites (id INT, site_name TEXT, total_resources_available INT);CREATE TABLE resources_depleted (site_id INT, amount_depleted INT);", "sql": "SELECT s.site_name, SUM(r.amount_depleted) as total_depleted, s.total_resources_available FROM mining_sites s JOIN resources_depleted r ON s.id = r.site_id GROUP BY s.site_name HAVING SUM(r.amount_depleted) / s.total_resources_available > 0.9;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 243, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: List the names and birthdays of the top five players in terms of potential.", "schema": "CREATE TABLE Player_Attributes (player_api_id VARCHAR); CREATE TABLE Player (player_name VARCHAR, birthday VARCHAR, player_api_id VARCHAR)", "sql": "SELECT DISTINCT T1.player_name, T1.birthday FROM Player AS T1 JOIN Player_Attributes AS T2 ON T1.player_api_id = T2.player_api_id ORDER BY potential DESC LIMIT 5;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 162, "num_statements": 1} {"question": "What is the total quantity of electronics transported by vessels that docked at the Port of Los Angeles?", "schema": "CREATE TABLE Ports (PortID INT, PortName VARCHAR(100), City VARCHAR(100), Country VARCHAR(100)); INSERT INTO Ports (PortID, PortName, City, Country) VALUES (1, 'Port of Los Angeles', 'Los Angeles', 'USA'); INSERT INTO Ports (PortID, PortName, City, Country) VALUES (2, 'Port of Rotterdam', 'Rotterdam', 'Netherlands'); CREATE TABLE Vessels (VesselID INT, VesselName VARCHAR(100), VesselType VARCHAR(100), PortID INT); INSERT INTO Vessels (VesselID, VesselName, VesselType, PortID) VALUES (1, 'Ever Ace', 'Container Ship', 1); CREATE TABLE Cargo (CargoID INT, CargoName VARCHAR(100), Quantity INT, VesselID INT); INSERT INTO Cargo (CargoID, CargoName, Quantity, VesselID) VALUES (1, 'Electronics', 10000, 1); INSERT INTO Cargo (CargoID, CargoName, Quantity, VesselID) VALUES (2, 'Vehicles', 5000, 2); CREATE TABLE VesselPorts (VesselID INT, PortID INT); INSERT INTO VesselPorts (VesselID, PortID) VALUES (1, 1);", "sql": "SELECT SUM(Cargo.Quantity) FROM Cargo INNER JOIN Vessels ON Cargo.VesselID = Vessels.VesselID INNER JOIN VesselPorts ON Vessels.VesselID = VesselPorts.VesselID WHERE VesselPorts.PortID = 1 AND Cargo.CargoName = 'Electronics';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 225, "num_statements": 1} {"question": "What is the average number of containers handled per day by vessels in the South China Sea in Q2 2020?", "schema": "CREATE TABLE Vessel_Stats (vessel_name TEXT, location TEXT, handling_date DATE, containers_handled INTEGER); INSERT INTO Vessel_Stats (vessel_name, location, handling_date, containers_handled) VALUES ('VesselA', 'South China Sea', '2020-04-01', 50), ('VesselB', 'South China Sea', '2020-04-02', 75), ('VesselC', 'South China Sea', '2020-05-01', 65), ('VesselD', 'South China Sea', '2020-05-02', 80);", "sql": "SELECT AVG(containers_handled/30.0) FROM Vessel_Stats WHERE location = 'South China Sea' AND handling_date >= '2020-04-01' AND handling_date <= '2020-06-30';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 157, "num_statements": 1} {"question": "Insert a new bus route from 'Tokyo' to 'Osaka' with a distance of 500 km and a fare of ¥5000.", "schema": "CREATE TABLE routes (route_id INT, route_name TEXT); INSERT INTO routes (route_id, route_name) VALUES (101, 'Bus Route 101'), (102, 'Bus Route 102'), (105, 'Bus Route 105'), (501, 'Bus Route 501'), (502, 'Bus Route 502'); CREATE TABLE bus_routes (route_id INT, start_station TEXT, end_station TEXT, distance INT, fare DECIMAL);", "sql": "INSERT INTO bus_routes (route_id, start_station, end_station, distance, fare) VALUES (501, 'Tokyo', 'Osaka', 500, 5000);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 120, "num_statements": 1} {"question": "Show an example of PostgreSQL CREATEDB (example 1).", "schema": null, "sql": "$ createdb demo;", "explanation": "PostgreSQL CREATEDB command.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 16, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who constructed the car with a grid over 19 that retired due to suspension?", "schema": "CREATE TABLE table_name_49 (constructor VARCHAR, grid VARCHAR, time_retired VARCHAR)", "sql": "SELECT constructor FROM table_name_49 WHERE grid > 19 AND time_retired = 'suspension';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 86, "num_statements": 1} {"question": "PostgreSQL regression test 'interval': Write the SELECT query (example 211).", "schema": null, "sql": "select interval '-1 week -2147483648 days';", "explanation": "Regression test for Interval in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select interval '-1 week -2147483648 days') AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 43, "num_statements": 1} {"question": "What is the total revenue from ticket sales for games with an attendance of more than 5000 people?", "schema": "CREATE TABLE ticket_prices (ticket_id INT, game_id INT, price DECIMAL(5,2));", "sql": "SELECT SUM(price * quantity) FROM ticket_sales JOIN ticket_prices ON ticket_sales.ticket_id = ticket_prices.ticket_id WHERE (SELECT COUNT(DISTINCT fan_id) FROM fans WHERE game_id = ticket_sales.game_id) > 5000;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 210, "num_statements": 1} {"question": "What is the minimum daily water consumption for the water treatment plant with ID 9 in the state of Colorado in 2023?", "schema": "CREATE TABLE water_treatment_plant (plant_id INT, state VARCHAR(50), year INT, month INT, day INT, water_consumption FLOAT); INSERT INTO water_treatment_plant (plant_id, state, year, month, day, water_consumption) VALUES (9, 'Colorado', 2023, 1, 1, 12345.6), (9, 'Colorado', 2023, 1, 2, 23456.7), (9, 'Colorado', 2023, 1, 3, 34567.8);", "sql": "SELECT MIN(water_consumption) as min_water_consumption FROM water_treatment_plant WHERE plant_id = 9 AND state = 'Colorado' AND year = 2023;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 140, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the total of the roll with a Decile of 8, and an Area of hororata?", "schema": "CREATE TABLE table_name_73 (roll INTEGER, decile VARCHAR, area VARCHAR)", "sql": "SELECT SUM(roll) FROM table_name_73 WHERE decile = 8 AND area = 'hororata';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 75, "num_statements": 1} {"question": "What are the total sales for each product category in Q1 of 2022?", "schema": "CREATE TABLE sales (product_id INT, product_name VARCHAR(100), category VARCHAR(50), sale_date DATE, revenue DECIMAL(10, 2)); INSERT INTO sales (product_id, product_name, category, sale_date, revenue) VALUES (1, 'Lipstick', 'Cosmetics', '2022-01-02', 25.99), (2, 'Foundation', 'Cosmetics', '2022-01-15', 34.99);", "sql": "SELECT category, SUM(revenue) AS total_sales FROM sales WHERE sale_date BETWEEN '2022-01-01' AND '2022-03-31' GROUP BY category;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 128, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: what is the class when the quantity perserved is 0 and the quantity made is 5?", "schema": "CREATE TABLE table_name_78 (class VARCHAR, quantity_preserved VARCHAR, quantity_made VARCHAR)", "sql": "SELECT class FROM table_name_78 WHERE quantity_preserved = '0' AND quantity_made = '5';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 87, "num_statements": 1} {"question": "What was the plastic recycling rate in 2019 compared to 2017?", "schema": "CREATE TABLE recycling_rates (id INT, material VARCHAR(20), year INT, recycling_rate DECIMAL(5,2)); INSERT INTO recycling_rates (id, material, year, recycling_rate) VALUES (1, 'plastic', 2017, 0.25), (2, 'plastic', 2018, 0.28), (3, 'plastic', 2019, 0.31), (4, 'paper', 2017, 0.60), (5, 'paper', 2018, 0.63), (6, 'paper', 2019, 0.66), (7, 'glass', 2017, 0.35), (8, 'glass', 2018, 0.37), (9, 'glass', 2019, 0.39);", "sql": "SELECT (recycling_rate - (SELECT recycling_rate FROM recycling_rates r2 WHERE r2.material = 'plastic' AND r2.year = 2017)) FROM recycling_rates WHERE material = 'plastic' AND year = 2019;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 187, "num_statements": 1} {"question": "What is the difference in the average age of players who prefer using VR technology and those who do not?", "schema": "CREATE TABLE PlayerAges (PlayerID INT, Age INT); INSERT INTO PlayerAges (PlayerID, Age) VALUES (1, 25), (2, 28), (3, 30), (4, 32), (5, 27); CREATE TABLE PlayerPreferences (PlayerID INT, Preference VARCHAR(50)); INSERT INTO PlayerPreferences (PlayerID, Preference) VALUES (1, 'VR'), (2, 'Non-VR'), (3, 'VR'), (4, 'Non-VR'), (5, 'Non-VR');", "sql": "(SELECT AVG(PlayerAges.Age) FROM PlayerAges JOIN PlayerPreferences ON PlayerAges.PlayerID = PlayerPreferences.PlayerID WHERE PlayerPreferences.Preference = 'VR' EXCEPT SELECT AVG(PlayerAges.Age) FROM PlayerAges JOIN PlayerPreferences ON PlayerAges.PlayerID = PlayerPreferences.PlayerID WHERE PlayerPreferences.Preference = 'Non-VR');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 333, "num_statements": 1} {"question": "What is the average recycling rate in Mumbai over the last 3 years?", "schema": "CREATE TABLE recycling_rates (city VARCHAR(50), year INT, recycling_rate DECIMAL(5,2)); INSERT INTO recycling_rates (city, year, recycling_rate) VALUES ('Mumbai', 2019, 0.55), ('Mumbai', 2020, 0.58), ('Mumbai', 2021, 0.62), ('Mumbai', 2022, 0.65);", "sql": "SELECT AVG(recycling_rate) FROM recycling_rates WHERE city = 'Mumbai' AND year BETWEEN 2019 AND 2022;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 101, "num_statements": 1} {"question": "What is the total number of transactions and total transaction value for each investment strategy in the last month?", "schema": "CREATE TABLE investment_strategies (id INT, strategy_name VARCHAR(255), account_id INT, transaction_value DECIMAL(10, 2), transaction_date DATE); INSERT INTO investment_strategies (id, strategy_name, account_id, transaction_value, transaction_date) VALUES (1, 'Conservative', 1, 50, '2022-01-01'), (2, 'Moderate', 1, 100, '2022-01-15'), (3, 'Aggressive', 2, 25, '2022-01-05'), (4, 'Conservative', 2, 75, '2022-01-30'), (5, 'Moderate', 3, 150, '2022-01-20');", "sql": "SELECT s.strategy_name, COUNT(s.id) AS num_transactions, SUM(s.transaction_value) AS total_transaction_value FROM investment_strategies s WHERE s.transaction_date >= DATE_SUB(CURDATE(), INTERVAL 1 MONTH) GROUP BY s.strategy_name;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 229, "num_statements": 1} {"question": "What is the total budget allocated for education and healthcare in each city?", "schema": "CREATE TABLE BudgetAllocation (Id INT, CityId INT, Category VARCHAR(50), Amount DECIMAL(10,2)); INSERT INTO BudgetAllocation (Id, CityId, Category, Amount) VALUES (1, 1, 'Education', 3000000), (2, 1, 'Healthcare', 4000000), (3, 2, 'Education', 6000000), (4, 2, 'Healthcare', 5000000);", "sql": "SELECT CityId, Category, SUM(Amount) AS TotalBudget FROM BudgetAllocation WHERE Category IN ('Education', 'Healthcare') GROUP BY CityId, Category;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 146, "num_statements": 1} {"question": "Calculate the percentage of visitors who attended exhibitions in each city.", "schema": "CREATE TABLE Exhibitions (id INT, city VARCHAR(20), visitor_id INT); CREATE TABLE Visitors (id INT, name VARCHAR(50));", "sql": "SELECT city, 100.0 * COUNT(DISTINCT Exhibitions.visitor_id) / (SELECT COUNT(DISTINCT Visitors.id) FROM Visitors) AS pct_visitors FROM Exhibitions GROUP BY city;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 160, "num_statements": 1} {"question": "PostgreSQL regression test 'interval': Write the SELECT query (example 242).", "schema": null, "sql": "select interval '-2147483648 months -0.1 millennium';", "explanation": "Regression test for Interval in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select interval '-2147483648 months -0.1 millennium') AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 53, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the score of the Red Wings game when Vancouver was the home team?", "schema": "CREATE TABLE table_name_71 (score VARCHAR, home VARCHAR)", "sql": "SELECT score FROM table_name_71 WHERE home = 'vancouver';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 57, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the average longitude of stations that never had bike availability more than 10?", "schema": "CREATE TABLE station (long INTEGER, id VARCHAR, station_id VARCHAR, bikes_available INTEGER); CREATE TABLE status (long INTEGER, id VARCHAR, station_id VARCHAR, bikes_available INTEGER)", "sql": "SELECT AVG(long) FROM station WHERE NOT id IN (SELECT station_id FROM status GROUP BY station_id HAVING MAX(bikes_available) > 10);", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 131, "num_statements": 1} {"question": "pgTAP test for Pgtap--Unpackaged--0.91.0 (assertion 241).", "schema": null, "sql": "ALTER EXTENSION pgtap ADD FUNCTION hasnt_function ( NAME, NAME, TEXT );", "explanation": "SQL assertion from pgTAP test suite for Pgtap--Unpackaged--0.91.0.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 71, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the away score when North Melbourne was played?", "schema": "CREATE TABLE table_name_91 (away_team VARCHAR, home_team VARCHAR)", "sql": "SELECT away_team AS score FROM table_name_91 WHERE home_team = 'north melbourne';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 81, "num_statements": 1} {"question": "What is the minimum and maximum word count of articles in the 'article_word_count' table?", "schema": "CREATE TABLE article_word_count (article_id INT, word_count INT, category VARCHAR(20)); INSERT INTO article_word_count (article_id, word_count, category) VALUES (1, 500, 'Politics'), (2, 800, 'Sports'), (3, 300, 'Politics'), (4, 1200, 'Sports');", "sql": "SELECT MIN(word_count) as min_word_count, MAX(word_count) as max_word_count FROM article_word_count;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 100, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the oberliga bayern for sv sandhausen for 1986-87", "schema": "CREATE TABLE table_14242137_4 (oberliga_bayern VARCHAR, oberliga_baden_württemberg VARCHAR, season VARCHAR)", "sql": "SELECT oberliga_bayern FROM table_14242137_4 WHERE oberliga_baden_württemberg = 'SV Sandhausen' AND season = '1986-87';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 119, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the first elected for alabama 3", "schema": "CREATE TABLE table_25030512_4 (first_elected VARCHAR, district VARCHAR)", "sql": "SELECT COUNT(first_elected) FROM table_25030512_4 WHERE district = 'Alabama 3';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 79, "num_statements": 1} {"question": "How many satellites have been launched by each company since 2010?", "schema": "CREATE TABLE satellites (id INT, company VARCHAR(50), launch_date DATE); INSERT INTO satellites (id, company, launch_date) VALUES (1, 'SpaceX', '2010-01-01'), (2, 'Blue Origin', '2015-04-29'), (3, 'SpaceX', '2018-02-06');", "sql": "SELECT company, COUNT(*) as total_satellites FROM satellites WHERE YEAR(launch_date) >= 2010 GROUP BY company;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 110, "num_statements": 1} {"question": "Determine the average wastewater production in cubic meters for the city of Los Angeles in the month of May 2021", "schema": "CREATE TABLE wastewater_production (id INT, city VARCHAR(50), production FLOAT, date DATE); INSERT INTO wastewater_production (id, city, production, date) VALUES (1, 'Los Angeles', 4500, '2021-05-01'); INSERT INTO wastewater_production (id, city, production, date) VALUES (2, 'Los Angeles', 4800, '2021-05-02');", "sql": "SELECT AVG(production) FROM wastewater_production WHERE city = 'Los Angeles' AND date >= '2021-05-01' AND date <= '2021-05-31';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 127, "num_statements": 1} {"question": "What is the total number of startups founded by individuals who identify as LGBTQ+?", "schema": "CREATE TABLE companies (id INT, name TEXT, founding_year INT, founder_identifies_as_lgbtq BOOLEAN); INSERT INTO companies (id, name, founding_year, founder_identifies_as_lgbtq) VALUES (1, 'Delta Startups', 2020, true); INSERT INTO companies (id, name, founding_year, founder_identifies_as_lgbtq) VALUES (2, 'Epsilon Enterprises', 2018, false);", "sql": "SELECT COUNT(*) FROM companies WHERE founder_identifies_as_lgbtq = true;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 72, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who won the gold when Kim Hyang-Mi won the bronze?", "schema": "CREATE TABLE table_name_97 (gold VARCHAR, bronze VARCHAR)", "sql": "SELECT gold FROM table_name_97 WHERE bronze = 'kim hyang-mi';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 61, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What are the goals for a lost of Involuntary suspension of Season (hurricane Rita)?", "schema": "CREATE TABLE table_name_44 (goals_for VARCHAR, lost VARCHAR)", "sql": "SELECT goals_for FROM table_name_44 WHERE lost = 'involuntary suspension of season (hurricane rita)';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 101, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: what is the production in 2010 with rank of 8?", "schema": "CREATE TABLE table_name_39 (rank VARCHAR)", "sql": "SELECT 2010 FROM table_name_39 WHERE rank = '8';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 48, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is Winning Score, when Date is 17 Jan 2010?", "schema": "CREATE TABLE table_name_45 (winning_score VARCHAR, date VARCHAR)", "sql": "SELECT winning_score FROM table_name_45 WHERE date = '17 jan 2010';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 67, "num_statements": 1} {"question": "Show the number of wells in the South Pacific that were drilled each year from 2010 to 2020.", "schema": "CREATE TABLE wells_south_pacific (id INT, location VARCHAR(20), drill_date DATE);", "sql": "SELECT drill_date, COUNT(*) FROM wells_south_pacific WHERE location LIKE 'South Pacific%' AND drill_date BETWEEN '2010-01-01' AND '2020-12-31' GROUP BY drill_date;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 163, "num_statements": 1} {"question": "What is the total funding for bioprocess engineering projects in the UK?", "schema": "CREATE TABLE bioprocess_projects (id INT, project_name VARCHAR(50), location VARCHAR(50), funding_amount INT); INSERT INTO bioprocess_projects (id, project_name, location, funding_amount) VALUES (1, 'Project G', 'UK', 12000000); INSERT INTO bioprocess_projects (id, project_name, location, funding_amount) VALUES (2, 'Project H', 'USA', 10000000);", "sql": "SELECT SUM(funding_amount) FROM bioprocess_projects WHERE location = 'UK' AND technology = 'Bioprocess Engineering';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 116, "num_statements": 1} {"question": "What is the total quantity of menu items sold in each country?", "schema": "CREATE TABLE menus (menu_id INT, item VARCHAR(255), category VARCHAR(255), price DECIMAL(10, 2)); INSERT INTO menus VALUES (1, 'Chicken Wings', 'Appetizers', 12.99); INSERT INTO menus VALUES (2, 'Beef Burger', 'Entrees', 15.99); INSERT INTO menus VALUES (3, 'Chocolate Cake', 'Desserts', 8.99); CREATE TABLE sales (sale_id INT, menu_id INT, quantity INT, country VARCHAR(255));", "sql": "SELECT s.country, SUM(s.quantity) as total_quantity FROM sales s GROUP BY s.country;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 84, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Show all locations which don't have a train station with at least 15 platforms.", "schema": "CREATE TABLE station (LOCATION VARCHAR, number_of_platforms VARCHAR)", "sql": "SELECT LOCATION FROM station EXCEPT SELECT LOCATION FROM station WHERE number_of_platforms >= 15;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 97, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which Type has a Builder of avonside engine company, and a Number of 9?", "schema": "CREATE TABLE table_name_55 (type VARCHAR, builder VARCHAR, number VARCHAR)", "sql": "SELECT type FROM table_name_55 WHERE builder = 'avonside engine company' AND number = 9;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 88, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name for me the tatsuhito takaiwa for Ryusuke Taguchi of taguchi (14:31)", "schema": "CREATE TABLE table_name_50 (tatsuhito_takaiwa VARCHAR, ryusuke_taguchi VARCHAR)", "sql": "SELECT tatsuhito_takaiwa FROM table_name_50 WHERE ryusuke_taguchi = 'taguchi (14:31)';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 86, "num_statements": 1} {"question": "pgTAP test for Usergroup (assertion 33).", "schema": null, "sql": "SELECT * FROM check_test(\n is_member_of('meanies', current_user, 'whatever' ),\n true,\n 'is_member_of(meanies, current_user, desc)',\n 'whatever',\n ''\n);", "explanation": "SQL assertion from pgTAP test suite for Usergroup.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 166, "num_statements": 1} {"question": "What is the average price of vegetarian dishes in the lunch category?", "schema": "CREATE TABLE menu (item_id INT, name TEXT, category TEXT, is_vegetarian BOOLEAN, price FLOAT); INSERT INTO menu (item_id, name, category, is_vegetarian, price) VALUES (1, 'Chickpea Curry', 'Lunch', true, 10.5), (2, 'Chicken Tikka Masala', 'Lunch', false, 13.0), (3, 'Quinoa Salad', 'Starters', true, 7.5);", "sql": "SELECT AVG(price) as avg_vegetarian_price FROM menu WHERE is_vegetarian = true AND category = 'Lunch';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 102, "num_statements": 1} {"question": "What is the average amount of climate finance provided by each country for renewable energy projects in 2020?", "schema": "CREATE TABLE renewable_energy_projects (country VARCHAR(50), finance_amount NUMERIC(10, 2), year INT); INSERT INTO renewable_energy_projects (country, finance_amount, year) VALUES ('USA', 1500000, 2020), ('China', 2000000, 2020), ('India', 1200000, 2020);", "sql": "SELECT country, AVG(finance_amount) FROM renewable_energy_projects WHERE year = 2020 GROUP BY country;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 102, "num_statements": 1} {"question": "Update the year of implementation for pollution control initiatives in the Arctic Ocean to 2022 if the year is older than 2010.", "schema": "CREATE TABLE pollution_control_initiatives (id INT, name TEXT, location TEXT, year INT); INSERT INTO pollution_control_initiatives (id, name, location, year) VALUES (1, 'Ocean Plastic Reduction Project', 'Arctic Ocean', 2006), (2, 'Coral Reef Protection Program', 'Arctic Ocean', 2017), (3, 'Marine Life Restoration Effort', 'Arctic Ocean', 2015);", "sql": "UPDATE pollution_control_initiatives SET year = 2022 WHERE location = 'Arctic Ocean' AND year < 2010;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 101, "num_statements": 1} {"question": "PostgreSQL regression test 'timestamp': Write the SELECT query (example 27).", "schema": null, "sql": "SELECT timestamp 'infinity' = timestamp '+infinity' AS t;", "explanation": "Regression test for Timestamp in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT timestamp 'infinity' = timestamp '+infinity' AS t) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 57, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: In what format was the release from the Nebt068 catalog on April 11, 2005 in?", "schema": "CREATE TABLE table_name_62 (format VARCHAR, date VARCHAR, catalog VARCHAR)", "sql": "SELECT format FROM table_name_62 WHERE date = 'april 11, 2005' AND catalog = 'nebt068';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 87, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the location is the winning driver is Bill Vukovich II?", "schema": "CREATE TABLE table_22670216_1 (location VARCHAR, winning_driver VARCHAR)", "sql": "SELECT location FROM table_22670216_1 WHERE winning_driver = 'Bill Vukovich II';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 80, "num_statements": 1} {"question": "List the names of the cities where waste collection services have improved in the last 2 years based on citizen feedback.", "schema": "CREATE TABLE Cities(City VARCHAR(25)); CREATE TABLE Feedback(City VARCHAR(25), Year INT, Service VARCHAR(20), Satisfaction FLOAT); INSERT INTO Cities VALUES('CityA'), ('CityB'), ('CityC'); INSERT INTO Feedback VALUES('CityA', 2020, 'Waste Collection', 4.5), ('CityA', 2021, 'Waste Collection', 4.8), ('CityB', 2020, 'Waste Collection', 3.2), ('CityB', 2021, 'Waste Collection', 3.6), ('CityC', 2020, 'Waste Collection', 4.3), ('CityC', 2021, 'Waste Collection', 4.7);", "sql": "SELECT DISTINCT City FROM Feedback WHERE Service = 'Waste Collection' AND Satisfaction > 4 AND Year IN (2020, 2021) INTERSECT SELECT City FROM Cities;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 150, "num_statements": 1} {"question": "What is the maximum investment amount for customers in the East region?", "schema": "CREATE TABLE investments (id INT, customer_id INT, amount FLOAT); INSERT INTO investments (id, customer_id, amount) VALUES (1, 1, 10000), (2, 2, 15000), (3, 3, 8000), (4, 1, 12000); CREATE TABLE customers (id INT, name VARCHAR(50), region VARCHAR(50)); INSERT INTO customers (id, name, region) VALUES (1, 'John Smith', 'East'), (2, 'Jane Doe', 'Northeast'), (3, 'Bob Johnson', 'Southeast'), (4, 'Alex Brown', 'West');", "sql": "SELECT MAX(amount) FROM investments JOIN customers ON investments.customer_id = customers.id WHERE customers.region = 'East';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 125, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Show the document type code with fewer than 3 documents.", "schema": "CREATE TABLE Documents (document_type_code VARCHAR)", "sql": "SELECT document_type_code FROM Documents GROUP BY document_type_code HAVING COUNT(*) < 3;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 89, "num_statements": 1} {"question": "Add the following new rare earth element to the production_data table: Gadolinium with a quantity of 450 from 2020", "schema": "CREATE TABLE production_data ( id INT PRIMARY KEY, year INT, refined_rare_earth_element TEXT, quantity INT );", "sql": "INSERT INTO production_data (id, year, refined_rare_earth_element, quantity) VALUES (5, 2020, 'Gadolinium', 450);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 113, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: When did Final Score of 31-7 happen?", "schema": "CREATE TABLE table_name_64 (date VARCHAR, final_score VARCHAR)", "sql": "SELECT date FROM table_name_64 WHERE final_score = '31-7';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 58, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the maximum, minimum and average market share of the listed browsers?", "schema": "CREATE TABLE browser (market_share INTEGER)", "sql": "SELECT MAX(market_share), MIN(market_share), AVG(market_share) FROM browser;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 76, "num_statements": 1} {"question": "List all aircraft models and their manufacturers, sorted by manufacturer name.", "schema": "CREATE TABLE aircraft (id INT, model VARCHAR(255)); CREATE TABLE manufacturers (id INT, name VARCHAR(255)); INSERT INTO aircraft (id, model) VALUES (1, '737'), (2, '747'); INSERT INTO manufacturers (id, name) VALUES (1, 'Boeing'), (2, 'Airbus');", "sql": "SELECT aircraft.model, manufacturers.name FROM aircraft INNER JOIN manufacturers ON aircraft.id = manufacturers.id ORDER BY manufacturers.name;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 143, "num_statements": 1} {"question": "How many seals are in the Arctic Ocean?", "schema": "CREATE TABLE Animals (name VARCHAR(50), species VARCHAR(50), location VARCHAR(50)); INSERT INTO Animals (name, species, location) VALUES ('Seal 1', 'Seal', 'Arctic Ocean'), ('Seal 2', 'Seal', 'Arctic Ocean'), ('Walrus 1', 'Walrus', 'Arctic Ocean');", "sql": "SELECT COUNT(*) FROM Animals WHERE species = 'Seal' AND location = 'Arctic Ocean';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 82, "num_statements": 1} {"question": "What is the total number of hours spent on open pedagogy projects by students in each gender group?", "schema": "CREATE TABLE open_pedagogy_gender (student_id INT, gender TEXT, total_open_pedagogy_hours INT); INSERT INTO open_pedagogy_gender (student_id, gender, total_open_pedagogy_hours) VALUES (1, 'Female', 30), (2, 'Male', 45), (3, 'Female', 60);", "sql": "SELECT gender, SUM(total_open_pedagogy_hours) FROM open_pedagogy_gender GROUP BY gender;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 88, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which City has a Date of may 19, 1976?", "schema": "CREATE TABLE table_name_23 (city VARCHAR, date VARCHAR)", "sql": "SELECT city FROM table_name_23 WHERE date = 'may 19, 1976';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 59, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Find the name of bank branches that provided some loans.", "schema": "CREATE TABLE loan (branch_id VARCHAR); CREATE TABLE bank (bname VARCHAR, branch_id VARCHAR)", "sql": "SELECT DISTINCT T1.bname FROM bank AS T1 JOIN loan AS T2 ON T1.branch_id = T2.branch_id;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 88, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many years was Audi Sport Team Joest in 3rd position?", "schema": "CREATE TABLE table_name_8 (year VARCHAR, team VARCHAR, pos VARCHAR)", "sql": "SELECT COUNT(year) FROM table_name_8 WHERE team = 'audi sport team joest' AND pos = '3rd';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 90, "num_statements": 1} {"question": "How many items were sold in each region in the last month?", "schema": "CREATE TABLE sales (sale_id INT, product_id INT, quantity INT, sale_date DATE); INSERT INTO sales (sale_id, product_id, quantity, sale_date) VALUES (1, 1, 3, '2022-01-05'), (2, 2, 1, '2022-01-07'); CREATE TABLE product (product_id INT, product_name TEXT, region_id INT); INSERT INTO product (product_id, product_name, region_id) VALUES (1, 'Coca Cola', 1), (2, 'Pizza', 2); CREATE TABLE region (region_id INT, region_name TEXT); INSERT INTO region (region_id, region_name) VALUES (1, 'North'), (2, 'South');", "sql": "SELECT r.region_name, SUM(s.quantity) as total_sold FROM sales s JOIN product p ON s.product_id = p.product_id JOIN region r ON p.region_id = r.region_id WHERE s.sale_date >= DATE_SUB(CURRENT_DATE, INTERVAL 1 MONTH) GROUP BY r.region_name;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 239, "num_statements": 1} {"question": "What is the total donation amount by day for the last week?", "schema": "CREATE TABLE donations (id INT, donation_amount DECIMAL, donation_date DATE); INSERT INTO donations (id, donation_amount, donation_date) VALUES (1, 100.00, '2022-01-01'), (2, 200.00, '2022-01-05');", "sql": "SELECT DATE(donation_date) as donation_day, SUM(donation_amount) as total_donations FROM donations WHERE donation_date >= DATE_SUB(CURRENT_DATE, INTERVAL 1 WEEK) GROUP BY donation_day ORDER BY donation_day;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 206, "num_statements": 1} {"question": "How many charging stations are there in Beijing for electric cars?", "schema": "CREATE TABLE charging_stations(id INT, station_number INT, city VARCHAR(20), charger_type VARCHAR(20), operational BOOLEAN);", "sql": "SELECT COUNT(*) FROM charging_stations WHERE city = 'Beijing' AND charger_type = 'DC Fast Charger' AND operational = TRUE;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 122, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many courses are offered?", "schema": "CREATE TABLE CLASS (crs_code VARCHAR)", "sql": "SELECT COUNT(DISTINCT crs_code) FROM CLASS;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 43, "num_statements": 1} {"question": "How many policyholders live in each state?", "schema": "CREATE TABLE policyholders (id INT, policyholder_name VARCHAR(50), state VARCHAR(20)); INSERT INTO policyholders (id, policyholder_name, state) VALUES (1, 'John Doe', 'Texas'), (2, 'Jane Smith', 'California'), (3, 'Alice Johnson', 'Texas'), (4, 'Bob Brown', 'New York');", "sql": "SELECT state, COUNT(*) as policyholder_count FROM policyholders GROUP BY state;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 79, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: When Kubb is in 9th, who is in 10th?", "schema": "CREATE TABLE table_17111812_1 (tenth VARCHAR, ninth VARCHAR)", "sql": "SELECT tenth FROM table_17111812_1 WHERE ninth = 'Kubb';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 56, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Show names of actors that have appeared in musical with name \"The Phantom of the Opera\".", "schema": "CREATE TABLE actor (Name VARCHAR, Musical_ID VARCHAR); CREATE TABLE musical (Musical_ID VARCHAR, Name VARCHAR)", "sql": "SELECT T1.Name FROM actor AS T1 JOIN musical AS T2 ON T1.Musical_ID = T2.Musical_ID WHERE T2.Name = 'The Phantom of the Opera';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 127, "num_statements": 1} {"question": "What is the total quantity of electronics transported by the Ever Ace?", "schema": "CREATE TABLE Vessels (VesselID INT, VesselName VARCHAR(100), VesselType VARCHAR(100), PortID INT); INSERT INTO Vessels (VesselID, VesselName, VesselType, PortID) VALUES (1, 'Ever Ace', 'Container Ship', 1); CREATE TABLE Cargo (CargoID INT, CargoName VARCHAR(100), Quantity INT, VesselID INT); INSERT INTO Cargo (CargoID, CargoName, Quantity, VesselID) VALUES (1, 'Electronics', 10000, 1); INSERT INTO Cargo (CargoID, CargoName, Quantity, VesselID) VALUES (2, 'Vehicles', 5000, 2);", "sql": "SELECT SUM(Cargo.Quantity) FROM Cargo WHERE Cargo.VesselID = 1 AND Cargo.CargoName = 'Electronics';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 99, "num_statements": 1} {"question": "What is the total number of green buildings in Africa?", "schema": "CREATE TABLE Green_Buildings (id INT, region VARCHAR(20), number_of_buildings INT); INSERT INTO Green_Buildings (id, region, number_of_buildings) VALUES (1, 'Europe', 5000), (2, 'Asia', 7000), (3, 'Africa', 3000);", "sql": "SELECT SUM(number_of_buildings) FROM Green_Buildings WHERE region = 'Africa';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 77, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the Manufacturer with a Fuel Type that is diesel?", "schema": "CREATE TABLE table_name_9 (manufacturer VARCHAR, fuel_type VARCHAR)", "sql": "SELECT manufacturer FROM table_name_9 WHERE fuel_type = 'diesel';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 65, "num_statements": 1} {"question": "What is the total population of vulnerable underwater species by location?", "schema": "CREATE TABLE Vulnerable_Species(id INT, location VARCHAR(50), species VARCHAR(50), population INT, vulnerable_status VARCHAR(50));", "sql": "SELECT location, SUM(population) AS Total_Population FROM Vulnerable_Species WHERE vulnerable_status = 'Vulnerable' GROUP BY location;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 134, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What date is associated with the Loss of Farrell (6-13)?", "schema": "CREATE TABLE table_name_20 (date VARCHAR, loss VARCHAR)", "sql": "SELECT date FROM table_name_20 WHERE loss = 'farrell (6-13)';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 61, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the score of the game with a loss of Myers (5-6)?", "schema": "CREATE TABLE table_name_47 (score VARCHAR, loss VARCHAR)", "sql": "SELECT score FROM table_name_47 WHERE loss = 'myers (5-6)';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 59, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: in electorate of 83850 what is the minimum s split vote", "schema": "CREATE TABLE table_15082102_3 (s_spoilt_vote INTEGER, electorate VARCHAR)", "sql": "SELECT MIN(s_spoilt_vote) FROM table_15082102_3 WHERE electorate = 83850;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 73, "num_statements": 1} {"question": "What is the number of unique users who streamed each music festival genre?", "schema": "CREATE TABLE genre_artist (artist_id INT, genre VARCHAR(255));", "sql": "SELECT g.genre, COUNT(DISTINCT u.user_id) as num_users FROM user_streams u JOIN festival_performances f ON u.artist_id = f.artist_id JOIN genre_artist g ON f.artist_id = g.artist_id GROUP BY g.genre;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "plpgsql", "is_postgresql_specific": false, "sql_length": 199, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: When was the ship launched when the commissioned or completed(*) is 6 june 1864?", "schema": "CREATE TABLE table_12592074_1 (launched VARCHAR, commissioned_or_completed_ VARCHAR, _ VARCHAR)", "sql": "SELECT launched FROM table_12592074_1 WHERE commissioned_or_completed_ * _ = '6 June 1864';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 91, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: tell me the total number of decile with a roll showing 251.", "schema": "CREATE TABLE table_name_90 (decile VARCHAR, roll VARCHAR)", "sql": "SELECT COUNT(decile) FROM table_name_90 WHERE roll = 251;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 57, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is 2004, when 2005 is \"Not Tier I\"?", "schema": "CREATE TABLE table_name_31 (Id VARCHAR)", "sql": "SELECT 2004 FROM table_name_31 WHERE 2005 = 'not tier i';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 57, "num_statements": 1} {"question": "What is the prevalence of diabetes and hypertension among patients in indigenous communities in New Mexico?", "schema": "CREATE TABLE patients(id INT, name TEXT, community TEXT, diabetes BOOLEAN, hypertension BOOLEAN); INSERT INTO patients(id, name, community, diabetes, hypertension) VALUES (1, 'Patient A', 'New Mexico Indigenous', TRUE, TRUE), (2, 'Patient B', 'New Mexico Indigenous', FALSE, FALSE), (3, 'Patient C', 'California Non-Indigenous', TRUE, FALSE), (4, 'Patient D', 'Florida Non-Indigenous', FALSE, TRUE);", "sql": "SELECT COUNT(*) as patient_count, diabetes as diabetes_prevalence, hypertension as hypertension_prevalence FROM patients WHERE community = 'New Mexico Indigenous' GROUP BY diabetes, hypertension;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 195, "num_statements": 1} {"question": "How many community policing events were held in each district in the last year, ordered by the highest number of events?", "schema": "CREATE TABLE CommunityPolicing (id INT, district VARCHAR(20), event_date DATE);", "sql": "SELECT district, COUNT(*) as num_events FROM CommunityPolicing WHERE event_date >= DATE_SUB(CURDATE(), INTERVAL 1 YEAR) GROUP BY district ORDER BY num_events DESC;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 163, "num_statements": 1} {"question": "What is the total resource depletion cost for each mine?", "schema": "CREATE TABLE mine (id INT, name VARCHAR(50), location VARCHAR(50)); CREATE TABLE cost (mine_id INT, year INT, cost INT);", "sql": "SELECT mine.name, SUM(cost.cost) FROM cost JOIN mine ON cost.mine_id = mine.id GROUP BY mine.name;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 98, "num_statements": 1} {"question": "How many artworks were created by female artists in each country?", "schema": "CREATE TABLE artworks (id INT, name TEXT, artist TEXT, country TEXT); INSERT INTO artworks (id, name, artist, country) VALUES (1, 'Painting', 'Sarah Johnson', 'USA'), (2, 'Sculpture', 'Maria Rodriguez', 'Spain'), (3, 'Installation', 'Yumi Lee', 'South Korea');", "sql": "SELECT country, COUNT(*) as num_artworks FROM artworks WHERE artist LIKE '%female%' GROUP BY country;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 101, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the launched date of the destroyer completed 1 July 1936?", "schema": "CREATE TABLE table_name_28 (launched VARCHAR, completed VARCHAR)", "sql": "SELECT launched FROM table_name_28 WHERE completed = '1 july 1936';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 67, "num_statements": 1} {"question": "Insert a new record with permit ID 456, contractor 'Green Construction', and timeline '2022-01-01 - 2022-04-30' in the building_projects table", "schema": "CREATE TABLE building_projects (permit_id INT, contractor VARCHAR(100), timeline DATE);", "sql": "INSERT INTO building_projects (permit_id, contractor, timeline) VALUES (456, 'Green Construction', '2022-01-01'::DATE, '2022-04-30'::DATE);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 139, "num_statements": 1} {"question": "Write the SQL definition from PostgreSQL contrib extension 'contrib' (item 28).", "schema": null, "sql": "CREATE OPERATOR >= (\n LEFTARG = CITEXT,\n RIGHTARG = CITEXT,\n NEGATOR = <,\n COMMUTATOR = <=,\n PROCEDURE = citext_ge,\n RESTRICT = scalargtsel,\n JOIN = scalargtjoinsel\n);", "explanation": "SQL definition from the 'contrib' PostgreSQL contrib extension.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "ddl_advanced", "is_postgresql_specific": true, "sql_length": 205, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Show the locations of parties and the names of the party hosts in ascending order of the age of the host.", "schema": "CREATE TABLE party (Location VARCHAR, Party_ID VARCHAR); CREATE TABLE HOST (Name VARCHAR, Host_ID VARCHAR, Age VARCHAR); CREATE TABLE party_host (Host_ID VARCHAR, Party_ID VARCHAR)", "sql": "SELECT T3.Location, T2.Name FROM party_host AS T1 JOIN HOST AS T2 ON T1.Host_ID = T2.Host_ID JOIN party AS T3 ON T1.Party_ID = T3.Party_ID ORDER BY T2.Age;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 155, "num_statements": 1} {"question": "What is the total weight of all satellites launched by Japan in the satellites table?", "schema": "CREATE TABLE satellites (name TEXT, country TEXT, weight FLOAT); INSERT INTO satellites (name, country, weight) VALUES ('Hayabusa', 'Japan', 500.0);", "sql": "SELECT SUM(weight) FROM satellites WHERE country = 'Japan';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 59, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the Aggregate of Bayer Leverkusen opponents?", "schema": "CREATE TABLE table_name_56 (aggregate VARCHAR, opponents VARCHAR)", "sql": "SELECT aggregate FROM table_name_56 WHERE opponents = 'bayer leverkusen';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 73, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which location led to a decision and a record of 1-2?", "schema": "CREATE TABLE table_name_36 (location VARCHAR, method VARCHAR, record VARCHAR)", "sql": "SELECT location FROM table_name_36 WHERE method = 'decision' AND record = '1-2';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 80, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many different frequencies does the model with part number ado540biaa5do?", "schema": "CREATE TABLE table_13869651_3 (frequency VARCHAR, part_number_s_ VARCHAR)", "sql": "SELECT COUNT(frequency) FROM table_13869651_3 WHERE part_number_s_ = 'ADO540BIAA5DO';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 85, "num_statements": 1} {"question": "What is the average energy efficiency rating of residential buildings in California, grouped by city and construction period?", "schema": "CREATE TABLE residential_buildings (id INT, city VARCHAR(100), state VARCHAR(50), energy_efficiency_rating FLOAT, construction_period DATE); INSERT INTO residential_buildings (id, city, state, energy_efficiency_rating, construction_period) VALUES (1, 'City A', 'California', 80, '2000-01-01'); INSERT INTO residential_buildings (id, city, state, energy_efficiency_rating, construction_period) VALUES (2, 'City B', 'California', 85, '2005-01-01');", "sql": "SELECT city, YEAR(construction_period) AS construction_year, AVG(energy_efficiency_rating) AS avg_rating FROM residential_buildings WHERE state = 'California' GROUP BY city, YEAR(construction_period);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 200, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the original air date of the episode written by michael glassberg?", "schema": "CREATE TABLE table_26866299_1 (original_airdate VARCHAR, writer VARCHAR)", "sql": "SELECT original_airdate FROM table_26866299_1 WHERE writer = 'Michael Glassberg';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 81, "num_statements": 1} {"question": "Update the cost of an existing agricultural innovation project, identified by its id, to the new given cost.", "schema": "CREATE TABLE agricultural_innovation_projects (id INT, project_name VARCHAR(255), location VARCHAR(255), sector VARCHAR(255), cost FLOAT); INSERT INTO agricultural_innovation_projects (id, project_name, location, sector, cost) VALUES (1, 'Precision Agriculture', 'Country 1', 'Agriculture', 35000.00), (2, 'Drip Irrigation', 'Country 2', 'Agriculture', 28000.00);", "sql": "UPDATE agricultural_innovation_projects SET cost = 37000.00 WHERE id = 1;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 73, "num_statements": 1} {"question": "PostgreSQL regression test 'jsonb': Write the SELECT query (example 292).", "schema": null, "sql": "SELECT ia1 FROM jsonb_populate_record(NULL::jsbrec, '{\"ia1\": [1, \"2\", null, 4]}') q;", "explanation": "Regression test for Jsonb in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT ia1 FROM jsonb_populate_record(NULL::jsbrec, '{\"ia1\": [1, \"2\", null, 4]}') q) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 84, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is Player, when Previous Team (League) is \"Medicine Hat Tigers ( WHL )\", and when Year is less than 2002?", "schema": "CREATE TABLE table_name_90 (player VARCHAR, previous_team__league_ VARCHAR, year VARCHAR)", "sql": "SELECT player FROM table_name_90 WHERE previous_team__league_ = 'medicine hat tigers ( whl )' AND year < 2002;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 110, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the opponent for record 10-4", "schema": "CREATE TABLE table_18894744_5 (opponent VARCHAR, record VARCHAR)", "sql": "SELECT opponent FROM table_18894744_5 WHERE record = '10-4';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 60, "num_statements": 1} {"question": "What is the average fare for buses in the 'city' schema, excluding fares less than $1?", "schema": "CREATE SCHEMA city; CREATE TABLE city.buses (id INT, fare DECIMAL); INSERT INTO city.buses (id, fare) VALUES (1, 2.50), (2, 1.75), (3, 3.00);", "sql": "SELECT AVG(fare) FROM city.buses WHERE fare > 1;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 48, "num_statements": 1} {"question": "PostgreSQL regression test 'stats': Write the SELECT query (example 134).", "schema": null, "sql": "SELECT pg_stat_get_live_tuples(:drop_stats_test_xact_oid);", "explanation": "Regression test for Stats in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT pg_stat_get_live_tuples(:drop_stats_test_xact_oid)) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 58, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the population in 2011 of banatski karlovac?", "schema": "CREATE TABLE table_2562572_44 (population__2011_ VARCHAR, settlement VARCHAR)", "sql": "SELECT population__2011_ FROM table_2562572_44 WHERE settlement = 'Banatski Karlovac';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 86, "num_statements": 1} {"question": "What was the lowest peacekeeping mission duration for an African country in 2017?", "schema": "CREATE TABLE african_peacekeeping_missions (country VARCHAR(50), year INT, duration INT); INSERT INTO african_peacekeeping_missions (country, year, duration) VALUES ('Mali', 2017, 120), ('South Sudan', 2017, 150), ('Central African Republic', 2017, 180), ('DRC', 2017, 210), ('Somalia', 2017, 240), ('Western Sahara', 2017, 270);", "sql": "SELECT MIN(duration) FROM african_peacekeeping_missions WHERE year = 2017;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 74, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What time slots have a 6.3 rating", "schema": "CREATE TABLE table_11244302_1 (time_slot__est_ VARCHAR, rating VARCHAR)", "sql": "SELECT time_slot__est_ FROM table_11244302_1 WHERE rating = '6.3';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which Bronze is the lowest one that has a Nation of total, and a Gold smaller than 14?", "schema": "CREATE TABLE table_name_38 (bronze INTEGER, nation VARCHAR, gold VARCHAR)", "sql": "SELECT MIN(bronze) FROM table_name_38 WHERE nation = 'total' AND gold < 14;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 75, "num_statements": 1} {"question": "How many European destinations did Russian tourists visit in 2020?", "schema": "CREATE TABLE tourism_stats (id INT PRIMARY KEY, year INT, country VARCHAR(255), destination VARCHAR(255)); INSERT INTO tourism_stats (id, year, country, destination) VALUES (1, 2020, 'Russia', 'France'), (2, 2020, 'Russia', 'Italy'), (3, 2020, 'Russia', 'Spain'), (4, 2020, 'Russia', 'Germany');", "sql": "SELECT COUNT(DISTINCT destination) FROM tourism_stats WHERE country = 'Russia' AND destination LIKE 'Europe%' AND year = 2020;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 126, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many years is ur sulcus listed with a diameter less than 1,145.0?", "schema": "CREATE TABLE table_name_68 (year_named VARCHAR, name VARCHAR, diameter VARCHAR)", "sql": "SELECT COUNT(year_named) FROM table_name_68 WHERE name = 'ur sulcus' AND diameter < 1 OFFSET 145.0;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 99, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What kind of Tongan has a North Marquesan of /haʔe/?", "schema": "CREATE TABLE table_name_89 (tongan VARCHAR, north_marquesan VARCHAR)", "sql": "SELECT tongan FROM table_name_89 WHERE north_marquesan = '/haʔe/';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "What is the maximum speed recorded for a shared electric bicycle in Berlin, Germany?", "schema": "CREATE TABLE shared_ebikes (ebike_id INT, ride_id INT, trip_start_time TIMESTAMP, trip_end_time TIMESTAMP, start_location TEXT, end_location TEXT, city TEXT, max_speed DECIMAL);", "sql": "SELECT MAX(max_speed) FROM shared_ebikes WHERE city = 'Berlin';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 63, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which Silver has a Total of 7, and a Gold larger than 1?", "schema": "CREATE TABLE table_name_10 (silver INTEGER, total VARCHAR, gold VARCHAR)", "sql": "SELECT AVG(silver) FROM table_name_10 WHERE total = 7 AND gold > 1;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 67, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'foreign_key' (example 1021).", "schema": null, "sql": "CREATE TABLE fk_d PARTITION OF fk DEFAULT;", "explanation": "DDL from PostgreSQL core regression test for Foreign Key.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_table", "is_postgresql_specific": false, "sql_length": 42, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the date for the Champs Sports Bowl?", "schema": "CREATE TABLE table_20996923_20 (date VARCHAR, bowl_game VARCHAR)", "sql": "SELECT date FROM table_20996923_20 WHERE bowl_game = 'Champs Sports Bowl';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 74, "num_statements": 1} {"question": "Show the total number of ad clicks and the click-through rate (CTR) for each advertising campaign in the last quarter.", "schema": "CREATE TABLE campaigns (campaign_id INT, campaign_name VARCHAR(255), start_date DATE, end_date DATE); CREATE TABLE ad_impressions (ad_id INT, campaign_id INT, impressions INT, click_date DATE);", "sql": "SELECT c.campaign_name, SUM(ai.impressions) as total_impressions, SUM(ai.clicks) as total_clicks, SUM(ai.clicks) / SUM(ai.impressions) as ctr FROM campaigns c INNER JOIN ad_impressions ai ON c.campaign_id = ai.campaign_id WHERE ai.click_date BETWEEN DATE_SUB(CURDATE(), INTERVAL 3 MONTH) AND CURDATE() GROUP BY c.campaign_name;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 327, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the average wins when teams is lions and the percent is more than 0?", "schema": "CREATE TABLE table_name_31 (wins INTEGER, teams VARCHAR, percent VARCHAR)", "sql": "SELECT AVG(wins) FROM table_name_31 WHERE teams = 'lions' AND percent > 0;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 74, "num_statements": 1} {"question": "How many financial capability training sessions were conducted in Brazil in H1 of 2022?", "schema": "CREATE TABLE financial_capability_training_brazil (id INT, semester INT, year INT, sessions INT); INSERT INTO financial_capability_training_brazil (id, semester, year, sessions) VALUES (1, 1, 2022, 25), (2, 1, 2022, 30), (3, 2, 2022, 40);", "sql": "SELECT SUM(sessions) FROM financial_capability_training_brazil WHERE semester IN (1, 2) AND year = 2022 AND country = 'Brazil';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 127, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the Kickoff Time on January 7, 2002?", "schema": "CREATE TABLE table_name_72 (kickoff_time VARCHAR, date VARCHAR)", "sql": "SELECT kickoff_time FROM table_name_72 WHERE date = 'january 7, 2002';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 70, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which Surface has a Partner of galina voskoboeva?", "schema": "CREATE TABLE table_name_45 (surface VARCHAR, partner VARCHAR)", "sql": "SELECT surface FROM table_name_45 WHERE partner = 'galina voskoboeva';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 70, "num_statements": 1} {"question": "What is the total wastewater treatment capacity for each city in India?", "schema": "CREATE TABLE wastewater_treatment_india(id INT, city VARCHAR(50), treatment_type VARCHAR(50), capacity INT, efficiency FLOAT); INSERT INTO wastewater_treatment_india(id, city, treatment_type, capacity, efficiency) VALUES (1, 'Mumbai', 'Screening', 1200, 0.92);", "sql": "SELECT city, SUM(capacity) as total_capacity FROM wastewater_treatment_india GROUP BY city;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 91, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many times was performer 3 Kate Robbins?", "schema": "CREATE TABLE table_14934885_1 (date VARCHAR, performer_3 VARCHAR)", "sql": "SELECT COUNT(date) FROM table_14934885_1 WHERE performer_3 = 'Kate Robbins';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "advanced", "category": "plpgsql", "is_postgresql_specific": false, "sql_length": 76, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'numeric' (example 1).", "schema": null, "sql": "CREATE TABLE num_data (id int4, val numeric(210,10));", "explanation": "DDL from PostgreSQL core regression test for Numeric.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_table", "is_postgresql_specific": false, "sql_length": 53, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'identity' (example 180).", "schema": null, "sql": "INSERT into pitest2_p1 (f1, f2) VALUES ('2016-07-7', 'from pitest2_p1'); -- error\nINSERT into pitest2_p1 (f1, f2, f3) VALUES ('2016-07-7', 'from pitest2_p1', 2000);", "explanation": "DML from PostgreSQL core regression test for Identity.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 164, "num_statements": 2} {"question": "What is the average song_length in the reggae genre?", "schema": "CREATE TABLE genres (genre VARCHAR(10), song_id INT, song_length FLOAT); INSERT INTO genres (genre, song_id, song_length) VALUES ('reggae', 16, 210.5), ('reggae', 17, 225.8), ('reggae', 18, 195.4);", "sql": "SELECT AVG(song_length) FROM genres WHERE genre = 'reggae';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 59, "num_statements": 1} {"question": "Identify the total count of cases and total billable hours for attorneys in the 'billing' and 'cases' tables, grouped by attorney ethnicity.", "schema": "CREATE TABLE attorney_ethnicity (attorney_id INT, ethnicity VARCHAR(30)); INSERT INTO attorney_ethnicity VALUES (1, 'Hispanic'), (2, 'Asian'), (3, 'African American'), (4, 'Caucasian'); CREATE TABLE billing (attorney_id INT, hours DECIMAL(5,2)); CREATE TABLE cases (case_id INT, attorney_id INT);", "sql": "SELECT e.ethnicity, COUNT(c.attorney_id) AS total_cases, SUM(b.hours) AS total_hours FROM attorney_ethnicity e JOIN billing b ON e.attorney_id = b.attorney_id JOIN cases c ON e.attorney_id = c.attorney_id GROUP BY e.ethnicity;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 226, "num_statements": 1} {"question": "Which public transportation modes have the highest passenger capacity in Berlin, Germany?", "schema": "CREATE TABLE public_transportation (transport_id INT, type TEXT, capacity INT, in_service BOOLEAN, city TEXT);", "sql": "SELECT type, MAX(capacity) FROM public_transportation WHERE city = 'Berlin' AND in_service = TRUE GROUP BY type;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 112, "num_statements": 1} {"question": "What is the distribution of user interactions (like, comment, share) by article?", "schema": "CREATE TABLE user_interactions_by_article (user_id text, article_id text, interaction text); INSERT INTO user_interactions_by_article (user_id, article_id, interaction) VALUES ('User 7', 'Article 11', 'like'); INSERT INTO user_interactions_by_article (user_id, article_id, interaction) VALUES ('User 8', 'Article 11', 'comment');", "sql": "SELECT article_id, interaction, COUNT(*) as count FROM user_interactions_by_article GROUP BY article_id, interaction;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 117, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the result of Saturday that's ថ្ងៃអាទិត្យ [tŋaj ʔaatɨt ] on Sunday?", "schema": "CREATE TABLE table_name_30 (saturday_shani__saturn_ VARCHAR, sunday_surya__the_sun_ VARCHAR)", "sql": "SELECT saturday_shani__saturn_ FROM table_name_30 WHERE sunday_surya__the_sun_ = 'ថ្ងៃអាទិត្យ [tŋaj ʔaatɨt ]';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 110, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: what is the margin of error when the undicided is 8% and other is 4%?", "schema": "CREATE TABLE table_name_77 (margin_of_error VARCHAR, undecided VARCHAR, other VARCHAR)", "sql": "SELECT margin_of_error FROM table_name_77 WHERE undecided = '8%' AND other = '4%';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 82, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'citext' (example 22).", "schema": null, "sql": "SELECT citext_cmp('AARDVARK'::citext, 'AARDVARK'::citext) AS zero;", "explanation": "Example query from the 'citext' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "What is the total budget for movies released in 2017 and 2018?", "schema": "CREATE TABLE Movies (title VARCHAR(255), release_year INT, budget INT); INSERT INTO Movies (title, release_year, budget) VALUES ('Movie1', 2015, 50000000), ('Movie2', 2016, 75000000), ('Movie3', 2017, 60000000), ('Movie4', 2018, 80000000), ('Movie5', 2019, 90000000);", "sql": "SELECT SUM(budget) FROM Movies WHERE release_year IN (2017, 2018);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "What is the minimum number of cases handled by restorative justice programs in Washington D.C. in a single year?", "schema": "CREATE TABLE restorative_justice_programs (program_id INT, cases_handled INT, year INT, district VARCHAR(20)); INSERT INTO restorative_justice_programs (program_id, cases_handled, year, district) VALUES (1, 100, 2021, 'Washington D.C.'), (2, 200, 2020, 'Washington D.C.'), (3, 150, 2019, 'Washington D.C.'), (4, 250, 2018, 'Washington D.C.'), (5, 300, 2017, 'Washington D.C.');", "sql": "SELECT MIN(cases_handled) FROM restorative_justice_programs WHERE year >= 2017 AND district = 'Washington D.C.';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 112, "num_statements": 1} {"question": "List all seafood providers based in California for 'Fresh Catch'?", "schema": "CREATE TABLE FreshCatch (id INT, provider VARCHAR(30), product VARCHAR(20), state VARCHAR(20)); INSERT INTO FreshCatch (id, provider, product, state) VALUES (1, 'Sea Breeze', 'Tuna', 'California'), (2, 'Pacific Harvest', 'Salmon', 'Oregon');", "sql": "SELECT DISTINCT provider FROM FreshCatch WHERE state = 'California' AND product LIKE '%seafood%';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 97, "num_statements": 1} {"question": "What is the maximum length of stay in prison for individuals who have been released in the past year, grouped by their offense type?", "schema": "CREATE TABLE prison_releases (id INT, offense_type TEXT, release_date DATE, length_of_stay INT);", "sql": "SELECT offense_type, MAX(length_of_stay) FROM prison_releases WHERE release_date >= DATE_SUB(CURRENT_DATE, INTERVAL 1 YEAR) GROUP BY offense_type;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 146, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'pg_visibility' (example 63).", "schema": null, "sql": "select count(*) > 0 from pg_visibility_map('test_partition');", "explanation": "Example query from the 'pg_visibility' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 61, "num_statements": 1} {"question": "Which customers have not made any transactions in the last 3 months for an investment business?", "schema": "CREATE TABLE customers (customer_id INT, name VARCHAR(255)); CREATE TABLE investment_transactions (transaction_id INT, customer_id INT, amount DECIMAL(10,2), trans_date DATE);", "sql": "SELECT customers.name FROM customers LEFT JOIN investment_transactions ON customers.customer_id = investment_transactions.customer_id WHERE investment_transactions.trans_date IS NULL OR investment_transactions.trans_date < NOW() - INTERVAL '3 months';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 251, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the grid total that has a Time/Retired of + 1:33.141, and under 70 laps?", "schema": "CREATE TABLE table_name_27 (grid INTEGER, time_retired VARCHAR, laps VARCHAR)", "sql": "SELECT SUM(grid) FROM table_name_27 WHERE time_retired = '+ 1:33.141' AND laps < 70;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 84, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What opponent has a loss of McCaskill (9-11)?", "schema": "CREATE TABLE table_name_10 (opponent VARCHAR, loss VARCHAR)", "sql": "SELECT opponent FROM table_name_10 WHERE loss = 'mccaskill (9-11)';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 67, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the score of the game with a 3-0 record?", "schema": "CREATE TABLE table_name_67 (score VARCHAR, record VARCHAR)", "sql": "SELECT score FROM table_name_67 WHERE record = '3-0';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 53, "num_statements": 1} {"question": "What is the number of clients living in each city?", "schema": "CREATE TABLE clients (client_id INT, name VARCHAR(50), age INT, city VARCHAR(50)); INSERT INTO clients VALUES (1, 'John Doe', 55, 'New York'), (2, 'Jane Smith', 45, 'Los Angeles'), (3, 'Mike Johnson', 58, 'New York'), (4, 'Alice Davis', 35, 'Chicago'), (5, 'Bob Brown', 40, 'New York');", "sql": "SELECT city, COUNT(*) FROM clients GROUP BY city;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 49, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the lowest of the Highest score for the Quickstep Dance and the Lowest score under 16?", "schema": "CREATE TABLE table_name_72 (highest_score INTEGER, dance VARCHAR, lowest_score VARCHAR)", "sql": "SELECT MIN(highest_score) FROM table_name_72 WHERE dance = 'quickstep' AND lowest_score < 16;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 93, "num_statements": 1} {"question": "PostgreSQL regression test 'geometry': Write the SELECT query (example 97).", "schema": null, "sql": "SELECT npoints(f1) AS npoints, f1 AS polygon\n FROM POLYGON_TBL;", "explanation": "Regression test for Geometry in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT npoints(f1) AS npoints, f1 AS polygon\n FROM POLYGON_TBL) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 65, "num_statements": 1} {"question": "pgTAP test for Pgtap--Unpackaged--0.91.0 (assertion 535).", "schema": null, "sql": "ALTER EXTENSION pgtap ADD FUNCTION is_aggregate( NAME, NAME, NAME[] );", "explanation": "SQL assertion from pgTAP test suite for Pgtap--Unpackaged--0.91.0.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 70, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the notes of the 400 m event before 2004 with a position of 12th (h)?", "schema": "CREATE TABLE table_name_69 (notes VARCHAR, position VARCHAR, event VARCHAR, year VARCHAR)", "sql": "SELECT notes FROM table_name_69 WHERE event = '400 m' AND year < 2004 AND position = '12th (h)';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 96, "num_statements": 1} {"question": "What is the total value of military equipment sales to the United Kingdom and Canada combined from 2018 to 2021?", "schema": "CREATE TABLE MilitaryEquipmentSales (id INT PRIMARY KEY, sale_year INT, equipment_type VARCHAR(50), country VARCHAR(50), sale_value FLOAT); INSERT INTO MilitaryEquipmentSales (id, sale_year, equipment_type, country, sale_value) VALUES (1, 2020, 'Aircraft', 'United States', 12000000), (2, 2021, 'Vehicles', 'United States', 8000000), (3, 2018, 'Naval Equipment', 'United Kingdom', 10000000), (4, 2019, 'Radars', 'Canada', 5000000);", "sql": "SELECT SUM(sale_value) FROM MilitaryEquipmentSales WHERE (country = 'United Kingdom' OR country = 'Canada') AND sale_year BETWEEN 2018 AND 2021;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 144, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: When the socket is bga956 and processor is penryn-3m, what is the tdp?", "schema": "CREATE TABLE table_24100843_1 (tdp VARCHAR, socket VARCHAR, processor VARCHAR)", "sql": "SELECT tdp FROM table_24100843_1 WHERE socket = 'BGA956' AND processor = 'Penryn-3M';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 85, "num_statements": 1} {"question": "How many labor accidents occurred in mining sites located in the Amazon rainforest in 2021?", "schema": "CREATE TABLE MiningSites (SiteID int, SiteName varchar(50), Location varchar(50)); INSERT INTO MiningSites VALUES (1, 'SiteA', 'Amazon Rainforest'), (2, 'SiteB', 'Andes Mountains'), (3, 'SiteC', 'Sahara Desert'); CREATE TABLE AccidentData (SiteID int, AccidentDate date); INSERT INTO AccidentData VALUES (1, '2021-03-15'), (1, '2021-06-27'), (3, '2021-09-04'), (2, '2021-12-18'), (1, '2021-11-29');", "sql": "SELECT COUNT(*) as LaborAccidents FROM AccidentData ad JOIN MiningSites ms ON ad.SiteID = ms.SiteID WHERE ms.Location = 'Amazon Rainforest' AND ad.AccidentDate BETWEEN '2021-01-01' AND '2021-12-31';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 198, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is Champion, when Year is greater than 2007, and when Finalist is \"Asvel\"?", "schema": "CREATE TABLE table_name_71 (champion VARCHAR, year VARCHAR, finalist VARCHAR)", "sql": "SELECT champion FROM table_name_71 WHERE year > 2007 AND finalist = 'asvel';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 76, "num_statements": 1} {"question": "PostgreSQL regression test 'tablespace': Write the SELECT query (example 55).", "schema": null, "sql": "SELECT b.relname,\n CASE WHEN a.relfilenode = b.relfilenode THEN 'relfilenode is unchanged'\n ELSE 'relfilenode has changed' END AS filenode,\n CASE WHEN a.reltablespace = b.reltablespace THEN 'reltablespace is unchanged'\n ELSE 'reltablespace has changed' END AS tbspace\n FROM reindex_temp_before b JOIN pg_class a ON b.relname = a.relname\n ORDER BY 1;", "explanation": "Regression test for Tablespace in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT b.relname,\n CASE WHEN a.relfilenode = b.relfilenode THEN 'relfilenode is unchanged'\n ELSE 'relfilenode has changed' END AS filenode,\n CASE WHEN a.reltablespace = b.reltablespace THEN 'reltablespace is unchanged'\n ELSE 'reltablespace has changed' END AS tbspace\n FROM reindex_temp_before b JOIN pg_class a ON b.relname = a.relname\n ORDER BY 1) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": true, "sql_length": 375, "num_statements": 1} {"question": "Write the PL/pgSQL object from PostgreSQL regression test 'create_aggregate' (example 38).", "schema": null, "sql": "-- Ensure all these functions made it into the catalog\nSELECT aggfnoid, aggtransfn, aggcombinefn, aggtranstype::regtype,\n aggserialfn, aggdeserialfn, aggfinalmodify\nFROM pg_aggregate\nWHERE aggfnoid = 'myavg'::REGPROC;", "explanation": "PL/pgSQL object from PostgreSQL core test for Create Aggregate.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 223, "num_statements": 1} {"question": "What is the total revenue generated from domestic shipments?", "schema": "CREATE TABLE shipments (id INT, shipment_type VARCHAR(10), revenue DECIMAL(10,2)); INSERT INTO shipments (id, shipment_type, revenue) VALUES (1, 'domestic', 500.00), (2, 'international', 800.00);", "sql": "SELECT SUM(revenue) FROM shipments WHERE shipment_type = 'domestic';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 68, "num_statements": 1} {"question": "What is the average word count of articles in each country, for articles published in the 'politics' category?", "schema": "CREATE TABLE countries (id INT, name TEXT); CREATE TABLE categories (id INT, name TEXT); CREATE TABLE articles (id INT, title TEXT, content TEXT, category_id INT, country_id INT); INSERT INTO countries (id, name) VALUES (1, 'USA'), (2, 'Canada'), (3, 'Mexico'); INSERT INTO categories (id, name) VALUES (1, 'Politics'), (2, 'Technology'), (3, 'Sports'); INSERT INTO articles (id, title, content, category_id, country_id) VALUES (1, 'Article 1', 'Content 1', 1, 1), (2, 'Article 2', 'Content 2', 2, 2), (3, 'Article 3', 'Content 3', 1, 3), (4, 'Article 4', 'Content 4', 1, 1), (5, 'Article 5', 'Content 5', 3, 2);", "sql": "SELECT countries.name, AVG(LENGTH(articles.content) - LENGTH(REPLACE(articles.content, ' ', '')) + 1) as avg_word_count FROM articles INNER JOIN countries ON articles.country_id = countries.id INNER JOIN categories ON articles.category_id = categories.id WHERE categories.name = 'Politics' GROUP BY countries.name;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 314, "num_statements": 1} {"question": "Write the PL/pgSQL object from PostgreSQL regression test 'rangefuncs' (example 154).", "schema": null, "sql": "DROP FUNCTION getrngfunc4(int);", "explanation": "PL/pgSQL object from PostgreSQL core test for Rangefuncs.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 31, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the region 1 (Canada) date associated with a region 2 (UK) date of May 18, 2009?", "schema": "CREATE TABLE table_240936_2 (region_1__can_ VARCHAR, region_2__uk_ VARCHAR)", "sql": "SELECT region_1__can_ FROM table_240936_2 WHERE region_2__uk_ = 'May 18, 2009';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 79, "num_statements": 1} {"question": "How many electric vehicles are there in Canada and Australia?", "schema": "CREATE TABLE electric_vehicles (id INT, country VARCHAR(255), name VARCHAR(255), quantity INT); INSERT INTO electric_vehicles (id, country, name, quantity) VALUES (1, 'Canada', 'EV A', 5000), (2, 'Australia', 'EV B', 7000), (3, 'USA', 'EV C', 12000);", "sql": "SELECT SUM(quantity) FROM electric_vehicles WHERE country IN ('Canada', 'Australia');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 85, "num_statements": 1} {"question": "What is the average transaction value (in USD) for the Bitcoin network in the last week?", "schema": "CREATE TABLE BitcoinTransactions (id INT, txid VARCHAR(100), value DECIMAL(20,2), timestamp BIGINT); INSERT INTO BitcoinTransactions (id, txid, value, timestamp) VALUES (1, '...', 100, 1643324480), (2, '...', 200, 1643410880);", "sql": "SELECT AVG(value * (SELECT rate FROM ExchangeRates WHERE currency = 'BTC' AND timestamp = tx.timestamp)) as avg_value_usd FROM BitcoinTransactions tx WHERE timestamp >= UNIX_TIMESTAMP(DATE_SUB(CURRENT_DATE, INTERVAL 1 WEEK)) * 1000;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 232, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the largest frequency owned by multicultural broadcasting with a Format of vietnamese?", "schema": "CREATE TABLE table_name_40 (frequency INTEGER, status VARCHAR, format VARCHAR)", "sql": "SELECT MAX(frequency) FROM table_name_40 WHERE status = 'owned by multicultural broadcasting' AND format = 'vietnamese';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 120, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many sound engineers were there for law hakon '3er leek?", "schema": "CREATE TABLE table_28005100_1 (sound_engineer VARCHAR, title VARCHAR)", "sql": "SELECT COUNT(sound_engineer) FROM table_28005100_1 WHERE title = 'Law Hakon '3er Leek';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 87, "num_statements": 1} {"question": "PostgreSQL regression test 'sqljson': Write the SELECT query (example 63).", "schema": null, "sql": "SELECT JSON_OBJECT(RETURNING bytea FORMAT JSON ENCODING UTF32);", "explanation": "Regression test for Sqljson in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT JSON_OBJECT(RETURNING bytea FORMAT JSON ENCODING UTF32)) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": true, "sql_length": 63, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Game site of shea stadium, and a Week smaller than 10, and a Opponent of baltimore colts happened on what date?", "schema": "CREATE TABLE table_name_13 (date VARCHAR, opponent VARCHAR, game_site VARCHAR, week VARCHAR)", "sql": "SELECT date FROM table_name_13 WHERE game_site = 'shea stadium' AND week < 10 AND opponent = 'baltimore colts';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 111, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What spacecrafts had 22 orbital flights?", "schema": "CREATE TABLE table_179174_2 (spacecraft VARCHAR, flights VARCHAR)", "sql": "SELECT spacecraft FROM table_179174_2 WHERE flights = '22 Orbital';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 67, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'numeric_big' (example 126).", "schema": null, "sql": "INSERT INTO num_exp_add VALUES (2,6,'-994877526002806872754342148749240.99659316232359475297606895243958507460511031229368344962653674268847910587702140353344168594152240599109936336446284803020643582102868247857009494139535009572740621288230740389545481395');", "explanation": "DML from PostgreSQL core regression test for Numeric Big.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 260, "num_statements": 1} {"question": "What is the average age of community health workers by their ethnicity?", "schema": "CREATE TABLE CommunityHealthWorkers (WorkerID INT, Age INT, Ethnicity VARCHAR(255)); INSERT INTO CommunityHealthWorkers (WorkerID, Age, Ethnicity) VALUES (1, 45, 'Hispanic'), (2, 34, 'African American'), (3, 50, 'Asian'), (4, 40, 'Caucasian');", "sql": "SELECT Ethnicity, AVG(Age) as AvgAge FROM CommunityHealthWorkers GROUP BY Ethnicity;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 84, "num_statements": 1} {"question": "List all the auto shows with more than 500 exhibitors in Japan.", "schema": "CREATE TABLE Auto_Shows (id INT, name TEXT, exhibitors INT, country TEXT); INSERT INTO Auto_Shows (id, name, exhibitors, country) VALUES (1, 'Tokyo Auto Show', 600, 'Japan'); INSERT INTO Auto_Shows (id, name, exhibitors, country) VALUES (2, 'Osaka Auto Show', 400, 'Japan');", "sql": "SELECT name FROM Auto_Shows WHERE exhibitors > 500 AND country = 'Japan';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 73, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the lowest tie with less than 100 points and 277 goals?", "schema": "CREATE TABLE table_name_57 (tied INTEGER, points VARCHAR, goals_for VARCHAR)", "sql": "SELECT MIN(tied) FROM table_name_57 WHERE points < 100 AND goals_for = 277;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 75, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'foreign_key' (example 1030).", "schema": null, "sql": "INSERT INTO pk VALUES (20), (50);", "explanation": "DML from PostgreSQL core regression test for Foreign Key.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 33, "num_statements": 1} {"question": "What is the maximum number of likes received by a post in each content type in the past week?", "schema": "CREATE TABLE post_likes (post_id INT, content_type VARCHAR(20), likes INT); INSERT INTO post_likes (post_id, content_type, likes) VALUES (1, 'photo', 100), (2, 'video', 200), (3, 'link', 150), (4, 'photo', 250), (5, 'video', 300);", "sql": "SELECT content_type, MAX(likes) AS max_likes FROM post_likes WHERE date >= DATE_SUB(CURRENT_DATE, INTERVAL 1 WEEK) GROUP BY content_type;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 137, "num_statements": 1} {"question": "What is the total revenue for the Pop genre in 2021?", "schema": "CREATE TABLE music_sales (sale_id INT, genre VARCHAR(10), year INT, revenue FLOAT); INSERT INTO music_sales (sale_id, genre, year, revenue) VALUES (1, 'Pop', 2021, 50000.00), (2, 'Rock', 2021, 45000.00), (3, 'Pop', 2020, 40000.00); CREATE VIEW genre_sales AS SELECT genre, SUM(revenue) as total_revenue FROM music_sales GROUP BY genre;", "sql": "SELECT total_revenue FROM genre_sales WHERE genre = 'Pop';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 58, "num_statements": 1} {"question": "How many employees work in the 'Risk Management' department in the 'New York' office?", "schema": "CREATE TABLE office (office_id INT, city VARCHAR(50), state VARCHAR(50)); CREATE TABLE department (department_id INT, department_name VARCHAR(50), office_id INT); CREATE TABLE employee (employee_id INT, employee_name VARCHAR(100), department_id INT); INSERT INTO office (office_id, city, state) VALUES (1, 'New York', 'NY'), (2, 'Toronto', 'ON'); INSERT INTO department (department_id, department_name, office_id) VALUES (1, 'Risk Management', 1), (2, 'IT', 1); INSERT INTO employee (employee_id, employee_name, department_id) VALUES (1, 'Bob Brown', 1), (2, 'Sara Green', 2);", "sql": "SELECT COUNT(*) FROM employee JOIN department ON employee.department_id = department.department_id JOIN office ON department.office_id = office.office_id WHERE office.city = 'New York' AND department.department_name = 'Risk Management';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 236, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the average number of audience for festivals?", "schema": "CREATE TABLE festival_detail (Num_of_Audience INTEGER)", "sql": "SELECT AVG(Num_of_Audience) FROM festival_detail;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 49, "num_statements": 1} {"question": "Determine the percentage of female founders in each country", "schema": "CREATE TABLE founders (id INT, name VARCHAR(255), gender VARCHAR(10), country VARCHAR(255)); INSERT INTO founders (id, name, gender, country) VALUES (1, 'John Doe', 'Male', 'USA'), (2, 'Jane Smith', 'Female', 'USA'), (3, 'Mike Johnson', 'Male', 'Canada'), (4, 'Alice Williams', 'Female', 'Canada'), (5, 'Bob Brown', 'Male', 'UK'), (6, 'Claire Johnson', 'Female', 'UK'), (7, 'Suresh Patel', 'Male', 'India'), (8, 'Priya Gupta', 'Female', 'India');", "sql": "SELECT country, gender, COUNT(*) as head_count, ROUND(COUNT(*)*100.0/SUM(COUNT(*)) OVER (PARTITION BY country), 2) as gender_percentage FROM founders GROUP BY country, gender;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "query_window_function", "is_postgresql_specific": true, "sql_length": 175, "num_statements": 1} {"question": "What is the name and gene sequence of patients with illness 'Flu'?", "schema": "CREATE TABLE patients (id INT PRIMARY KEY, name VARCHAR(255), age INT, gender VARCHAR(10), illness VARCHAR(255)); INSERT INTO patients (id, name, age, gender, illness) VALUES (1, 'Jane Smith', 28, 'Female', 'Flu');", "sql": "SELECT patients.name, patients.illness FROM patients WHERE patients.illness = 'Flu';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 84, "num_statements": 1} {"question": "What is the total number of community engagement events in Oceania with more than 500 participants?", "schema": "CREATE TABLE CommunityEngagement (id INT, name TEXT, region TEXT, participants INT); INSERT INTO CommunityEngagement (id, name, region, participants) VALUES (1, 'Festival 1', 'Oceania', 600), (2, 'Festival 2', 'Oceania', 400), (3, 'Festival 3', 'Europe', 800);", "sql": "SELECT SUM(*) FROM CommunityEngagement WHERE region = 'Oceania' AND participants > 500;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 87, "num_statements": 1} {"question": "Update the membership count for the 'United Auto Workers' union in the 'Transportation' industry in New York.", "schema": "CREATE TABLE unions (id INT, name TEXT, industry TEXT, state TEXT, membership_count INT); INSERT INTO unions (id, name, industry, state, membership_count) VALUES (1, 'United Auto Workers', 'Transportation', 'New York', 5000);", "sql": "UPDATE unions SET membership_count = 5500 WHERE name = 'United Auto Workers' AND industry = 'Transportation' AND state = 'New York';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 132, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many placed 4th on April 25?", "schema": "CREATE TABLE table_19317584_2 (date VARCHAR)", "sql": "SELECT COUNT(4 AS th_placed) FROM table_19317584_2 WHERE date = 'April 25';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 75, "num_statements": 1} {"question": "Which landfills have reached 75% of their capacity or more?", "schema": "CREATE TABLE Landfills (LandfillID INT, Capacity INT, Location VARCHAR(50));CREATE TABLE WasteGenerators (GeneratorID INT, WasteType VARCHAR(20), GeneratedTonnes DECIMAL(5,2), LandfillID INT);CREATE VIEW FilledLandfills AS SELECT L.Location, SUM(WG.GeneratedTonnes) AS TotalTonnes FROM Landfills L INNER JOIN WasteGenerators WG ON L.LandfillID = WG.LandfillID GROUP BY L.Location;", "sql": "SELECT L.Location, (SUM(WG.GeneratedTonnes) / L.Capacity) * 100 AS PercentageFilled FROM FilledLandfills F INNER JOIN Landfills L ON F.Location = L.Location GROUP BY F.Location HAVING PercentageFilled >= 75;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 207, "num_statements": 1} {"question": "What is the average calorie count for gluten-free meals in Japan?", "schema": "CREATE TABLE Meals(id INT, name TEXT, calories INT, is_gluten_free BOOLEAN, country TEXT); INSERT INTO Meals(id, name, calories, is_gluten_free, country) VALUES (1, 'Sashimi Salad', 300, TRUE, 'Japan'), (2, 'Rice Bowl', 450, FALSE, 'Japan');", "sql": "SELECT AVG(calories) FROM Meals WHERE is_gluten_free = TRUE AND country = 'Japan';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 82, "num_statements": 1} {"question": "Show organizations that provided training in 2023", "schema": "CREATE TABLE cultural_competency_training (id INT PRIMARY KEY, organization_name VARCHAR(50), training_title TEXT, training_date DATE);", "sql": "SELECT DISTINCT organization_name FROM cultural_competency_training WHERE training_date >= '2023-01-01';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 104, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the score of Tommy Bolt", "schema": "CREATE TABLE table_name_23 (score INTEGER, player VARCHAR)", "sql": "SELECT MIN(score) FROM table_name_23 WHERE player = 'tommy bolt';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 65, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the score when the player is Matt Kuchar?", "schema": "CREATE TABLE table_28498999_5 (score VARCHAR, player VARCHAR)", "sql": "SELECT score FROM table_28498999_5 WHERE player = 'Matt Kuchar';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 64, "num_statements": 1} {"question": "Show the change in ocean pH levels for the Mariana Trench over time, with weekly intervals.", "schema": "CREATE TABLE OCEAN_PH (LOCATION VARCHAR(20), MEASUREMENT_DATE DATE, PH FLOAT); INSERT INTO OCEAN_PH (LOCATION, MEASUREMENT_DATE, PH) VALUES ('Mariana Trench', '2022-01-01', 7.8), ('Mariana Trench', '2022-01-07', 7.7), ('Mariana Trench', '2022-01-14', 7.6), ('Mariana Trench', '2022-01-21', 7.5), ('Mariana Trench', '2022-01-28', 7.4);", "sql": "SELECT LOCATION, MEASUREMENT_DATE, PH, LEAD(PH, 1) OVER (PARTITION BY LOCATION ORDER BY MEASUREMENT_DATE) AS NEXT_PH FROM OCEAN_PH WHERE LOCATION = 'Mariana Trench' ORDER BY MEASUREMENT_DATE;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "query_window_function", "is_postgresql_specific": true, "sql_length": 191, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the audition city for hyatt regency chicago", "schema": "CREATE TABLE table_22897967_1 (audition_city VARCHAR, callback_venue VARCHAR)", "sql": "SELECT audition_city FROM table_22897967_1 WHERE callback_venue = 'Hyatt Regency Chicago';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 90, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: In italy, when the stolen ends were 10 and blank ends were under 14, what's the lowest ends won?", "schema": "CREATE TABLE table_name_45 (ends_won INTEGER, blank_ends VARCHAR, stolen_ends VARCHAR, locale VARCHAR)", "sql": "SELECT MIN(ends_won) FROM table_name_45 WHERE stolen_ends = 10 AND locale = 'italy' AND blank_ends < 14;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 104, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'inet' (example 6).", "schema": null, "sql": "INSERT INTO INET_TBL (c, i) VALUES ('192.168.1', '192.168.1.0/25');", "explanation": "DML from PostgreSQL core regression test for Inet.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 67, "num_statements": 1} {"question": "What was the total weight of cannabis sold in Oregon in the first quarter of 2021?", "schema": "CREATE TABLE sales (id INT, state VARCHAR(20), weight DECIMAL(10,2), month INT, year INT);", "sql": "SELECT SUM(weight) FROM sales WHERE state = 'Oregon' AND month BETWEEN 1 AND 3 AND year = 2021;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 95, "num_statements": 1} {"question": "List all companies that received funding over $10M in the last 3 years", "schema": "CREATE TABLE funding (id INT, company_name VARCHAR(30), funding_amount DECIMAL(10,2), funding_date DATE); INSERT INTO funding (id, company_name, funding_amount, funding_date) VALUES (1, 'CompanyA', 5000000.00, '2020-01-01'); INSERT INTO funding (id, company_name, funding_amount, funding_date) VALUES (2, 'CompanyB', 20000000.00, '2019-06-15');", "sql": "SELECT company_name FROM funding WHERE funding_amount > 10000000 AND funding_date >= DATE_SUB(CURDATE(), INTERVAL 3 YEAR);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 122, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the date of the Paul Revere 250 race in the Daytona International Speedway circuit?", "schema": "CREATE TABLE table_name_66 (date VARCHAR, circuit VARCHAR, race VARCHAR)", "sql": "SELECT date FROM table_name_66 WHERE circuit = 'daytona international speedway' AND race = 'paul revere 250';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 109, "num_statements": 1} {"question": "pgTAP test for Functap (assertion 304).", "schema": null, "sql": "-- is_window( NAME, NAME[] )\n-- isnt_window( NAME, NAME[] )\nSELECT * FROM check_test(\n is_window( 'ntile', ARRAY['int'] ),\n true,\n 'is_window(win, arg, desc)',\n 'Function ntile(int) should be a window function',\n ''\n);", "explanation": "SQL assertion from pgTAP test suite for Functap.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 233, "num_statements": 1} {"question": "Insert environmental impact data for hydrogen peroxide", "schema": "CREATE TABLE environmental_impact (chemical_name VARCHAR(255), impact_description TEXT);", "sql": "INSERT INTO environmental_impact (chemical_name, impact_description) VALUES ('hydrogen peroxide', 'Low environmental impact due to its rapid decomposition into water and oxygen.');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 180, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the sum of Pick #, when Position is Guard, and when Round is greater than 2?", "schema": "CREATE TABLE table_name_36 (pick__number INTEGER, position VARCHAR, round VARCHAR)", "sql": "SELECT SUM(pick__number) FROM table_name_36 WHERE position = 'guard' AND round > 2;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 83, "num_statements": 1} {"question": "PostgreSQL regression test 'domain': Write the SELECT query (example 367).", "schema": null, "sql": "select null::inotnull;", "explanation": "Regression test for Domain in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select null::inotnull) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 22, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the total appearances when the total goals is 289?", "schema": "CREATE TABLE table_29701419_2 (total_appearances INTEGER, total_goals VARCHAR)", "sql": "SELECT MIN(total_appearances) FROM table_29701419_2 WHERE total_goals = 289;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 76, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: List the final tables made and the best finishes of poker players.", "schema": "CREATE TABLE poker_player (Final_Table_Made VARCHAR, Best_Finish VARCHAR)", "sql": "SELECT Final_Table_Made, Best_Finish FROM poker_player;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 55, "num_statements": 1} {"question": "How many compliance violations occurred in the last year in the Atlantic region?", "schema": "CREATE TABLE compliance_violations (id INT, violation_name VARCHAR(255), violation_date DATE, region VARCHAR(255));", "sql": "SELECT region, count(*) FROM compliance_violations WHERE violation_date >= DATE_SUB(CURRENT_DATE, INTERVAL 1 YEAR) AND region = 'Atlantic' GROUP BY region;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 155, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many districts have W. Arthur Winstead as elected official?", "schema": "CREATE TABLE table_1341930_24 (district VARCHAR, incumbent VARCHAR)", "sql": "SELECT COUNT(district) FROM table_1341930_24 WHERE incumbent = 'W. Arthur Winstead';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 84, "num_statements": 1} {"question": "PostgreSQL regression test 'regex': Write the SELECT query (example 21).", "schema": null, "sql": "select regexp_matches('abc', 'a(?=b)b*(?=c)c*');", "explanation": "Regression test for Regex in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select regexp_matches('abc', 'a(?=b)b*(?=c)c*')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 48, "num_statements": 1} {"question": "Who is the top scorer among players from Canada in the 2019 season?", "schema": "CREATE TABLE players (player_id INT, name TEXT, nationality TEXT, points INT, season INT); INSERT INTO players (player_id, name, nationality, points, season) VALUES (1, 'Alice Johnson', 'Canada', 700, 2019), (2, 'Bob Williams', 'Canada', 600, 2019);", "sql": "SELECT name, MAX(points) FROM players WHERE nationality = 'Canada' AND season = 2019;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 85, "num_statements": 1} {"question": "pgTAP test for Hastap (assertion 218).", "schema": null, "sql": "SELECT * FROM check_test(\n hasnt_leftop( 'pg_catalog', '+', 'text', 'bigint'::name ),\n true,\n 'hasnt_leftop( schema, name, right, result )',\n 'Left operator pg_catalog.+(NONE,text) RETURNS bigint should not exist',\n ''\n);", "explanation": "SQL assertion from pgTAP test suite for Hastap.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": true, "sql_length": 226, "num_statements": 1} {"question": "Write the SQL definition from PostgreSQL contrib extension 'contrib' (item 88).", "schema": null, "sql": "CREATE FUNCTION gbt_float4_distance(internal,float4,int2,oid,internal)\nRETURNS float8\nAS 'MODULE_PATHNAME'\nLANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;", "explanation": "SQL definition from the 'contrib' PostgreSQL contrib extension.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 149, "num_statements": 1} {"question": "List the names of African countries with more than 5 rural hospitals.", "schema": "CREATE TABLE hospitals (hospital_id INT, country VARCHAR(20), num_beds INT); INSERT INTO hospitals (hospital_id, country, num_beds) VALUES (1, 'Kenya', 50), (2, 'Tanzania', 75), (3, 'Uganda', 60);", "sql": "SELECT country FROM hospitals WHERE country IN ('Kenya', 'Tanzania', 'Uganda') GROUP BY country HAVING COUNT(*) > 5;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 116, "num_statements": 1} {"question": "Identify the EIA reports that have been reviewed and approved, and list the corresponding mine names and report_ids.", "schema": "CREATE TABLE eia_reports (report_id INT, mine_id INT, report_status TEXT); INSERT INTO eia_reports (report_id, mine_id, report_status) VALUES (1, 1, 'In Progress'), (2, 2, 'Completed'), (3, 3, 'Approved'), (4, 4, 'Rejected'); CREATE TABLE mines (mine_id INT, mine_name TEXT); INSERT INTO mines (mine_id, mine_name) VALUES (1, 'MineA'), (2, 'MineB'), (3, 'MineC'), (4, 'MineD');", "sql": "SELECT e.report_id, m.mine_name FROM eia_reports e JOIN mines m ON e.mine_id = m.mine_id WHERE e.report_status = 'Approved';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 124, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many nicknames were associated with Milton, Massachusetts?", "schema": "CREATE TABLE table_1973729_1 (nickname VARCHAR, location VARCHAR)", "sql": "SELECT COUNT(nickname) FROM table_1973729_1 WHERE location = 'Milton, Massachusetts';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 85, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What monarch(s) did Stanley Baldwin (1st ministry) serve?", "schema": "CREATE TABLE table_name_18 (monarchs_served VARCHAR, name VARCHAR)", "sql": "SELECT monarchs_served FROM table_name_18 WHERE name = 'stanley baldwin (1st ministry)';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 88, "num_statements": 1} {"question": "List all circular economy initiatives in 'Asia' from the 'circular_economy_initiatives' table", "schema": "CREATE TABLE circular_economy_initiatives (id INT, region VARCHAR(50), initiative VARCHAR(100));", "sql": "SELECT initiative FROM circular_economy_initiatives WHERE region = 'Asia';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 74, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which captain's manager is Joe Royle?", "schema": "CREATE TABLE table_name_49 (captain VARCHAR, manager VARCHAR)", "sql": "SELECT captain FROM table_name_49 WHERE manager = 'joe royle';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 62, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which captain has howard wilkinson as the manager?", "schema": "CREATE TABLE table_name_52 (captain VARCHAR, manager VARCHAR)", "sql": "SELECT captain FROM table_name_52 WHERE manager = 'howard wilkinson';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 69, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'object_address' (example 15).", "schema": null, "sql": "CREATE VIEW addr_nsp.genview AS SELECT * from addr_nsp.gentable;", "explanation": "DDL from PostgreSQL core regression test for Object Address.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "ddl_view", "is_postgresql_specific": false, "sql_length": 64, "num_statements": 1} {"question": "What is the total revenue for each cuisine type, excluding the Italian cuisine?", "schema": "CREATE TABLE restaurants (id INT, name TEXT, cuisine TEXT, revenue INT); INSERT INTO restaurants (id, name, cuisine, revenue) VALUES (1, 'Restaurant A', 'Italian', 5000), (2, 'Restaurant B', 'Mexican', 6000), (3, 'Restaurant C', 'Italian', 7000);", "sql": "SELECT cuisine, SUM(revenue) FROM restaurants WHERE cuisine != 'Italian' GROUP BY cuisine;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 90, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What are the first name and last name of Linda Smith's advisor?", "schema": "CREATE TABLE Student (advisor VARCHAR, fname VARCHAR, lname VARCHAR); CREATE TABLE Faculty (fname VARCHAR, lname VARCHAR, FacID VARCHAR)", "sql": "SELECT T1.fname, T1.lname FROM Faculty AS T1 JOIN Student AS T2 ON T1.FacID = T2.advisor WHERE T2.fname = 'Linda' AND T2.lname = 'Smith';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 137, "num_statements": 1} {"question": "How many artists have released albums in the last 3 years, grouped by genre?", "schema": "CREATE TABLE AlbumReleases (id INT, year INT, genre VARCHAR(50));", "sql": "SELECT genre, COUNT(*) FROM AlbumReleases WHERE year BETWEEN (SELECT MAX(year) FROM AlbumReleases) - 2 AND (SELECT MAX(year) FROM AlbumReleases) GROUP BY genre;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 160, "num_statements": 1} {"question": "Delete a volunteer's record, given their ID", "schema": "CREATE TABLE Volunteers (id INT, first_name VARCHAR, last_name VARCHAR, email VARCHAR, phone_number VARCHAR, date_joined DATE); INSERT INTO Volunteers (id, first_name, last_name, email, phone_number, date_joined) VALUES (1, 'John', 'Doe', 'john.doe@email.com', '555-123-4567', '2021-05-01'), (2, 'Jane', 'Doe', 'jane.doe@email.com', '555-987-6543', '2021-06-01');", "sql": "DELETE FROM Volunteers WHERE id = 2;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "dml_delete", "is_postgresql_specific": false, "sql_length": 36, "num_statements": 1} {"question": "What is the average CO2 emissions of products in each category?", "schema": "CREATE TABLE products (product_id int, product_category varchar(50), co2_emissions decimal(5,2));", "sql": "SELECT product_category, AVG(co2_emissions) as avg_co2_emissions FROM products GROUP BY product_category;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 105, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is Beyer Peacock's SR number with a SECR number of 769?", "schema": "CREATE TABLE table_name_10 (sr_no INTEGER, builder VARCHAR, secr_no VARCHAR)", "sql": "SELECT AVG(sr_no) FROM table_name_10 WHERE builder = 'beyer peacock' AND secr_no = 769;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 87, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the name of the project that peaked at 40 and was in the USA?", "schema": "CREATE TABLE table_name_38 (project_name VARCHAR, peak VARCHAR, country VARCHAR)", "sql": "SELECT project_name FROM table_name_38 WHERE peak = '40' AND country = 'usa';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 77, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Date of 7 september 1996 includes which highest rank athlete?", "schema": "CREATE TABLE table_name_68 (rank INTEGER, date VARCHAR)", "sql": "SELECT MAX(rank) FROM table_name_68 WHERE date = '7 september 1996';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 68, "num_statements": 1} {"question": "update the ticket sales records for a specific game", "schema": "CREATE TABLE ticket_sales (id INT PRIMARY KEY, game_id INT, number_of_tickets INT, date DATE);", "sql": "UPDATE ticket_sales SET number_of_tickets = 600 WHERE game_id = 123 AND date = '2022-05-01';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 92, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'insert_conflict' (example 201).", "schema": null, "sql": "insert into selfconflict values (8,1), (8,2) on conflict(f1) do select returning *;", "explanation": "DML from PostgreSQL core regression test for Insert Conflict.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "dml_insert", "is_postgresql_specific": true, "sql_length": 83, "num_statements": 1} {"question": "What is the average CO2 emissions in the transportation sector in Germany from 2018 to 2020?", "schema": "CREATE TABLE co2_emissions_germany (sector VARCHAR(255), year INT, co2_emissions INT); INSERT INTO co2_emissions_germany VALUES ('Transportation', 2018, 800), ('Transportation', 2018, 850), ('Transportation', 2019, 820), ('Transportation', 2019, 870), ('Transportation', 2020, 780), ('Transportation', 2020, 830);", "sql": "SELECT AVG(co2_emissions) FROM co2_emissions_germany WHERE sector = 'Transportation' AND year BETWEEN 2018 AND 2020;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 116, "num_statements": 1} {"question": "What is the total quantity of items in warehouses located in 'NJ' and 'MX'?", "schema": "CREATE TABLE warehouses (id INT, location VARCHAR(10), item VARCHAR(10), quantity INT); INSERT INTO warehouses (id, location, item, quantity) VALUES (1, 'NY', 'A101', 200), (2, 'NJ', 'A101', 300), (3, 'CA', 'B203', 150), (4, 'NY', 'C304', 50), (5, 'MX', 'B203', 250);", "sql": "SELECT SUM(quantity) FROM warehouses WHERE location IN ('NJ', 'MX');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 68, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which Year(s) won has a Total of 147?", "schema": "CREATE TABLE table_name_57 (year_s__won VARCHAR, total VARCHAR)", "sql": "SELECT year_s__won FROM table_name_57 WHERE total = 147;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 56, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the coverage area of muzik fm station, which has a music genre?", "schema": "CREATE TABLE table_name_65 (coverage_area VARCHAR, genre VARCHAR, station VARCHAR)", "sql": "SELECT coverage_area FROM table_name_65 WHERE genre = 'music' AND station = 'muzik fm';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 87, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What round was adelaide united the opposition with an attendance of 18,345?", "schema": "CREATE TABLE table_name_32 (round VARCHAR, opposition VARCHAR, attendance VARCHAR)", "sql": "SELECT round FROM table_name_32 WHERE opposition = 'adelaide united' AND attendance = '18,345';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 95, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the result for 10/04/1947?", "schema": "CREATE TABLE table_name_75 (result VARCHAR, date VARCHAR)", "sql": "SELECT result FROM table_name_75 WHERE date = '10/04/1947';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 59, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many airlines have the star alliance and are in brazil?", "schema": "CREATE TABLE table_name_13 (rank VARCHAR, alliance VARCHAR, country VARCHAR)", "sql": "SELECT COUNT(rank) FROM table_name_13 WHERE alliance = 'star alliance' AND country = 'brazil';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 94, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which Visitor is listed as having a Date of December 17?", "schema": "CREATE TABLE table_name_13 (visitor VARCHAR, date VARCHAR)", "sql": "SELECT visitor FROM table_name_13 WHERE date = 'december 17';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 61, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the average attendance for the date of april 4?", "schema": "CREATE TABLE table_name_23 (attendance INTEGER, date VARCHAR)", "sql": "SELECT AVG(attendance) FROM table_name_23 WHERE date = 'april 4';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 65, "num_statements": 1} {"question": "pgTAP test for Hastap (assertion 260).", "schema": null, "sql": "SELECT * FROM check_test(\n domain_type_is( 'public', 'integer', 'int', 'whatever'),\n false,\n 'domain_type_is(schema, type, type, desc) fail',\n 'whatever',\n ' Domain public.integer does not exist'\n);", "explanation": "SQL assertion from pgTAP test suite for Hastap.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 215, "num_statements": 1} {"question": "Write the SQL definition from PostgreSQL contrib extension 'contrib' (item 9).", "schema": null, "sql": "CREATE OPERATOR <=> (\n\tLEFTARG = cube, RIGHTARG = cube, PROCEDURE = distance_chebyshev,\n\tCOMMUTATOR = '<=>'\n);", "explanation": "SQL definition from the 'contrib' PostgreSQL contrib extension.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "ddl_advanced", "is_postgresql_specific": true, "sql_length": 110, "num_statements": 1} {"question": "What are the circular economy initiatives by city and their start dates?", "schema": "CREATE TABLE circular_economy (city VARCHAR(255), initiative VARCHAR(255), start_date DATE); INSERT INTO circular_economy (city, initiative, start_date) VALUES ('CityA', 'Composting', '2018-05-01'), ('CityA', 'Recycling', '2017-12-01'), ('CityB', 'Plastic Reduction', '2019-08-01'), ('CityB', 'E-waste Collection', '2018-09-01');", "sql": "SELECT city, initiative, start_date FROM circular_economy;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 58, "num_statements": 1} {"question": "What is the average age of all cheetahs in the 'animal_population' table?", "schema": "CREATE TABLE animal_population (animal_id INT, animal_type VARCHAR(10), age INT); INSERT INTO animal_population (animal_id, animal_type, age) VALUES (1, 'cheetah', 8); INSERT INTO animal_population (animal_id, animal_type, age) VALUES (2, 'cheetah', 6);", "sql": "SELECT AVG(age) FROM animal_population WHERE animal_type = 'cheetah';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 69, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which result featured the Indianapolis Colts as opponents?", "schema": "CREATE TABLE table_name_72 (result VARCHAR, opponent VARCHAR)", "sql": "SELECT result FROM table_name_72 WHERE opponent = 'indianapolis colts';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 71, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which Tournament in 2010 also has Grand Slam tournaments in 2011", "schema": "CREATE TABLE table_name_74 (Id VARCHAR)", "sql": "SELECT 2010 FROM table_name_74 WHERE 2011 = 'grand slam tournaments';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 69, "num_statements": 1} {"question": "Identify the number of rural hospitals in each state with more than 50 employees.", "schema": "CREATE TABLE hospitals (id INT, name TEXT, state TEXT, num_employees INT);", "sql": "SELECT state, COUNT(*) FROM hospitals WHERE num_employees > 50 GROUP BY state;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 78, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which province has an elevation of 4550?", "schema": "CREATE TABLE table_2251578_4 (province VARCHAR, elevation__m_ VARCHAR)", "sql": "SELECT province FROM table_2251578_4 WHERE elevation__m_ = 4550;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 64, "num_statements": 1} {"question": "PostgreSQL regression test 'tsearch': Write the SELECT query (example 198).", "schema": null, "sql": "SELECT * from ts_debug('english', '5aew.werc.ewr:8100/?xx');", "explanation": "Regression test for Tsearch in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT * from ts_debug('english', '5aew.werc.ewr:8100/?xx')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 60, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: what's the result with candidates being billy tauzin (d) unopposed", "schema": "CREATE TABLE table_1341586_19 (result VARCHAR, candidates VARCHAR)", "sql": "SELECT result FROM table_1341586_19 WHERE candidates = 'Billy Tauzin (D) Unopposed';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 84, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the telephone 052 for area km2 being 5.42", "schema": "CREATE TABLE table_14465924_1 (telephone__052_ VARCHAR, area__km_2__ VARCHAR)", "sql": "SELECT telephone__052_ FROM table_14465924_1 WHERE area__km_2__ = '5.42';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 73, "num_statements": 1} {"question": "What are the unique IP addresses that have sent malicious traffic during the last month, both in and outside the organization?", "schema": "CREATE TABLE malicious_traffic (ip_address VARCHAR(50), is_internal BOOLEAN, timestamp TIMESTAMP); INSERT INTO malicious_traffic (ip_address, is_internal, timestamp) VALUES ('8.8.8.8', true, '2022-01-01 10:00:00'); INSERT INTO malicious_traffic (ip_address, is_internal, timestamp) VALUES ('192.168.1.1', false, '2022-01-02 15:30:00');", "sql": "SELECT DISTINCT ip_address FROM malicious_traffic WHERE timestamp >= NOW() - INTERVAL '1 month';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 96, "num_statements": 1} {"question": "How many healthcare facilities are there in each district?", "schema": "CREATE TABLE Healthcare (District VARCHAR(255), FacilityType VARCHAR(255), Quantity INT); INSERT INTO Healthcare (District, FacilityType, Quantity) VALUES ('DistrictA', 'Hospital', 2), ('DistrictA', 'Clinic', 5), ('DistrictB', 'Hospital', 3), ('DistrictB', 'Clinic', 4);", "sql": "SELECT District, FacilityType, SUM(Quantity) FROM Healthcare GROUP BY District, FacilityType;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 93, "num_statements": 1} {"question": "Determine the average fairness score of algorithms in South America", "schema": "CREATE TABLE Algorithms (AlgorithmId INT, Name TEXT, FairnessScore FLOAT, Country TEXT); INSERT INTO Algorithms (AlgorithmId, Name, FairnessScore, Country) VALUES (1, 'AlgorithmA', 0.8, 'Brazil'), (2, 'AlgorithmB', 0.9, 'Argentina'), (3, 'AlgorithmC', 0.7, 'Colombia');", "sql": "SELECT AVG(FairnessScore) FROM Algorithms WHERE Country = 'South America';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 74, "num_statements": 1} {"question": "How many research grants were awarded to each department?", "schema": "CREATE TABLE departments (id INT, department_name VARCHAR(255)); CREATE TABLE research_grants (id INT, grant_name VARCHAR(255), grant_amount INT, department_id INT, PRIMARY KEY (id), FOREIGN KEY (department_id) REFERENCES departments(id)); INSERT INTO departments (id, department_name) VALUES (1, 'Computer Science'), (2, 'Mathematics'), (3, 'Physics'); INSERT INTO research_grants (id, grant_name, grant_amount, department_id) VALUES (1, 'Grant1', 50000, 1), (2, 'Grant2', 75000, 2), (3, 'Grant3', 100000, 3);", "sql": "SELECT d.department_name, COUNT(rg.id) as grant_count FROM departments d JOIN research_grants rg ON d.id = rg.department_id GROUP BY d.department_name;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 151, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many byes were there with an Against of 1655 and more than 10 wins?", "schema": "CREATE TABLE table_name_51 (byes INTEGER, against VARCHAR, wins VARCHAR)", "sql": "SELECT MIN(byes) FROM table_name_51 WHERE against = 1655 AND wins > 10;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 71, "num_statements": 1} {"question": "What is the total sales amount for each sales representative in the North region, ordered by total sales in descending order?", "schema": "CREATE TABLE sales_representatives (id INT, name TEXT, region TEXT, sales FLOAT); INSERT INTO sales_representatives (id, name, region, sales) VALUES (1, 'Raul Sanchez', 'North', 5000), (2, 'Tanya Patel', 'South', 6000), (3, 'Mohammed Ali', 'East', 7000), (4, 'Emily Wang', 'West', 8000), (5, 'Jose Gonzalez', 'North', 9000), (6, 'Anna Kwon', 'North', 10000);", "sql": "SELECT name, SUM(sales) as total_sales FROM sales_representatives WHERE region = 'North' GROUP BY name ORDER BY total_sales DESC;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 129, "num_statements": 1} {"question": "List all environmental impact assessments and their corresponding facilities for the past year.", "schema": "CREATE TABLE environmental_assessments (id INT, assessment_date DATE, facility TEXT); INSERT INTO environmental_assessments (id, assessment_date, facility) VALUES (1, '2022-01-01', 'Facility1'), (2, '2022-03-15', 'Facility2'), (3, '2022-02-01', 'Facility3');", "sql": "SELECT * FROM environmental_assessments WHERE assessment_date >= DATEADD(year, -1, CURRENT_DATE);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 97, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What Companion (in order from star) has a Semimajor axis (AU) of 0.1886 +0.083 −0.0104?", "schema": "CREATE TABLE table_name_58 (companion__in_order_from_star_ VARCHAR, semimajor_axis___au__ VARCHAR)", "sql": "SELECT companion__in_order_from_star_ FROM table_name_58 WHERE semimajor_axis___au__ = '0.1886 +0.083 −0.0104';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 111, "num_statements": 1} {"question": "List the number of employees hired in each month in 2021 for the marketing department.", "schema": "CREATE TABLE hiring (id INT, employee_id INT, hire_date DATE); INSERT INTO hiring (id, employee_id, hire_date) VALUES (1, 3, '2021-01-15'), (2, 4, '2021-03-05'), (3, 5, '2021-04-20'), (4, 6, '2021-12-31');", "sql": "SELECT EXTRACT(MONTH FROM hire_date) AS month, COUNT(*) AS hired_count FROM hiring JOIN employees ON hiring.employee_id = employees.id WHERE department = 'Marketing' AND YEAR(hire_date) = 2021 GROUP BY month;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 208, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many bronze medals has a total less than 3 with a silver less than 1 and gold more than 1?", "schema": "CREATE TABLE table_name_66 (bronze INTEGER, gold VARCHAR, total VARCHAR, silver VARCHAR)", "sql": "SELECT AVG(bronze) FROM table_name_66 WHERE total < 3 AND silver < 1 AND gold > 1;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 82, "num_statements": 1} {"question": "Determine the average water savings in liters achieved by the implemented water conservation initiatives in 'Beijing' for the month of March 2021", "schema": "CREATE TABLE savings_data (region VARCHAR(50), date DATE, savings FLOAT); INSERT INTO savings_data (region, date, savings) VALUES ('Beijing', '2021-03-01', 100), ('Beijing', '2021-03-02', 110), ('Beijing', '2021-03-03', 120);", "sql": "SELECT AVG(savings) FROM savings_data WHERE region = 'Beijing' AND date BETWEEN '2021-03-01' AND '2021-03-31';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 110, "num_statements": 1} {"question": "What is the average salary of employees who underwent leadership training in the Finance department?", "schema": "CREATE TABLE EmployeeTrainings (EmployeeID INT, Department TEXT, Salary REAL, Training TEXT); INSERT INTO EmployeeTrainings (EmployeeID, Department, Salary, Training) VALUES (1, 'Finance', 72000, 'Leadership');", "sql": "SELECT AVG(Salary) FROM EmployeeTrainings WHERE Department = 'Finance' AND Training = 'Leadership';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 99, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Show the average, minimum, and maximum ticket prices for exhibitions for all years before 2009.", "schema": "CREATE TABLE exhibition (ticket_price INTEGER, YEAR INTEGER)", "sql": "SELECT AVG(ticket_price), MIN(ticket_price), MAX(ticket_price) FROM exhibition WHERE YEAR < 2009;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 97, "num_statements": 1} {"question": "Create a view of satellites launched by the European Union", "schema": "CREATE TABLE satellite_deployment (id INT PRIMARY KEY, name VARCHAR(255), country VARCHAR(255), launch_date DATE);", "sql": "CREATE VIEW eu_satellites AS SELECT * FROM satellite_deployment WHERE country = 'European Union';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "ddl_view", "is_postgresql_specific": false, "sql_length": 97, "num_statements": 1} {"question": "Show a SQL definition from the timescaledb project (test_utils, item 1).", "schema": null, "sql": "\\c :TEST_DBNAME :ROLE_SUPERUSER\nCREATE OR REPLACE FUNCTION test.condition() RETURNS VOID\n AS :MODULE_PATHNAME, 'ts_test_utils_condition' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;", "explanation": "SQL definition from the open-source timescaledb PostgreSQL project.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "plpgsql_function", "is_postgresql_specific": true, "sql_length": 182, "num_statements": 1} {"question": "What is the sum of all transaction values (in USD) for Binance Smart Chain in the past week?", "schema": "CREATE TABLE binance_smart_chain (transaction_time TIMESTAMP, transaction_value DECIMAL(18,2));", "sql": "SELECT SUM(transaction_value) FROM binance_smart_chain WHERE transaction_time >= NOW() - INTERVAL '1 week';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 107, "num_statements": 1} {"question": "What is the average population of countries with a GDP less than 1.5 and more than 30 million visitors in the international_visitors table?", "schema": "CREATE TABLE countries (country_id INT, name VARCHAR(50), population INT, gdp FLOAT); INSERT INTO countries (country_id, name, population, gdp) VALUES (1, 'Brazil', 210147125, 1.432); INSERT INTO countries (country_id, name, population, gdp) VALUES (2, 'Indonesia', 273523615, 1.019);", "sql": "SELECT AVG(c.population) FROM countries c INNER JOIN (SELECT country_id, SUM(visitors) as total_visitors FROM international_visitors GROUP BY country_id) iv ON c.country_id = iv.country_id WHERE c.gdp < 1.5 AND total_visitors > 30000000;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 237, "num_statements": 1} {"question": "Identify the total amount of climate finance spent on renewable energy sources in Asia and the number of adaptation projects in the region.", "schema": "CREATE TABLE climate_finance_re (region VARCHAR(50), source VARCHAR(50), amount NUMERIC(12,2)); INSERT INTO climate_finance_re (region, source, amount) VALUES ('Asia', 'Wind', 1500.00), ('Asia', 'Solar', 2500.00); CREATE TABLE climate_adaptation_re (project_name VARCHAR(50), region VARCHAR(50), status VARCHAR(50)); INSERT INTO climate_adaptation_re (project_name, region, status) VALUES ('Resilient Cities', 'Asia', 'Successful'), ('Green Infrastructure', 'Asia', 'Successful'), ('Coastal Protection', 'Asia', 'Successful');", "sql": "SELECT SUM(amount) FROM climate_finance_re WHERE region = 'Asia' AND source IN ('Wind', 'Solar'); SELECT COUNT(*) FROM climate_adaptation_re WHERE region = 'Asia';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 163, "num_statements": 2} {"question": "What is the minimum soil moisture level for each crop type in the past month?", "schema": "CREATE TABLE crop_soil_moisture (crop_type TEXT, date DATE, soil_moisture INTEGER); INSERT INTO crop_soil_moisture VALUES ('cotton', '2022-06-01', 40), ('rice', '2022-06-01', 50);", "sql": "SELECT crop_type, MIN(soil_moisture) as min_soil_moisture FROM crop_soil_moisture WHERE date >= CURDATE() - INTERVAL 1 MONTH GROUP BY crop_type;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 144, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How much total time was in lane 4?", "schema": "CREATE TABLE table_name_66 (time VARCHAR, lane VARCHAR)", "sql": "SELECT COUNT(time) FROM table_name_66 WHERE lane = 4;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 53, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'btree_gin' (example 8).", "schema": null, "sql": "SELECT * FROM test_timestamptz WHERE i>='2004-10-26 08:55:08'::timestamptz ORDER BY i;", "explanation": "Example query from the 'btree_gin' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 86, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the most number for air date 2009/12/29", "schema": "CREATE TABLE table_21002034_7 (_number INTEGER, air_date_netherlands_yyyy_mm_dd VARCHAR)", "sql": "SELECT MAX(_number) FROM table_21002034_7 WHERE air_date_netherlands_yyyy_mm_dd = '2009/12/29';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 95, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the lowest ranking rower from Great Britain?", "schema": "CREATE TABLE table_name_86 (rank INTEGER, country VARCHAR)", "sql": "SELECT MIN(rank) FROM table_name_86 WHERE country = 'great britain';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 68, "num_statements": 1} {"question": "What is the average labor rating for factories in each region?", "schema": "CREATE TABLE regions (region_id INT, name VARCHAR(255)); INSERT INTO regions VALUES (1, 'North America'); INSERT INTO regions VALUES (2, 'Asia'); CREATE TABLE factories (factory_id INT, name VARCHAR(255), location VARCHAR(255), country_id INT, labor_rating INT, region_id INT); INSERT INTO factories VALUES (1, 'Eco-Friendly Factory A', 'New York, NY', 1, 90, 1); INSERT INTO factories VALUES (2, 'Fairtrade Factory B', 'Delhi, India', 2, 85, 2);", "sql": "SELECT regions.name, AVG(factories.labor_rating) FROM regions JOIN factories ON regions.region_id = factories.region_id GROUP BY regions.name;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 142, "num_statements": 1} {"question": "Write the PL/pgSQL object from PostgreSQL regression test 'plpgsql' (example 42).", "schema": null, "sql": "-- ************************************************************\n-- * BEFORE INSERT or UPDATE on PSlot\n-- *\t- Ensure that our patchfield does exist\n-- ************************************************************\ncreate function tg_pslot_biu() returns trigger as $proc$\ndeclare\n pfrec\trecord;", "explanation": "PL/pgSQL object from PostgreSQL core test for Plpgsql.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "plpgsql_trigger", "is_postgresql_specific": false, "sql_length": 293, "num_statements": 1} {"question": "Which province in Canada has the highest energy storage capacity?", "schema": "CREATE TABLE energy_storage_Canada (province VARCHAR(255), source_type VARCHAR(255), capacity INT); INSERT INTO energy_storage_Canada (province, source_type, capacity) VALUES ('Ontario', 'Batteries', 3000), ('Quebec', 'Batteries', 4000), ('Ontario', 'Pumped Hydro', 8000);", "sql": "SELECT province, MAX(capacity) FROM energy_storage_Canada GROUP BY province;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 76, "num_statements": 1} {"question": "How many crimes were reported in District4 of CityR last year?", "schema": "CREATE TABLE crimes_3 (id INT, city VARCHAR(50), district VARCHAR(50), year INT, crime_count INT); INSERT INTO crimes_3 (id, city, district, year, crime_count) VALUES (1, 'CityR', 'District4', 2021, 39), (2, 'CityR', 'District4', 2020, 30), (3, 'CityS', 'District5', 2021, 55);", "sql": "SELECT SUM(crime_count) FROM crimes_3 WHERE city = 'CityR' AND district = 'District4' AND year = 2021;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 102, "num_statements": 1} {"question": "Update the hours volunteered in the 'VolunteerHours' table", "schema": "CREATE TABLE VolunteerHours (VolunteerHoursID INT PRIMARY KEY, VolunteerID INT, Hours DECIMAL(10, 2), VolunteerDate DATE);", "sql": "UPDATE VolunteerHours SET Hours = 4.00 WHERE VolunteerHoursID = 401;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 68, "num_statements": 1} {"question": "PostgreSQL Plpgsql: show example 35.", "schema": null, "sql": "EXECUTE format('UPDATE tbl SET %I = $1 WHERE key = $2', colname) USING newvalue, keyvalue;", "explanation": "Example from PostgreSQL documentation on Plpgsql.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 90, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who was the opponent in the bout that led to a win in a time of 1:21?", "schema": "CREATE TABLE table_name_47 (opponent VARCHAR, res VARCHAR, time VARCHAR)", "sql": "SELECT opponent FROM table_name_47 WHERE res = 'win' AND time = '1:21';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 71, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Show all allergy types.", "schema": "CREATE TABLE Allergy_type (allergytype VARCHAR)", "sql": "SELECT DISTINCT allergytype FROM Allergy_type;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 46, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What are the names of the nations with the 3 lowest populations?", "schema": "CREATE TABLE country (Name VARCHAR, Population VARCHAR)", "sql": "SELECT Name FROM country ORDER BY Population LIMIT 3;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 53, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is 2011, when 2009 is \"A\"?", "schema": "CREATE TABLE table_name_59 (Id VARCHAR)", "sql": "SELECT 2011 FROM table_name_59 WHERE 2009 = 'a';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 48, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who had less than 7 goals, was ranked under 7, and had more than 8 appearances?", "schema": "CREATE TABLE table_name_86 (name VARCHAR, appearances VARCHAR, goals VARCHAR, rank VARCHAR)", "sql": "SELECT name FROM table_name_86 WHERE goals < 7 AND rank < 7 AND appearances > 8;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 80, "num_statements": 1} {"question": "Delete the painting 'Guernica' from the 'Paintings' table.", "schema": "CREATE TABLE Paintings (PaintingID INT, Title VARCHAR(50), ArtistID INT, YearCreated INT); INSERT INTO Paintings (PaintingID, Title, ArtistID, YearCreated) VALUES (1, 'Starry Night', 1, 1889); INSERT INTO Paintings (PaintingID, Title, ArtistID, YearCreated) VALUES (2, 'Guernica', 2, 1937);", "sql": "DELETE FROM Paintings WHERE Title = 'Guernica';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_delete", "is_postgresql_specific": false, "sql_length": 47, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which Mountain Range has a Region of haiti, and a Location of 18.3601°n 71.9764°w?", "schema": "CREATE TABLE table_name_82 (mountain_range VARCHAR, region VARCHAR, location VARCHAR)", "sql": "SELECT mountain_range FROM table_name_82 WHERE region = 'haiti' AND location = '18.3601°n 71.9764°w';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 101, "num_statements": 1} {"question": "What is the average heart rate for each member during 'Yoga' workouts in January 2022?", "schema": "CREATE TABLE memberships (id INT, member_type VARCHAR(50), region VARCHAR(50)); CREATE TABLE workout_data (member_id INT, workout_type VARCHAR(50), duration INT, heart_rate_avg INT, calories_burned INT, workout_date DATE);", "sql": "SELECT m.id, AVG(w.heart_rate_avg) as avg_heart_rate FROM memberships m JOIN workout_data w ON m.id = w.member_id WHERE w.workout_type = 'Yoga' AND w.workout_date >= DATE '2022-01-01' AND w.workout_date < DATE '2022-02-01' GROUP BY m.id;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 237, "num_statements": 1} {"question": "How many fans attended the games of the western conference teams?", "schema": "CREATE TABLE games (game_id INT, team_id INT, fans_attended INT); INSERT INTO games VALUES (1, 1, 15000), (2, 1, 16000), (3, 2, 17000), (4, 2, 18000), (5, 3, 19000);", "sql": "SELECT te.conference, SUM(g.fans_attended) as total_fans FROM games g JOIN teams te ON g.team_id = te.team_id WHERE te.conference = 'Western' GROUP BY te.conference;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 165, "num_statements": 1} {"question": "What is the average response time for emergency calls in the 'downtown' precinct?", "schema": "CREATE TABLE emergency_calls (id INT, call_time TIMESTAMP, precinct VARCHAR(20)); INSERT INTO emergency_calls (id, call_time, precinct) VALUES (1, '2022-01-01 12:30:00', 'downtown'), (2, '2022-01-01 15:10:00', 'north'), (3, '2022-01-01 09:45:00', 'downtown');", "sql": "SELECT AVG(EXTRACT(EPOCH FROM call_time - LAG(call_time) OVER (PARTITION BY precinct ORDER BY call_time))) AS avg_response_time FROM emergency_calls WHERE precinct = 'downtown';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "query_window_function", "is_postgresql_specific": true, "sql_length": 177, "num_statements": 1} {"question": "What is the total fare collected for metro lines in Paris?", "schema": "CREATE TABLE metro_lines (line_id INT, city VARCHAR(50)); INSERT INTO metro_lines (line_id, city) VALUES (1, 'Paris'), (2, 'Berlin'); CREATE TABLE fares_collected (line_id INT, fare DECIMAL(5,2)); INSERT INTO fares_collected (line_id, fare) VALUES (1, 500.00), (1, 750.00), (2, 300.00);", "sql": "SELECT SUM(fare) FROM fares_collected INNER JOIN metro_lines ON fares_collected.line_id = metro_lines.line_id WHERE city = 'Paris';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 131, "num_statements": 1} {"question": "What is the minimum number of visitors for jazz events in 2021?", "schema": "CREATE TABLE IF NOT EXISTS events (id INT, name VARCHAR(255), type VARCHAR(255), year INT, visitors INT); INSERT INTO events (id, name, type, year, visitors) VALUES (1, 'EventA', 'Jazz', 2021, 300), (2, 'EventB', 'Jazz', 2021, 450), (3, 'EventC', 'Jazz', 2021, 500);", "sql": "SELECT MIN(visitors) FROM events WHERE type = 'Jazz' AND year = 2021;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 69, "num_statements": 1} {"question": "What is the minimum cultural competency score for mental health facilities in New York?", "schema": "CREATE TABLE mental_health_facilities (id INT, name VARCHAR, state VARCHAR, cultural_competency_score INT); INSERT INTO mental_health_facilities (id, name, state, cultural_competency_score) VALUES (1, 'Facility One', 'New York', 85); INSERT INTO mental_health_facilities (id, name, state, cultural_competency_score) VALUES (2, 'Facility Two', 'New York', 90);", "sql": "SELECT state, MIN(cultural_competency_score) as min_score FROM mental_health_facilities WHERE state = 'New York' GROUP BY state;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 128, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the total number of rounds of the player with a pick of 20?", "schema": "CREATE TABLE table_name_87 (round VARCHAR, pick VARCHAR)", "sql": "SELECT COUNT(round) FROM table_name_87 WHERE pick = 20;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 55, "num_statements": 1} {"question": "What is the maximum number of experiments conducted by a single spacecraft, grouped by organization in the astrobiology_experiments and mission_data tables?", "schema": "CREATE TABLE astrobiology_experiments (experiment_id INT, name VARCHAR(100), spacecraft VARCHAR(100), launch_date DATE, experiments_conducted INT); CREATE TABLE mission_data (mission_id INT, name VARCHAR(100), organization VARCHAR(100), launch_date DATE, mission_cost FLOAT);", "sql": "SELECT organization, MAX(experiments_conducted) FROM astrobiology_experiments, mission_data WHERE astrobiology_experiments.launch_date = mission_data.launch_date GROUP BY organization;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 184, "num_statements": 1} {"question": "Count how many deep-sea expeditions have been conducted by the \"Ocean Explorers\" organization?", "schema": "CREATE TABLE deep_sea_expeditions (expedition_id INT, name TEXT, organization TEXT, year INT);", "sql": "SELECT COUNT(*) FROM deep_sea_expeditions WHERE organization = 'Ocean Explorers';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 81, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'polymorphism' (example 14).", "schema": null, "sql": "create function polyf(x anyrange) returns anyarray as $$\n select array[lower(x), upper(x)]\n$$ language sql;", "explanation": "DDL from PostgreSQL core regression test for Polymorphism.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": true, "sql_length": 108, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which date contained Aubrey O'Day as the cover model?", "schema": "CREATE TABLE table_1566852_10 (date VARCHAR, cover_model VARCHAR)", "sql": "SELECT date FROM table_1566852_10 WHERE cover_model = 'Aubrey O'Day';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 69, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the 2009 ranking for Webometrics?", "schema": "CREATE TABLE table_name_40 (ranking VARCHAR)", "sql": "SELECT 2009 FROM table_name_40 WHERE ranking = 'webometrics';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 61, "num_statements": 1} {"question": "Delete companies with no electric vehicles in the ElectricVehicles table.", "schema": "CREATE TABLE ElectricVehicles (id INT, company VARCHAR(20), vehicle_type VARCHAR(20), num_vehicles INT); INSERT INTO ElectricVehicles (id, company, vehicle_type, num_vehicles) VALUES (1, 'Tesla', 'EV', 1500000), (2, 'Nissan', 'Leaf', 500000), (3, 'Chevrolet', 'Bolt', 300000), (4, 'Ford', 'No EV', 0);", "sql": "DELETE FROM ElectricVehicles WHERE company NOT IN (SELECT company FROM ElectricVehicles WHERE vehicle_type = 'EV');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "dml_delete", "is_postgresql_specific": false, "sql_length": 115, "num_statements": 1} {"question": "What was the total quantity of chemical 'A' produced in Q1 2021?", "schema": "CREATE TABLE chemical_production (date DATE, chemical VARCHAR(10), quantity INT); INSERT INTO chemical_production VALUES ('2021-01-01', 'A', 500), ('2021-01-05', 'A', 600), ('2021-02-10', 'A', 700), ('2021-03-20', 'A', 800);", "sql": "SELECT SUM(quantity) FROM chemical_production WHERE chemical = 'A' AND date BETWEEN '2021-01-01' AND '2021-03-31';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 114, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the time of the bout against Dave Menne?", "schema": "CREATE TABLE table_name_33 (time VARCHAR, opponent VARCHAR)", "sql": "SELECT time FROM table_name_33 WHERE opponent = 'dave menne';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 61, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which player is from Joliet Catholic HIgh School?", "schema": "CREATE TABLE table_11677691_7 (player VARCHAR, school VARCHAR)", "sql": "SELECT player FROM table_11677691_7 WHERE school = 'Joliet Catholic High school';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 81, "num_statements": 1} {"question": "What is the maximum number of peacekeeping operations that a single country has been involved in?", "schema": "CREATE TABLE Peacekeeping_Operations (id INT, country VARCHAR(50), year INT); INSERT INTO Peacekeeping_Operations (id, country, year) VALUES (1, 'United States', 2018), (2, 'United States', 2019), (3, 'United States', 2020), (4, 'United Kingdom', 2018), (5, 'Canada', 2019), (6, 'Australia', 2020), (7, 'France', 2018), (8, 'Germany', 2019), (9, 'Italy', 2020);", "sql": "SELECT country, MAX(year) FROM Peacekeeping_Operations GROUP BY country HAVING COUNT(year) = (SELECT MAX(pe.cnt) FROM (SELECT COUNT(*) AS cnt FROM Peacekeeping_Operations GROUP BY country) AS pe);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 196, "num_statements": 1} {"question": "How many citizen feedback records were received in the last week for public safety services?", "schema": "CREATE TABLE feedback (id INT, service VARCHAR(20), date DATE); INSERT INTO feedback (id, service, date) VALUES (1, 'Public Safety', '2022-01-01'), (2, 'Public Safety', '2022-01-07');", "sql": "SELECT COUNT(*) FROM feedback WHERE service = 'Public Safety' AND date >= DATE_SUB(CURDATE(), INTERVAL 1 WEEK);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 111, "num_statements": 1} {"question": "Who are the developers that have created digital assets on the Binance Smart Chain?", "schema": "CREATE TABLE developers (developer_id INT, name VARCHAR(50), country VARCHAR(50)); CREATE TABLE digital_assets (asset_id INT, name VARCHAR(50), developer_id INT, network VARCHAR(50)); INSERT INTO digital_assets (asset_id, name, developer_id, network) VALUES (1, 'Asset1', 1, 'Ethereum'), (2, 'BSCDApp', 2, 'Binance Smart Chain'); INSERT INTO developers (developer_id, name, country) VALUES (1, 'Alice', 'USA'), (2, 'Bob', 'China');", "sql": "SELECT developers.name FROM developers INNER JOIN digital_assets ON developers.developer_id = digital_assets.developer_id WHERE digital_assets.network = 'Binance Smart Chain';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 175, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'numeric' (example 377).", "schema": null, "sql": "INSERT INTO num_exp_sub VALUES (9,1,'-24926804.045047420');", "explanation": "DML from PostgreSQL core regression test for Numeric.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 59, "num_statements": 1} {"question": "Which council members have not provided any feedback?", "schema": "CREATE TABLE City_Council (Council_ID INT PRIMARY KEY, Council_Member VARCHAR(50), District_Number INT); CREATE TABLE Policy_Feedback (Feedback_ID INT PRIMARY KEY, Council_ID INT, Policy_Area VARCHAR(30), Feedback VARCHAR(100)); INSERT INTO City_Council (Council_ID, Council_Member, District_Number) VALUES (1, 'Leila Alvarez', 1), (2, 'James Chen', 2), (3, 'Maria Lee', 3), (4, 'Alex Brown', 4); INSERT INTO Policy_Feedback (Feedback_ID, Council_ID, Policy_Area, Feedback) VALUES (1, 1, 'Housing', 'More affordable housing needed'), (2, 2, 'Transportation', 'Improve public transportation'), (3, 3, 'Education', 'Increase funding for schools');", "sql": "SELECT Council_Member FROM City_Council WHERE Council_ID NOT IN (SELECT Council_ID FROM Policy_Feedback);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 105, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the date of the game that had a loss of Johnson (9-8)?", "schema": "CREATE TABLE table_name_30 (date VARCHAR, loss VARCHAR)", "sql": "SELECT date FROM table_name_30 WHERE loss = 'johnson (9-8)';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 60, "num_statements": 1} {"question": "PostgreSQL regression test 'rangetypes': Write the SELECT query (example 159).", "schema": null, "sql": "select int4range(1, 2, '()');", "explanation": "Regression test for Rangetypes in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select int4range(1, 2, '()')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 29, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the catalog number with the date November 16, 2004?", "schema": "CREATE TABLE table_name_15 (catalog VARCHAR, date VARCHAR)", "sql": "SELECT catalog FROM table_name_15 WHERE date = 'november 16, 2004';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 67, "num_statements": 1} {"question": "What is the total number of likes received by users in Germany and the UK?", "schema": "CREATE TABLE likes (id INT, post_id INT, user_id INT); INSERT INTO likes (id, post_id, user_id) VALUES (1, 1, 1), (2, 1, 2), (3, 2, 1); CREATE TABLE posts (id INT, user_id INT); INSERT INTO posts (id, user_id) VALUES (1, 1), (2, 2), (3, 3); CREATE TABLE users (id INT, country VARCHAR(255)); INSERT INTO users (id, country) VALUES (1, 'Germany'), (2, 'UK'), (3, 'Canada');", "sql": "SELECT SUM(1) FROM (SELECT * FROM likes INNER JOIN posts ON likes.post_id = posts.id INNER JOIN users ON posts.user_id = users.id WHERE users.country IN ('Germany', 'UK')) AS subquery;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 184, "num_statements": 1} {"question": "What is the average age of residents in each city, grouped by state, who have participated in public consultations?", "schema": "CREATE TABLE residents (id INT, age INT, city VARCHAR(50), state VARCHAR(50), participation BOOLEAN); INSERT INTO residents (id, age, city, state, participation) VALUES (1, 34, 'New York', 'NY', true), (2, 55, 'Los Angeles', 'CA', false); CREATE TABLE cities (id INT, name VARCHAR(50), state VARCHAR(50)); INSERT INTO cities (id, name, state) VALUES (1, 'New York', 'NY'), (2, 'Los Angeles', 'CA'); CREATE TABLE public_consultations (id INT, resident_id INT, consultation_topic VARCHAR(50)); INSERT INTO public_consultations (id, resident_id, consultation_topic) VALUES (1, 1, 'Urban Planning');", "sql": "SELECT state, AVG(age) as avg_age FROM residents r JOIN cities c ON r.city = c.name WHERE participation = true GROUP BY state;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 126, "num_statements": 1} {"question": "What is the number of professional development courses taken by teachers in each department?", "schema": "CREATE TABLE teachers (teacher_id INT, teacher_name VARCHAR(50), department VARCHAR(20), course_id INT); INSERT INTO teachers (teacher_id, teacher_name, department, course_id) VALUES (1, 'John Doe', 'Math', 101), (2, 'Jane Smith', 'English', 102), (3, 'Alice Johnson', 'Science', 103), (4, 'Bob Williams', 'Math', 101), (5, 'Charlie Brown', 'English', 102); CREATE TABLE courses (course_id INT, course_name VARCHAR(50), category VARCHAR(20)); INSERT INTO courses (course_id, course_name, category) VALUES (101, 'Algebra I', 'Professional Development'), (102, 'Literature Review', 'Professional Development'), (103, 'Physics Lab', 'Regular Course'), (104, 'Calculus I', 'Professional Development');", "sql": "SELECT department, COUNT(DISTINCT course_id) FROM teachers t JOIN courses c ON t.course_id = c.course_id WHERE c.category = 'Professional Development' GROUP BY department;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 171, "num_statements": 1} {"question": "What is the total number of military equipment sales in the Asia-Pacific region?", "schema": "CREATE TABLE military_equipment_sales(id INT, region VARCHAR(20), equipment_type VARCHAR(20), quantity INT, sale_price FLOAT);", "sql": "SELECT SUM(quantity * sale_price) FROM military_equipment_sales WHERE region = 'Asia-Pacific';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 94, "num_statements": 1} {"question": "What is the total revenue for each restaurant location, grouped by state, excluding states with no revenue?", "schema": "CREATE TABLE restaurants (id INT, name TEXT, state TEXT); CREATE TABLE revenue (restaurant_id INT, location_id INT, amount INT);", "sql": "SELECT restaurants.state, SUM(revenue.amount) FROM revenue JOIN restaurants ON revenue.restaurant_id = restaurants.id WHERE revenue.amount > 0 GROUP BY restaurants.state;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 170, "num_statements": 1} {"question": "What is the most popular material based on inventory quantity?", "schema": "CREATE TABLE trends (id INT, material VARCHAR(255), popularity FLOAT); INSERT INTO trends (id, material, popularity) VALUES (3, 'Hemp', 0.85); INSERT INTO trends (id, material, popularity) VALUES (4, 'Tencel', 0.15);", "sql": "SELECT t.material, t.popularity FROM trends t JOIN (SELECT material, SUM(quantity) as total_quantity FROM inventory GROUP BY material ORDER BY total_quantity DESC LIMIT 1) i ON t.material = i.material;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 201, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: During st kilda's home game, what was the number of people in the crowd?", "schema": "CREATE TABLE table_name_12 (crowd VARCHAR, home_team VARCHAR)", "sql": "SELECT crowd FROM table_name_12 WHERE home_team = 'st kilda';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 61, "num_statements": 1} {"question": "Find the number of successful satellite launches by company ABC", "schema": "CREATE TABLE satellites (id INT, name VARCHAR(50), launch_status VARCHAR(50), manufacturer VARCHAR(50), launch_date DATE);", "sql": "SELECT COUNT(*) FROM satellites WHERE launch_status = 'Success' AND manufacturer = 'ABC';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 89, "num_statements": 1} {"question": "PL/pgSQL test: Plpgsql Record (example 69).", "schema": null, "sql": "end$$;\n\nupdate mutable set f2 = f2 || ' baz';\ntable mutable;\n\n--\n-- Domains of composite\n--\n\ncreate domain ordered_int8s as two_int8s check((value).q1 <= (value).q2);\n\ncreate function read_ordered_int8s(p ordered_int8s) returns int8 as $$\nbegin return p.q1 + p.q2; end\n$$ language plpgsql;\n\nselect read_ordered_int8s(row(1, 2));\nselect read_ordered_int8s(row(2, 1)); -- fail\n\ncreate function build_ordered_int8s(i int8, j int8) returns ordered_int8s as $$\nbegin return row(i,j); end\n$$ language plpgsql;\n\nselect build_ordered_int8s(1,2);\nselect build_ordered_int8s(2,1); -- fail\n\ncreate function build_ordered_int8s_2(i int8, j int8) returns ordered_int8s as $$\ndeclare r record; begin r := row(i,j); return r; end\n$$ language plpgsql;\n\nselect build_ordered_int8s_2(1,2);\nselect build_ordered_int8s_2(2,1); -- fail\n\ncreate function build_ordered_int8s_3(i int8, j int8) returns ordered_int8s as $$\ndeclare r two_int8s; begin r := row(i,j); return r; end\n$$ language plpgsql;\n\nselect build_ordered_int8s_3(1,2);\nselect build_ordered_int8s_3(2,1); -- fail\n\ncreate function build_ordered_int8s_4(i int8, j int8) returns ordered_int8s as $$\ndeclare r ordered_int8s; begin r := row(i,j); return r; end\n$$ language plpgsql;\n\nselect build_ordered_int8s_4(1,2);\nselect build_ordered_int8s_4(2,1); -- fail\n\ncreate function build_ordered_int8s_a(i int8, j int8) returns ordered_int8s[] as $$\nbegin return array[row(i,j), row(i,j+1)]; end\n$$ language plpgsql;\n\nselect build_ordered_int8s_a(1,2);\nselect build_ordered_int8s_a(2,1); -- fail\n\n-- check field assignment\ndo $$\ndeclare r ordered_int8s;", "explanation": "PL/pgSQL example from PostgreSQL source test for Plpgsql Record.", "validation_query": null, "source": "plpgsql_source", "difficulty": "advanced", "category": "plpgsql_function", "is_postgresql_specific": true, "sql_length": 1591, "num_statements": 35} {"question": "Generate PostgreSQL SQL for: Name the authority for coed gender and chanel college", "schema": "CREATE TABLE table_name_67 (authority VARCHAR, gender VARCHAR, name VARCHAR)", "sql": "SELECT authority FROM table_name_67 WHERE gender = 'coed' AND name = 'chanel college';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 86, "num_statements": 1} {"question": "What was the total revenue for the 'Desserts' menu category in the second quarter of 2021?", "schema": "CREATE TABLE restaurant_revenue(menu_category VARCHAR(20), revenue DECIMAL(10, 2), order_date DATE); INSERT INTO restaurant_revenue(menu_category, revenue, order_date) VALUES ('Desserts', 1500, '2021-04-01'), ('Desserts', 1800, '2021-05-03'), ('Desserts', 2000, '2021-06-01');", "sql": "SELECT SUM(revenue) FROM restaurant_revenue WHERE menu_category = 'Desserts' AND order_date >= '2021-04-01' AND order_date <= '2021-06-30';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 139, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the Pick # which has a Position of lb, and a CFL Team of winnipeg?", "schema": "CREATE TABLE table_name_74 (pick__number INTEGER, position VARCHAR, cfl_team VARCHAR)", "sql": "SELECT SUM(pick__number) FROM table_name_74 WHERE position = 'lb' AND cfl_team = 'winnipeg';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 92, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What's the type that ends in 2009?", "schema": "CREATE TABLE table_name_60 (type VARCHAR, ends VARCHAR)", "sql": "SELECT type FROM table_name_60 WHERE ends = '2009';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 51, "num_statements": 1} {"question": "What is the total amount donated by each donor in 2021?", "schema": "CREATE TABLE Donors (DonorID INT, DonorName TEXT, DonationDate DATE, DonationAmount FLOAT); INSERT INTO Donors (DonorID, DonorName, DonationDate, DonationAmount) VALUES (1, 'John Smith', '2021-01-01', 50.00), (2, 'Jane Doe', '2021-02-14', 100.00);", "sql": "SELECT DonorName, SUM(DonationAmount) as TotalDonation FROM Donors WHERE YEAR(DonationDate) = 2021 GROUP BY DonorName;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 119, "num_statements": 1} {"question": "PostgreSQL regression test 'rowsecurity': Write the SELECT query (example 295).", "schema": null, "sql": "SELECT * FROM rec1; -- fail, mutual recursion via s.b. views\n\n--\n-- recursive RLS and VIEWs in policy\n--\nSET SESSION AUTHORIZATION regress_rls_alice;", "explanation": "Regression test for Rowsecurity in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT * FROM rec1; -- fail, mutual recursion via s.b. views\n\n--\n-- recursive RLS and VIEWs in policy\n--\nSET SESSION AUTHORIZATION regress_rls_alice) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": true, "sql_length": 152, "num_statements": 2} {"question": "Show an example of PostgreSQL ALTER COLLATION (example 2).", "schema": null, "sql": "ALTER COLLATION \"en_US\" OWNER TO joe;", "explanation": "PostgreSQL ALTER COLLATION command.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 37, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the Home Team Score at Windy Hill?", "schema": "CREATE TABLE table_name_64 (home_team VARCHAR, venue VARCHAR)", "sql": "SELECT home_team AS score FROM table_name_64 WHERE venue = 'windy hill';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 72, "num_statements": 1} {"question": "Count the number of sustainable textile suppliers, and show only those suppliers that have adopted sustainable practices in the last 5 years.", "schema": "CREATE TABLE TextileSuppliers (supplier TEXT, sustainable BOOLEAN, last_sustainable_update DATE); INSERT INTO TextileSuppliers (supplier, sustainable, last_sustainable_update) VALUES ('Supplier1', true, '2018-01-01'), ('Supplier2', false, '2015-01-01'), ('Supplier3', true, '2020-01-01'), ('Supplier4', true, '2017-01-01');", "sql": "SELECT supplier, COUNT(*) as sustainable_suppliers FROM TextileSuppliers WHERE sustainable = true AND last_sustainable_update >= DATE_SUB(CURRENT_DATE, INTERVAL 5 YEAR) GROUP BY supplier HAVING COUNT(*) > 500;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 209, "num_statements": 1} {"question": "How many marine species have been discovered in the Southern Ocean since 2000?", "schema": "CREATE TABLE southern_ocean_species (species_name TEXT, year INTEGER, discovered BOOLEAN); INSERT INTO southern_ocean_species (species_name, year, discovered) VALUES ('Antarctic Krill', 2005, TRUE), ('Southern Ocean Squid', 2010, TRUE);", "sql": "SELECT COUNT(*) FROM southern_ocean_species WHERE year >= 2000 AND discovered = TRUE;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 85, "num_statements": 1} {"question": "PostgreSQL regression test 'misc_functions': Write the SELECT query (example 121).", "schema": null, "sql": "SELECT count(*) > 0 AS ok FROM pg_control_recovery();", "explanation": "Regression test for Misc Functions in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT count(*) > 0 AS ok FROM pg_control_recovery()) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 53, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which College/Junior/Club Team (League) are from sweden?", "schema": "CREATE TABLE table_name_92 (college_junior_club_team__league_ VARCHAR, nationality VARCHAR)", "sql": "SELECT college_junior_club_team__league_ FROM table_name_92 WHERE nationality = 'sweden';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 89, "num_statements": 1} {"question": "What is the percentage of waste generated in the 'East Coast' region that is plastic, paper, or glass?", "schema": "CREATE TABLE waste_type (waste_type VARCHAR(50)); INSERT INTO waste_type (waste_type) VALUES ('Plastic'), ('Paper'), ('Glass'); CREATE TABLE waste_generation (city_name VARCHAR(50), region VARCHAR(50), waste_type VARCHAR(50), waste_metric INT); INSERT INTO waste_generation (city_name, region, waste_type, waste_metric) VALUES ('New York', 'East Coast', 'Plastic', 500), ('New York', 'East Coast', 'Paper', 600), ('New York', 'East Coast', 'Glass', 400);", "sql": "SELECT 100.0 * SUM(CASE WHEN waste_type IN ('Plastic', 'Paper', 'Glass') THEN waste_metric ELSE 0 END) / SUM(waste_metric) AS percentage FROM waste_generation WHERE region = 'East Coast';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 187, "num_statements": 1} {"question": "What is the name and description of the vulnerabilities with a specific severity level in the 'vulnerabilities' table?", "schema": "CREATE TABLE vulnerabilities (id INT, name VARCHAR(255), description TEXT, severity VARCHAR(50), reported_date DATE);", "sql": "SELECT name, description FROM vulnerabilities WHERE severity = 'High';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 70, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'jsonb_plperl' (example 28).", "schema": null, "sql": "SELECT roundtrip('[1, 2, 3]', 'ARRAY');", "explanation": "Example query from the 'jsonb_plperl' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 39, "num_statements": 1} {"question": "How many users from each state in the USA have a membership?", "schema": "CREATE TABLE users (id INT, state VARCHAR(50), membership BOOLEAN); INSERT INTO users (id, state, membership) VALUES (1, 'California', TRUE), (2, 'New York', FALSE), (3, 'Texas', TRUE), (4, 'Florida', TRUE);", "sql": "SELECT state, COUNT(*) as num_members FROM users WHERE state IN ('California', 'New York', 'Texas', 'Florida') AND membership = TRUE GROUP BY state;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 148, "num_statements": 1} {"question": "What is the total number of yellow cards given to players from Germany?", "schema": "CREATE TABLE card_stats (id INT, player TEXT, yellow_cards INT, country TEXT); INSERT INTO card_stats (id, player, yellow_cards, country) VALUES (1, 'Klose', 5, 'Germany'), (2, 'Schweinsteiger', 4, 'Germany'), (3, 'Lahm', 3, 'Germany');", "sql": "SELECT SUM(yellow_cards) FROM card_stats WHERE country = 'Germany';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 67, "num_statements": 1} {"question": "Update the price of the 'Burrito' menu item to $10.99", "schema": "CREATE TABLE menu_items (item_name VARCHAR(255), price DECIMAL(10,2)); INSERT INTO menu_items (item_name, price) VALUES ('Pizza', 12.99), ('Burrito', 9.99);", "sql": "UPDATE menu_items SET price = 10.99 WHERE item_name = 'Burrito';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 64, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: When the game is listed as 2, what is the score?", "schema": "CREATE TABLE table_name_81 (score VARCHAR, game VARCHAR)", "sql": "SELECT score FROM table_name_81 WHERE game = 2;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 47, "num_statements": 1} {"question": "Insert a new patient record for a 25 year old patient with a diagnosis of PTSD in the US", "schema": "CREATE SCHEMA mental_health; USE mental_health; CREATE TABLE patients (patient_id INT, diagnosis VARCHAR(50), age INT, country VARCHAR(50));", "sql": "INSERT INTO patients VALUES (8, 'PTSD', 25, 'US');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 50, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the Top-25 with an Events of 20, and a Wins larger than 2?", "schema": "CREATE TABLE table_name_45 (top_25 INTEGER, events VARCHAR, wins VARCHAR)", "sql": "SELECT AVG(top_25) FROM table_name_45 WHERE events = 20 AND wins > 2;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 69, "num_statements": 1} {"question": "What is the average production quantity for wells in the 'North Sea' that have a production quantity greater than 1000?", "schema": "CREATE TABLE wells (id INT, name VARCHAR(255), location VARCHAR(255), production_quantity INT); INSERT INTO wells (id, name, location, production_quantity) VALUES (1, 'Well A', 'North Sea', 1000), (2, 'Well B', 'North Sea', 1200), (3, 'Well C', 'North Sea', 1500);", "sql": "SELECT AVG(production_quantity) FROM wells WHERE location = 'North Sea' AND production_quantity > 1000;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 103, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'without_overlaps' (example 597).", "schema": null, "sql": "INSERT INTO temporal_partitioned_rng (id, valid_at) VALUES ('[5,6)', daterange('2018-01-01', '2018-02-01'));", "explanation": "DML from PostgreSQL core regression test for Without Overlaps.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 108, "num_statements": 1} {"question": "Find the chemical with the lowest quantity produced in the last 30 days.", "schema": "CREATE TABLE chemical_production_new2 (id INT PRIMARY KEY, chemical_name VARCHAR(50), quantity INT, production_date DATE);", "sql": "SELECT chemical_name, MIN(quantity) as min_quantity FROM chemical_production_new2 WHERE production_date > CURDATE() - INTERVAL 30 DAY GROUP BY chemical_name LIMIT 1;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 165, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which modern house title translates to prison?", "schema": "CREATE TABLE table_name_46 (modern_title_of_house VARCHAR, translation VARCHAR)", "sql": "SELECT modern_title_of_house FROM table_name_46 WHERE translation = 'prison';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 77, "num_statements": 1} {"question": "What is the total revenue generated by eco-friendly hotels in Japan?", "schema": "CREATE TABLE eco_hotels_japan (hotel_id INT, hotel_name TEXT, country TEXT, revenue FLOAT); INSERT INTO eco_hotels_japan (hotel_id, hotel_name, country, revenue) VALUES (1, 'Eco-Hotel Kyoto', 'Japan', 120000), (2, 'Green Tokyo Inn', 'Japan', 150000);", "sql": "SELECT SUM(revenue) FROM eco_hotels_japan;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 42, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What country has a time of 7:28.66 and a rank less than 3?", "schema": "CREATE TABLE table_name_27 (country VARCHAR, rank VARCHAR, time VARCHAR)", "sql": "SELECT country FROM table_name_27 WHERE rank < 3 AND time = '7:28.66';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 70, "num_statements": 1} {"question": "Find customers with multiple claims and 'Renters' policies in Canada.", "schema": "CREATE TABLE Claims (ClaimID INT, PolicyID INT); CREATE TABLE Policy (PolicyID INT, PolicyType VARCHAR(20), CustomerID INT, CustomerPostalCode VARCHAR(7), Country VARCHAR(20)); INSERT INTO Claims (ClaimID, PolicyID) VALUES (1, 1), (2, 2), (3, 3), (4, 3), (5, 5); INSERT INTO Policy (PolicyID, PolicyType, CustomerID, CustomerPostalCode, Country) VALUES (1, 'Homeowners', 101, 'M1M1M1', 'Canada'), (2, 'Auto', 102, 'A1A1A1', 'Canada'), (3, 'Renters', 103, 'M2M2M2', 'Canada'), (4, 'Life', 104, 'N1N1N1', 'Canada'), (5, 'Renters', 105, 'M2M2M2', 'Canada');", "sql": "SELECT Policy.CustomerID FROM Policy INNER JOIN Claims ON Policy.PolicyID = Claims.PolicyID WHERE Policy.PolicyType = 'Renters' AND Policy.Country = 'Canada' GROUP BY Policy.CustomerID HAVING COUNT(DISTINCT Claims.ClaimID) > 1;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 227, "num_statements": 1} {"question": "Write the SQL definition from PostgreSQL contrib extension 'contrib' (item 16).", "schema": null, "sql": "CREATE FUNCTION exists_any(hstore,text[])\nRETURNS bool\nAS 'MODULE_PATHNAME','hstore_exists_any'\nLANGUAGE C STRICT IMMUTABLE PARALLEL SAFE;", "explanation": "SQL definition from the 'contrib' PostgreSQL contrib extension.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "other", "is_postgresql_specific": false, "sql_length": 138, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which round was Brian Elder taken in?", "schema": "CREATE TABLE table_name_15 (round INTEGER, player VARCHAR)", "sql": "SELECT MIN(round) FROM table_name_15 WHERE player = 'brian elder';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "What is the total biomass of all marine species in the Arctic region, grouped by conservation status?\"", "schema": "CREATE TABLE marine_species_biomass (species_name VARCHAR(255), region VARCHAR(255), biomass FLOAT, conservation_status VARCHAR(255)); INSERT INTO marine_species_biomass (species_name, region, biomass, conservation_status) VALUES ('Polar Bear', 'Arctic', 500, 'Fully Protected'), ('Narwhal', 'Arctic', 300, 'Partially Protected'), ('Ringed Seal', 'Arctic', 200, 'Fully Protected');", "sql": "SELECT conservation_status, SUM(biomass) as total_biomass FROM marine_species_biomass WHERE region = 'Arctic' GROUP BY conservation_status;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 139, "num_statements": 1} {"question": "What is the average number of heritage sites per country in the Americas?", "schema": "CREATE TABLE CountriesAmericas (id INT, name TEXT, region TEXT); INSERT INTO CountriesAmericas (id, name, region) VALUES (1, 'United States', 'Americas'), (2, 'Canada', 'Americas'); CREATE TABLE HeritageSitesAmericas (id INT, country_id INT, name TEXT); INSERT INTO HeritageSitesAmericas (id, country_id, name) VALUES (1, 1, 'Statue of Liberty'), (2, 1, 'Grand Canyon'), (3, 2, 'Niagara Falls'), (4, 2, 'CN Tower');", "sql": "SELECT AVG(site_count) FROM (SELECT COUNT(HeritageSitesAmericas.id) AS site_count FROM HeritageSitesAmericas GROUP BY HeritageSitesAmericas.country_id) AS SiteCountPerCountry;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 175, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many countries had a total freshwater withdrawal (km 3 /yr) where the agricultural use (m 3 /p/yr)(in %) was 428(62%)?", "schema": "CREATE TABLE table_15909409_2 (total_freshwater_withdrawal__km_3__yr_ VARCHAR, agricultural_use__m_3__p_yr__in__percentage_ VARCHAR)", "sql": "SELECT COUNT(total_freshwater_withdrawal__km_3__yr_) FROM table_15909409_2 WHERE agricultural_use__m_3__p_yr__in__percentage_ = '428(62%)';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 139, "num_statements": 1} {"question": "What is the total number of cases handled by attorney 'Alice Smith'?", "schema": "CREATE TABLE cases (case_id INT, attorney_name TEXT); INSERT INTO cases (case_id, attorney_name) VALUES (1, 'Alice Smith'), (2, 'Bob Johnson'), (3, 'Bob Johnson'), (4, 'Charlie Brown');", "sql": "SELECT COUNT(*) FROM cases WHERE attorney_name = 'Alice Smith';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 63, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What type has an unspecified species and less than 367 genes?", "schema": "CREATE TABLE table_name_65 (type VARCHAR, species VARCHAR, genes VARCHAR)", "sql": "SELECT type FROM table_name_65 WHERE species = 'unspecified' AND genes < 367;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 77, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is every denomination for the school Seymour college?", "schema": "CREATE TABLE table_22043925_1 (denomination VARCHAR, school VARCHAR)", "sql": "SELECT denomination FROM table_22043925_1 WHERE school = 'Seymour College';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 75, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which assets did not incur any fault log? List the asset model.", "schema": "CREATE TABLE Fault_Log (asset_model VARCHAR, asset_id VARCHAR); CREATE TABLE Assets (asset_model VARCHAR, asset_id VARCHAR)", "sql": "SELECT asset_model FROM Assets WHERE NOT asset_id IN (SELECT asset_id FROM Fault_Log);", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 86, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: where is the private/presbyterian institute", "schema": "CREATE TABLE table_1974632_1 (location VARCHAR, type VARCHAR)", "sql": "SELECT location FROM table_1974632_1 WHERE type = 'Private/Presbyterian';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 73, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the date of death for a rank below 14 and age of 103 years, 148 days?", "schema": "CREATE TABLE table_name_61 (death_date VARCHAR, rank VARCHAR, age__as_of_1_february_2014_ VARCHAR)", "sql": "SELECT death_date FROM table_name_61 WHERE rank < 14 AND age__as_of_1_february_2014_ = '103 years, 148 days';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 109, "num_statements": 1} {"question": "What is the percentage of successful space missions by continent in 2020?", "schema": "CREATE TABLE Space_Missions (mission_date DATE, continent VARCHAR(255), success BOOLEAN); INSERT INTO Space_Missions (mission_date, continent, success) VALUES ('2020-01-01', 'North America', TRUE), ('2020-02-01', 'Asia', FALSE), ('2020-03-01', 'Europe', TRUE), ('2020-04-01', 'North America', TRUE), ('2020-05-01', 'Africa', FALSE);", "sql": "SELECT continent, (COUNT(success) FILTER (WHERE success = TRUE) * 100.0 / COUNT(*)) AS success_percentage FROM Space_Missions WHERE YEAR(mission_date) = 2020 GROUP BY continent;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": true, "sql_length": 177, "num_statements": 1} {"question": "PostgreSQL regression test 'money': Write the SELECT query (example 103).", "schema": null, "sql": "SELECT '92233720368547758.07'::money * 2::float8;", "explanation": "Regression test for Money in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT '92233720368547758.07'::money * 2::float8) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 49, "num_statements": 1} {"question": "Write the SQL definition from PostgreSQL contrib extension 'contrib' (item 198).", "schema": null, "sql": "CREATE FUNCTION isneq(upc, ean13)\n\tRETURNS boolean\n\tAS 'int8eq'\n\tLANGUAGE 'internal'\n\tIMMUTABLE STRICT\n\tPARALLEL SAFE;", "explanation": "SQL definition from the 'contrib' PostgreSQL contrib extension.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 118, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who took third-place when there were 4 total wins?", "schema": "CREATE TABLE table_17632217_2 (third_place VARCHAR, total_wins VARCHAR)", "sql": "SELECT third_place FROM table_17632217_2 WHERE total_wins = 4;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 62, "num_statements": 1} {"question": "What is the number of mental health campaigns in Rio de Janeiro per month in 2019?", "schema": "CREATE TABLE campaigns (campaign_id INT, campaign_name TEXT, city TEXT, start_date DATE, end_date DATE); INSERT INTO campaigns (campaign_id, campaign_name, city, start_date, end_date) VALUES (1, 'Healthy Minds', 'Rio de Janeiro', '2019-05-01', '2020-04-30');", "sql": "SELECT EXTRACT(MONTH FROM start_date) as month, COUNT(*) as num_campaigns FROM campaigns WHERE city = 'Rio de Janeiro' AND EXTRACT(YEAR FROM start_date) = 2019 GROUP BY month ORDER BY month;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 190, "num_statements": 1} {"question": "What is the total value of all loans issued to customers in London?", "schema": "CREATE TABLE customers (id INT, name VARCHAR(50), city VARCHAR(50)); INSERT INTO customers (id, name, city) VALUES (1, 'John Doe', 'London'); CREATE TABLE loans (id INT, customer_id INT, product VARCHAR(50), quantity INT, interest_rate DECIMAL(10,2), loaned_at TIMESTAMP); INSERT INTO loans (id, customer_id, product, quantity, interest_rate, loaned_at) VALUES (1, 1, 'Car Loan', 10000, 0.05, NOW());", "sql": "SELECT SUM(l.quantity * l.interest_rate) as total_value FROM loans l INNER JOIN customers c ON l.customer_id = c.id WHERE c.city = 'London';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 140, "num_statements": 1} {"question": "Which graduate students have not received any research grants in the past two years?", "schema": "CREATE TABLE students (student_id INT, name TEXT); INSERT INTO students (student_id, name) VALUES (1, 'Alice Johnson'), (2, 'Bob Brown'), (3, 'Claire White'); CREATE TABLE grants (grant_id INT, student_id INT, year INT, amount INT); INSERT INTO grants (grant_id, student_id, year, amount) VALUES (1, 1, 2021, 5000), (2, 2, 2022, 15000), (3, 1, 2023, 20000);", "sql": "SELECT s.name FROM students s LEFT JOIN grants g ON s.student_id = g.student_id AND g.year BETWEEN 2021 AND 2023 WHERE g.grant_id IS NULL;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 138, "num_statements": 1} {"question": "What is the total number of fish for each location, grouped by location, from the 'fish_stock' and 'sustainable_seafood' tables, including those with no records in either table?", "schema": "CREATE TABLE fish_stock (location VARCHAR(255), num_fish INT); CREATE TABLE sustainable_seafood (location VARCHAR(255), num_fish INT); INSERT INTO fish_stock (location, num_fish) VALUES ('Location A', 500), ('Location B', 600); INSERT INTO sustainable_seafood (location, num_fish) VALUES ('Location A', 450), ('Location B', 650); CREATE TABLE all_locations (location VARCHAR(255)); INSERT INTO all_locations (location) VALUES ('Location A'), ('Location B'), ('Location C');", "sql": "SELECT coalesce(f.location, l.location) as location, SUM(coalesce(f.num_fish, 0) + coalesce(s.num_fish, 0)) FROM all_locations l LEFT JOIN fish_stock f ON l.location = f.location LEFT JOIN sustainable_seafood s ON l.location = s.location GROUP BY coalesce(f.location, l.location);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 280, "num_statements": 1} {"question": "Insert a new record into the 'community_education' table with id 4, program 'Wildlife Conservation', and attendance 50.", "schema": "CREATE TABLE community_education (id INT, program VARCHAR(255), attendance INT); INSERT INTO community_education (id, program, attendance) VALUES (1, 'Biodiversity', 30), (2, 'Climate Change', 40), (3, 'Habitat Restoration', 60);", "sql": "INSERT INTO community_education (id, program, attendance) VALUES (4, 'Wildlife Conservation', 50);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 98, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the urban settlement when the city / municipality was kovin?", "schema": "CREATE TABLE table_2562572_7 (urban_settlement VARCHAR, city___municipality VARCHAR)", "sql": "SELECT urban_settlement FROM table_2562572_7 WHERE city___municipality = 'Kovin';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 81, "num_statements": 1} {"question": "Delete all records from the \"audience_demographics\" table where the \"age\" is less than 18", "schema": "CREATE TABLE audience_demographics (id INT PRIMARY KEY, age INT, country VARCHAR(255), gender VARCHAR(255));", "sql": "DELETE FROM audience_demographics WHERE age < 18;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_delete", "is_postgresql_specific": false, "sql_length": 49, "num_statements": 1} {"question": "What is the failure rate for SpaceX missions?", "schema": "CREATE TABLE space_missions (mission_id INT, mission_year INT, mission_status VARCHAR(10), mission_company VARCHAR(100));", "sql": "SELECT mission_company, COUNT(*) * 100.0 / (SELECT COUNT(*) FROM space_missions WHERE mission_company = 'SpaceX') AS failure_rate FROM space_missions WHERE mission_status = 'failed' AND mission_company = 'SpaceX' GROUP BY mission_company;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 238, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'index_including' (example 13).", "schema": null, "sql": "INSERT INTO tbl_include_unique2 SELECT 1, 2, 3*x, box('4,4,4,4') FROM generate_series(1,10) AS x;", "explanation": "DML from PostgreSQL core regression test for Index Including.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 97, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the nationality of Shaun Sabol?", "schema": "CREATE TABLE table_name_94 (nationality VARCHAR, player VARCHAR)", "sql": "SELECT nationality FROM table_name_94 WHERE player = 'shaun sabol';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 67, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the tries when tries against were 41, try bonus was 6, and had 317 points.", "schema": "CREATE TABLE table_name_35 (tries_for VARCHAR, points_against VARCHAR, try_bonus VARCHAR, tries_against VARCHAR)", "sql": "SELECT tries_for FROM table_name_35 WHERE try_bonus = '6' AND tries_against = '41' AND points_against = '317';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 110, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many games had fewer than 118 opponents and more than 109 net points with an opponent of Washington?", "schema": "CREATE TABLE table_name_37 (game VARCHAR, opponent VARCHAR, opponents VARCHAR, nets_points VARCHAR)", "sql": "SELECT COUNT(game) FROM table_name_37 WHERE opponents < 118 AND nets_points > 109 AND opponent = 'washington';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 110, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: what is the sponsor of motorola", "schema": "CREATE TABLE table_name_12 (team VARCHAR, sponsor VARCHAR)", "sql": "SELECT team FROM table_name_12 WHERE sponsor = 'motorola';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 58, "num_statements": 1} {"question": "PostgreSQL regression test 'collate.utf8': Write the SELECT query (example 20).", "schema": null, "sql": "SELECT '=' ~ '[[:punct:]]' COLLATE PG_C_UTF8; -- symbols are punctuation in posix\nSELECT 'a8a' ~ '[[:digit:]]' COLLATE PG_C_UTF8;", "explanation": "Regression test for Collate.Utf8 in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT '=' ~ '[[:punct:]]' COLLATE PG_C_UTF8; -- symbols are punctuation in posix\nSELECT 'a8a' ~ '[[:digit:]]' COLLATE PG_C_UTF8) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 129, "num_statements": 2} {"question": "Generate PostgreSQL SQL for: What is the highest NFL Draft that has jeff robinson as the player, with an overall pick less than 98?", "schema": "CREATE TABLE table_name_47 (nfl_draft INTEGER, player VARCHAR, overall_pick VARCHAR)", "sql": "SELECT MAX(nfl_draft) FROM table_name_47 WHERE player = 'jeff robinson' AND overall_pick < 98;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 94, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What are the names of people in ascending alphabetical order?", "schema": "CREATE TABLE People (Name VARCHAR)", "sql": "SELECT Name FROM People ORDER BY Name;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 38, "num_statements": 1} {"question": "How many unique countries are represented in the database?", "schema": "CREATE TABLE artists (id INT, name TEXT, country TEXT); INSERT INTO artists (id, name, country) VALUES (1, 'Eminem', 'USA'), (2, 'The Beatles', 'UK'), (3, 'Rammstein', 'Germany');", "sql": "SELECT COUNT(DISTINCT country) FROM artists;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 44, "num_statements": 1} {"question": "PostgreSQL regression test 'float8': Write the SELECT query (example 126).", "schema": null, "sql": "SELECT asinh(float8 'nan');", "explanation": "Regression test for Float8 in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT asinh(float8 'nan')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 27, "num_statements": 1} {"question": "How many electronic music tracks were sold through online platforms in Q1 2022?", "schema": "CREATE TABLE tracks (id INT, title VARCHAR(255), genre VARCHAR(255), platform VARCHAR(255), sales INT); INSERT INTO tracks (id, title, genre, platform, sales) VALUES (1, 'Electricity', 'Electronic', 'iTunes', 150);", "sql": "SELECT SUM(sales) FROM tracks WHERE genre = 'Electronic' AND platform IN ('iTunes', 'Spotify', 'Google Play') AND YEAR(id) = 2022 AND QUARTER(id) = 1;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 150, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What's the title of the episode with series number 36?", "schema": "CREATE TABLE table_28146944_2 (title VARCHAR, no_in_series VARCHAR)", "sql": "SELECT title FROM table_28146944_2 WHERE no_in_series = 36;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 59, "num_statements": 1} {"question": "PostgreSQL regression test 'jsonpath': Write the SELECT query (example 97).", "schema": null, "sql": "select '@ + 1'::jsonpath;", "explanation": "Regression test for Jsonpath in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select '@ + 1'::jsonpath) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 25, "num_statements": 1} {"question": "What is the total investment in the 'Technology' industry for projects started on or before 2021?", "schema": "CREATE TABLE EconomicDiversification (id INT, project_id INT, business_name VARCHAR(50), industry VARCHAR(50), investment DECIMAL(10,2), start_date DATE, end_date DATE); INSERT INTO EconomicDiversification (id, project_id, business_name, industry, investment, start_date, end_date) VALUES (1, 1, 'Green Energy Solutions', 'Renewable Energy', 75000.00, '2021-04-01', '2022-03-31'); INSERT INTO EconomicDiversification (id, project_id, business_name, industry, investment, start_date, end_date) VALUES (2, 2, 'Local Food Market', 'Technology', 35000.00, '2021-01-01', '2023-12-31');", "sql": "SELECT industry, SUM(investment) FROM EconomicDiversification WHERE start_date <= '2021-12-31' AND industry = 'Technology' GROUP BY industry;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 141, "num_statements": 1} {"question": "PostgreSQL regression test 'aggregates': Write the SELECT query (example 53).", "schema": null, "sql": "select sum(null::numeric) from generate_series(1,3);", "explanation": "Regression test for Aggregates in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select sum(null::numeric) from generate_series(1,3)) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 52, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who was the Home team in Tie #23?", "schema": "CREATE TABLE table_name_80 (home_team VARCHAR, tie_no VARCHAR)", "sql": "SELECT home_team FROM table_name_80 WHERE tie_no = 23;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 54, "num_statements": 1} {"question": "What is the maximum energy price for the 'Central' region in April 2022?", "schema": "CREATE TABLE energy_prices (id INT, region VARCHAR(50), price FLOAT, date DATE); INSERT INTO energy_prices (id, region, price, date) VALUES (1, 'Central', 70.5, '2022-04-01');", "sql": "SELECT region, MAX(price) AS max_price FROM energy_prices WHERE date BETWEEN '2022-04-01' AND '2022-04-30' AND region = 'Central' GROUP BY region;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 146, "num_statements": 1} {"question": "Identify the programs with no donations in 2023.", "schema": "CREATE TABLE Programs (ProgramID INT, ProgramName TEXT, DonationAmount DECIMAL(10,2)); INSERT INTO Programs (ProgramID, ProgramName, DonationAmount) VALUES (1, 'Healthcare', 1000.00), (2, 'Arts & Culture', 500.00), (3, 'Environment', 2000.00), (4, 'Social Services', 750.00); CREATE TABLE Donations (DonationID INT, DonorID INT, ProgramID INT, DonationAmount DECIMAL(10,2), DonationDate DATE); INSERT INTO Donations (DonationID, DonorID, ProgramID, DonationAmount, DonationDate) VALUES (1, 1, 1, 100.00, '2022-01-01'), (2, 2, 2, 10.00, '2022-02-03'), (3, 3, 3, 200.00, '2022-12-25');", "sql": "SELECT Programs.ProgramID, Programs.ProgramName FROM Programs LEFT JOIN Donations ON Programs.ProgramID = Donations.ProgramID WHERE DATE_FORMAT(DonationDate, '%Y') = '2023' AND Donations.DonationID IS NULL;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 206, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who is the opponent with a score of 5–4?", "schema": "CREATE TABLE table_name_36 (opponent VARCHAR, score VARCHAR)", "sql": "SELECT opponent FROM table_name_36 WHERE score = '5–4';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 55, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the high assists for chauncey billups , carmelo anthony (18)", "schema": "CREATE TABLE table_17355408_4 (high_assists VARCHAR, high_points VARCHAR)", "sql": "SELECT high_assists FROM table_17355408_4 WHERE high_points = 'Chauncey Billups , Carmelo Anthony (18)';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 104, "num_statements": 1} {"question": "What is the total length of all tunnels in the state of Texas?", "schema": "CREATE TABLE Tunnels (id INT, name TEXT, state TEXT, length FLOAT); INSERT INTO Tunnels (id, name, state, length) VALUES (1, 'Houston Tunnel System', 'Texas', 8000.0); INSERT INTO Tunnels (id, name, state, length) VALUES (2, 'Dallas North Tunnel', 'Texas', 3500.0);", "sql": "SELECT SUM(length) FROM Tunnels WHERE state = 'Texas';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 54, "num_statements": 1} {"question": "What is the average ticket price for dance events in the city of Chicago?", "schema": "CREATE TABLE events (name VARCHAR(255), location VARCHAR(255), category VARCHAR(255), price DECIMAL(5,2)); INSERT INTO events (name, location, category, price) VALUES ('Swan Lake', 'Chicago', 'Dance', 95.00), ('The Nutcracker', 'New York', 'Dance', 125.00), ('Hamilton', 'Chicago', 'Theatre', 225.00);", "sql": "SELECT AVG(price) FROM events WHERE location = 'Chicago' AND category = 'Dance';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 80, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the tyre when the entrant is sa alfa romeo and the driver is luigi fagioli?", "schema": "CREATE TABLE table_name_87 (tyre VARCHAR, entrant VARCHAR, driver VARCHAR)", "sql": "SELECT tyre FROM table_name_87 WHERE entrant = 'sa alfa romeo' AND driver = 'luigi fagioli';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 92, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which Incumbent has a District of California 5?", "schema": "CREATE TABLE table_name_99 (incumbent VARCHAR, district VARCHAR)", "sql": "SELECT incumbent FROM table_name_99 WHERE district = 'california 5';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 68, "num_statements": 1} {"question": "What is the number of volunteers in each age group?", "schema": "CREATE TABLE Volunteers (id INT, name TEXT, age INT);", "sql": "SELECT FLOOR(age / 10) * 10 as age_group, COUNT(*) as number_of_volunteers FROM Volunteers GROUP BY age_group;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 110, "num_statements": 1} {"question": "What is the earliest date an artifact was excavated from 'Site E'?", "schema": "CREATE TABLE Site (SiteID VARCHAR(10), SiteName VARCHAR(20)); INSERT INTO Site (SiteID, SiteName) VALUES ('E', 'Site E'); CREATE TABLE Excavation (ExcavationID VARCHAR(10), SiteID VARCHAR(10), ExcavationDate DATE); INSERT INTO Excavation (ExcavationID, SiteID, ExcavationDate) VALUES ('1', 'E', '2022-01-01'), ('2', 'E', '2022-02-01'), ('3', 'E', '2022-03-01'), ('4', 'E', '2022-04-01');", "sql": "SELECT MIN(ExcavationDate) FROM Excavation WHERE SiteID = 'E';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 62, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the days with rain figure for the city of Santiago de Compostela?", "schema": "CREATE TABLE table_12837_1 (days_with_rain__year_summer_ VARCHAR, city_town VARCHAR)", "sql": "SELECT days_with_rain__year_summer_ FROM table_12837_1 WHERE city_town = 'Santiago de Compostela';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 98, "num_statements": 1} {"question": "Calculate the percentage of emergency incidents in the east region that were responded to within 5 minutes, for each month in the past year.", "schema": "CREATE TABLE IncidentTimes (id INT, incident_id INT, incident_time TIME); CREATE TABLE EmergencyIncidents (id INT, district_id INT, incident_date DATE, region_id INT); INSERT INTO IncidentTimes (id, incident_id, incident_time) VALUES (1, 1, '12:00:00'), (2, 2, '21:00:00'), (3, 3, '06:00:00'), (4, 4, '18:00:00'); INSERT INTO EmergencyIncidents (id, district_id, incident_date, region_id) VALUES (1, 1, '2021-12-01', 1), (2, 2, '2021-12-02', 1), (3, 3, '2021-12-03', 1), (4, 4, '2021-12-04', 1);", "sql": "SELECT EXTRACT(MONTH FROM incident_date) as month, region_id, COUNT(*) * 100.0 / SUM(COUNT(*)) OVER (PARTITION BY EXTRACT(MONTH FROM incident_date), region_id) as pct_incidents_within_5_minutes FROM EmergencyIncidents e JOIN IncidentTimes i ON e.id = i.incident_id WHERE incident_time BETWEEN '00:00:00' AND '05:00:00' AND incident_date BETWEEN DATE_SUB(CURRENT_DATE, INTERVAL 1 YEAR) AND CURRENT_DATE AND region_id = 2 GROUP BY EXTRACT(MONTH FROM incident_date), region_id;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "query_window_function", "is_postgresql_specific": true, "sql_length": 474, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the resting potential with an AP duration of 1.0 and a median giant fiber cell type?", "schema": "CREATE TABLE table_name_69 (resting_potential__mv_ VARCHAR, ap_duration__ms_ VARCHAR, cell_type VARCHAR)", "sql": "SELECT resting_potential__mv_ FROM table_name_69 WHERE ap_duration__ms_ = '1.0' AND cell_type = 'median giant fiber';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 117, "num_statements": 1} {"question": "Insert new records in the satellite_image_analysis table for field 8 with image_quality score 90, taken on 2023-04-01", "schema": "CREATE TABLE satellite_image_analysis (field_id INT, image_quality INT, image_timestamp DATETIME);", "sql": "INSERT INTO satellite_image_analysis (field_id, image_quality, image_timestamp) VALUES (8, 90, '2023-04-01');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 109, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the week 12 nomination that had a week 1 nomination of Maria Maxine and a week 2 nomination of Henri Satu?", "schema": "CREATE TABLE table_name_1 (week_12 VARCHAR, week_1 VARCHAR, week_2 VARCHAR)", "sql": "SELECT week_12 FROM table_name_1 WHERE week_1 = 'maria maxine' AND week_2 = 'henri satu';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 89, "num_statements": 1} {"question": "What was the average donation amount by state in Q1 2022?", "schema": "CREATE TABLE Donations (id INT, state VARCHAR(2), donation_amount DECIMAL(5,2), donation_date DATE); INSERT INTO Donations (id, state, donation_amount, donation_date) VALUES (1, 'NY', 50.00, '2022-01-01'), (2, 'CA', 100.00, '2022-01-15'), (3, 'TX', 75.00, '2022-03-03');", "sql": "SELECT AVG(donation_amount) as avg_donation, state FROM Donations WHERE donation_date BETWEEN '2022-01-01' AND '2022-03-31' GROUP BY state;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 139, "num_statements": 1} {"question": "How many female refugees were assisted by each organization in Africa in 2018?", "schema": "CREATE TABLE refugees (id INT, organization VARCHAR(255), location VARCHAR(255), assist_date DATE, gender VARCHAR(10), age INT); INSERT INTO refugees (id, organization, location, assist_date, gender, age) VALUES (1, 'UNHCR', 'Africa', '2018-02-12', 'Female', 34), (2, 'Red Cross', 'Africa', '2018-04-01', 'Male', 27), (3, 'Save the Children', 'Africa', '2018-03-21', 'Female', 19), (4, 'World Vision', 'Africa', '2018-05-05', 'Female', 25);", "sql": "SELECT organization, COUNT(*) as total_female_refugees FROM refugees WHERE location = 'Africa' AND YEAR(assist_date) = 2018 AND gender = 'Female' GROUP BY organization;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 168, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many assists does David Tomlinson have?", "schema": "CREATE TABLE table_name_71 (assists INTEGER, player VARCHAR)", "sql": "SELECT SUM(assists) FROM table_name_71 WHERE player = 'david tomlinson';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 72, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: On what date was the opponent in the final Gwinyai Tongoona?", "schema": "CREATE TABLE table_name_62 (date VARCHAR, opponent_in_the_final VARCHAR)", "sql": "SELECT date FROM table_name_62 WHERE opponent_in_the_final = 'gwinyai tongoona';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 80, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: On what date did the Bills play the Houston Oilers before week 12?", "schema": "CREATE TABLE table_name_79 (date VARCHAR, week VARCHAR, opponent VARCHAR)", "sql": "SELECT date FROM table_name_79 WHERE week < 12 AND opponent = 'houston oilers';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 79, "num_statements": 1} {"question": "What is the percentage of cosmetic products that have a high allergen risk?", "schema": "CREATE TABLE product_ingredients (product_id INT, allergen_risk TEXT); INSERT INTO product_ingredients (product_id, allergen_risk) VALUES (1, 'low'), (2, 'medium'), (3, 'high'), (4, 'low'), (5, 'medium'), (6, 'high'), (7, 'low'), (8, 'medium'), (9, 'low'), (10, 'high');", "sql": "SELECT COUNT(*) as total_products, COUNT(*) FILTER (WHERE allergen_risk = 'high') * 100.0 / COUNT(*) as high_risk_percentage FROM product_ingredients;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": true, "sql_length": 150, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the smallest capacity for Fandok?", "schema": "CREATE TABLE table_name_84 (capacity INTEGER, team VARCHAR)", "sql": "SELECT MIN(capacity) FROM table_name_84 WHERE team = 'fandok';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 62, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which club has 16 league goals for a total of 20?", "schema": "CREATE TABLE table_name_2 (club VARCHAR, league_goals VARCHAR, total VARCHAR)", "sql": "SELECT club FROM table_name_2 WHERE league_goals = '16' AND total = 20;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 71, "num_statements": 1} {"question": "What is the landfill capacity for the country of Germany for the year 2025?'", "schema": "CREATE TABLE country_landfill_capacity (country VARCHAR(20), year INT, capacity INT); INSERT INTO country_landfill_capacity (country, year, capacity) VALUES ('Germany', 2025, 6000000);", "sql": "SELECT capacity FROM country_landfill_capacity WHERE country = 'Germany' AND year = 2025;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 89, "num_statements": 1} {"question": "How many electric vehicles are there in each state in the US?", "schema": "CREATE TABLE us_vehicles (state VARCHAR(20), vehicle_type VARCHAR(20), quantity INT);", "sql": "SELECT state, vehicle_type, SUM(quantity) AS total_electric_vehicles FROM us_vehicles WHERE vehicle_type = 'electric' GROUP BY state;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 133, "num_statements": 1} {"question": "Identify the community health workers who speak the same language as their clients, if any.", "schema": "CREATE TABLE community_health_workers (id INT, name VARCHAR(50), language VARCHAR(20)); INSERT INTO community_health_workers (id, name, language) VALUES (1, 'Jamila Smith', 'English'), (2, 'Pedro Rodriguez', 'Spanish'), (3, 'Thanh Nguyen', 'Vietnamese'); CREATE TABLE clients (id INT, name VARCHAR(50), language VARCHAR(20)); INSERT INTO clients (id, name, language) VALUES (1, 'John Smith', 'English'), (2, 'Maria Garcia', 'Spanish'), (3, 'Tran Nguyen', 'Vietnamese'), (4, 'Alex Johnson', 'Russian');", "sql": "SELECT chw.name AS worker_name, cl.name AS client_name, chw.language FROM community_health_workers chw, clients cl WHERE chw.language = cl.language;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 148, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the Atlantic Europe when age is 10,000 years?", "schema": "CREATE TABLE table_22860_1 (atlantic_europe VARCHAR, age__before_ VARCHAR)", "sql": "SELECT atlantic_europe FROM table_22860_1 WHERE age__before_ = '10,000 years';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 78, "num_statements": 1} {"question": "How many public meetings were held by all departments in the month of July for the year 2021?", "schema": "CREATE TABLE Department (id INT, name VARCHAR(255)); CREATE TABLE Meeting (id INT, department_id INT, datetime DATETIME); INSERT INTO Department (id, name) VALUES (1, 'Education'), (2, 'Healthcare'), (3, 'Transportation'), (4, 'Parks and Recreation'); INSERT INTO Meeting (id, department_id, datetime) VALUES (1, 1, '2021-01-01 10:00:00'), (2, 4, '2021-07-01 14:00:00'), (3, 2, '2021-09-15 09:00:00'), (4, 4, '2021-07-15 11:00:00'), (5, 3, '2021-10-01 16:00:00');", "sql": "SELECT COUNT(*) FROM Meeting INNER JOIN Department ON Meeting.department_id = Department.id WHERE MONTH(datetime) = 7 AND YEAR(datetime) = 2021;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 144, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the main use of the structure that was in redfield, arkansas before 2004?", "schema": "CREATE TABLE table_name_77 (main_use VARCHAR, year VARCHAR, town VARCHAR)", "sql": "SELECT main_use FROM table_name_77 WHERE year < 2004 AND town = 'redfield, arkansas';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 85, "num_statements": 1} {"question": "Create a table 'teams' with columns 'name', 'sport', and 'location'", "schema": "CREATE TABLE athletes (name VARCHAR(100), sport VARCHAR(50), country VARCHAR(50), age INT);", "sql": "CREATE TABLE teams (name VARCHAR(100), sport VARCHAR(50), location VARCHAR(100));", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_table", "is_postgresql_specific": false, "sql_length": 81, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the First Issue date of Bamboo Blade?", "schema": "CREATE TABLE table_name_89 (first_issue VARCHAR, title VARCHAR)", "sql": "SELECT first_issue FROM table_name_89 WHERE title = 'bamboo blade';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 67, "num_statements": 1} {"question": "How much water is conserved in New York through water conservation initiatives?", "schema": "CREATE TABLE conservation_initiatives (state VARCHAR(20), conservation_amount FLOAT); INSERT INTO conservation_initiatives (state, conservation_amount) VALUES ('New York', 200), ('California', 300), ('Texas', 150);", "sql": "SELECT conservation_amount FROM conservation_initiatives WHERE state = 'New York';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 82, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which name has notes b and the game of tennis?", "schema": "CREATE TABLE table_name_13 (name VARCHAR, sport VARCHAR, notes VARCHAR)", "sql": "SELECT name FROM table_name_13 WHERE sport = 'tennis' AND notes = 'b';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 70, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many draws took place for team Corinthians with more than 5 losses?", "schema": "CREATE TABLE table_name_81 (drawn VARCHAR, team VARCHAR, lost VARCHAR)", "sql": "SELECT COUNT(drawn) FROM table_name_81 WHERE team = 'corinthians' AND lost > 5;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 79, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'unaccent' (example 14).", "schema": null, "sql": "SELECT unaccent('unaccent', '1½');", "explanation": "Example query from the 'unaccent' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 34, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who took the winning slot at the Vojens venue?", "schema": "CREATE TABLE table_name_29 (winners VARCHAR, venue VARCHAR)", "sql": "SELECT winners FROM table_name_29 WHERE venue = 'vojens';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 57, "num_statements": 1} {"question": "Calculate the average salinity level in the Pacific and Atlantic oceans.", "schema": "CREATE TABLE salinity (location VARCHAR(255), year INT, avg_salinity FLOAT);", "sql": "SELECT location, AVG(avg_salinity) FROM salinity WHERE location IN ('Pacific Ocean', 'Atlantic Ocean') GROUP BY location;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 121, "num_statements": 1} {"question": "What is the average recycling rate (%) for urban areas in 2019?", "schema": "CREATE TABLE recycling_rates(region VARCHAR(255), year INT, recycling_rate FLOAT);", "sql": "SELECT AVG(recycling_rate) FROM recycling_rates WHERE region LIKE '%urban%' AND year = 2019;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 92, "num_statements": 1} {"question": "Which countries have the most excavation sites?", "schema": "CREATE TABLE Sites (SiteID INT, Country VARCHAR(50), Type VARCHAR(50)); INSERT INTO Sites (SiteID, Country, Type) VALUES (1, 'Egypt', 'Ancient Ruins'); INSERT INTO Sites (SiteID, Country, Type) VALUES (2, 'Mexico', 'Mayan Temples'); INSERT INTO Sites (SiteID, Country, Type) VALUES (3, 'Peru', 'Incan Fortress'); INSERT INTO Sites (SiteID, Country, Type) VALUES (4, 'Egypt', 'Pyramid'); INSERT INTO Sites (SiteID, Country, Type) VALUES (5, 'Mexico', 'Aztec City'); CREATE TABLE Excavations (ExcavationID INT, Site VARCHAR(50)); INSERT INTO Excavations (ExcavationID, Site) VALUES (1, 'Ancient Ruins'); INSERT INTO Excavations (ExcavationID, Site) VALUES (2, 'Mayan Temples'); INSERT INTO Excavations (ExcavationID, Site) VALUES (3, 'Incan Fortress'); INSERT INTO Excavations (ExcavationID, Site) VALUES (4, 'Pyramid'); INSERT INTO Excavations (ExcavationID, Site) VALUES (5, 'Aztec City');", "sql": "SELECT S.Country, COUNT(E.Site) AS Excavation_Count FROM Sites S INNER JOIN Excavations E ON S.Site = E.Site GROUP BY S.Country;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 128, "num_statements": 1} {"question": "What is the average area of land used for farming by indigenous farmers?", "schema": "CREATE TABLE farmer (id INT PRIMARY KEY, name VARCHAR(50), gender VARCHAR(10), is_indigenous BOOLEAN, area_in_hectares INT); INSERT INTO farmer (id, name, gender, is_indigenous, area_in_hectares) VALUES (1, 'Jamal', 'Male', FALSE, 3000), (2, 'Aisha', 'Female', FALSE, 2500), (3, 'Samir', 'Male', FALSE, 2000), (4, 'Nina', 'Female', TRUE, 5000);", "sql": "SELECT gender, AVG(area_in_hectares) FROM farmer WHERE is_indigenous = TRUE GROUP BY gender;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 92, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: WHAT ARE THE RACES WHEN FLAPS ARE ZERO, PODIUMS ARE LARGER THAN 0, SEASON IS 2008, AND POLE SMALLER THAN 1?", "schema": "CREATE TABLE table_name_93 (races INTEGER, pole VARCHAR, season VARCHAR, flaps VARCHAR, podiums VARCHAR)", "sql": "SELECT SUM(races) FROM table_name_93 WHERE flaps = 0 AND podiums > 0 AND season = '2008' AND pole < 1;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 102, "num_statements": 1} {"question": "What is the average number of emergencies handled per day by police stations in districts with low crime rates?", "schema": "CREATE TABLE district_crime_rates (did INT, rate INT, PRIMARY KEY(did)); CREATE TABLE station_emergencies (eid INT, sid INT, time TIMESTAMP, PRIMARY KEY(eid), FOREIGN KEY(sid) REFERENCES stations(sid));", "sql": "SELECT AVG(COUNT(se.eid) / TIMESTAMPDIFF(DAY, (SELECT MIN(time) FROM station_emergencies se2 WHERE se2.sid = se.sid), (SELECT MAX(time) FROM station_emergencies se3 WHERE se3.sid = se.sid))) FROM station_emergencies se JOIN districts d ON se.sid IN (SELECT sid FROM stations WHERE did = d.did) WHERE d.rate < (SELECT AVG(rate) FROM district_crime_rates);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 354, "num_statements": 1} {"question": "What was the total revenue for the state of Colorado in the second quarter of 2022?", "schema": "CREATE TABLE sales (id INT, state VARCHAR(50), quarter INT, revenue FLOAT); INSERT INTO sales (id, state, quarter, revenue) VALUES (1, 'California', 1, 25000.0), (2, 'California', 2, 30000.0), (3, 'Colorado', 1, 20000.0), (4, 'Colorado', 2, 28000.0);", "sql": "SELECT SUM(revenue) FROM sales WHERE state = 'Colorado' AND quarter = 2;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 72, "num_statements": 1} {"question": "What is the average age of patients who received therapy sessions in the state of California?", "schema": "CREATE TABLE patients (patient_id INT, age INT, gender VARCHAR(20), state VARCHAR(20)); INSERT INTO patients (patient_id, age, gender, state) VALUES (1, 35, 'Female', 'California'); INSERT INTO patients (patient_id, age, gender, state) VALUES (2, 42, 'Male', 'Texas'); CREATE TABLE therapy_sessions (session_id INT, patient_id INT, therapist_id INT, session_date DATE); INSERT INTO therapy_sessions (session_id, patient_id, therapist_id, session_date) VALUES (1, 1, 3, '2021-03-15');", "sql": "SELECT AVG(patients.age) FROM patients INNER JOIN therapy_sessions ON patients.patient_id = therapy_sessions.patient_id WHERE patients.state = 'California';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 156, "num_statements": 1} {"question": "pgTAP test for Pgtap--1.1.0--1.2.0 (assertion 67).", "schema": null, "sql": "-- isnt_normal_function( schema, function )\nCREATE OR REPLACE FUNCTION isnt_normal_function( NAME, NAME )\nRETURNS TEXT AS $$\n SELECT _func_compare(\n $1, $2, NOT _type_func('f', $1, $2),\n 'Function ' || quote_ident($1) || '.' || quote_ident($2) || '() should not be a normal function'\n );\n$$ LANGUAGE sql;", "explanation": "SQL assertion from pgTAP test suite for Pgtap--1.1.0--1.2.0.", "validation_query": null, "source": "pgtap_tests", "difficulty": "advanced", "category": "plpgsql_function", "is_postgresql_specific": true, "sql_length": 324, "num_statements": 2} {"question": "List all clinical trials conducted for oncology drugs in Mexico, along with their approval status and completion date.", "schema": "CREATE TABLE clinical_trials (id INT, drug_name VARCHAR(255), trial_location VARCHAR(255), trial_status VARCHAR(255), completion_date DATE); INSERT INTO clinical_trials (id, drug_name, trial_location, trial_status, completion_date) VALUES (1, 'DrugB', 'Mexico', 'Approved', '2018-12-31'); INSERT INTO clinical_trials (id, drug_name, trial_location, trial_status, completion_date) VALUES (2, 'DrugC', 'Mexico', 'Pending', '2021-03-01');", "sql": "SELECT * FROM clinical_trials WHERE trial_location = 'Mexico' AND drug_name LIKE '%oncology%' AND (trial_status = 'Approved' OR trial_status = 'Pending');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 154, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the type of the match with a win result and Michael Gomez as the opponent?", "schema": "CREATE TABLE table_name_79 (type VARCHAR, res VARCHAR, opponent VARCHAR)", "sql": "SELECT type FROM table_name_79 WHERE res = 'win' AND opponent = 'michael gomez';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 80, "num_statements": 1} {"question": "Find the number of new threat intelligence reports created per day in the last week.", "schema": "CREATE TABLE threat_intelligence (report_id INT, creation_date DATE); INSERT INTO threat_intelligence VALUES (1, '2021-07-01'), (2, '2021-07-02'), (3, '2021-07-02');", "sql": "SELECT creation_date, COUNT(*) OVER (PARTITION BY creation_date) FROM threat_intelligence WHERE creation_date >= CURRENT_DATE - INTERVAL '7 days';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "query_window_function", "is_postgresql_specific": true, "sql_length": 146, "num_statements": 1} {"question": "What is the maximum donation amount received from a single donor in the last month?", "schema": "CREATE TABLE Donations (donorID INT, donationDate DATE, donationAmount DECIMAL(10,2)); INSERT INTO Donations (donorID, donationDate, donationAmount) VALUES (1, '2022-03-02', 150.50), (2, '2022-03-15', 200.00), (3, '2022-03-27', 125.75);", "sql": "SELECT MAX(donationAmount) FROM Donations WHERE donationDate >= '2022-03-01' AND donationDate <= '2022-03-31';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 110, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the highest grid for Patrick Carpentier?", "schema": "CREATE TABLE table_name_77 (grid INTEGER, driver VARCHAR)", "sql": "SELECT MAX(grid) FROM table_name_77 WHERE driver = 'patrick carpentier';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 72, "num_statements": 1} {"question": "What is the average claim amount for policyholders living in 'California'?", "schema": "CREATE TABLE policyholders (id INT, name TEXT, state TEXT); INSERT INTO policyholders (id, name, state) VALUES (1, 'John Doe', 'California'); INSERT INTO policyholders (id, name, state) VALUES (2, 'Jane Smith', 'Texas'); CREATE TABLE claims (id INT, policyholder_id INT, amount INT); INSERT INTO claims (id, policyholder_id, amount) VALUES (1, 1, 5000); INSERT INTO claims (id, policyholder_id, amount) VALUES (2, 1, 2000); INSERT INTO claims (id, policyholder_id, amount) VALUES (3, 2, 3000);", "sql": "SELECT AVG(amount) FROM claims JOIN policyholders ON claims.policyholder_id = policyholders.id WHERE policyholders.state = 'California';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 136, "num_statements": 1} {"question": "List the names of countries that have both eco-friendly hotels and cultural heritage sites, but no virtual tours.", "schema": "CREATE TABLE eco_hotels (hotel_id INT, country VARCHAR(20), name VARCHAR(50)); INSERT INTO eco_hotels (hotel_id, country, name) VALUES (1, 'Germany', 'Green Hotel'), (2, 'Portugal', 'Eco Lodge'), (3, 'France', 'Sustainable Suites'); CREATE TABLE cultural_sites (site_id INT, country VARCHAR(20), type VARCHAR(20)); INSERT INTO cultural_sites (site_id, country, type) VALUES (1, 'Germany', 'heritage'), (2, 'Portugal', 'heritage'), (3, 'France', 'heritage'); CREATE TABLE virtual_tours (tour_id INT, country VARCHAR(20), type VARCHAR(20)); INSERT INTO virtual_tours (tour_id, country, type) VALUES (1, 'Germany', 'virtual'), (2, 'Portugal', 'virtual');", "sql": "(SELECT country FROM eco_hotels WHERE name IS NOT NULL) INTERSECT (SELECT country FROM cultural_sites WHERE type = 'heritage') EXCEPT (SELECT country FROM virtual_tours WHERE type = 'virtual');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 193, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the location when the stolen ends is 12 and shot pct is 77%?", "schema": "CREATE TABLE table_1543845_63 (locale VARCHAR, stolen_ends VARCHAR, shot_pct VARCHAR)", "sql": "SELECT locale FROM table_1543845_63 WHERE stolen_ends = 12 AND shot_pct = '77%';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 80, "num_statements": 1} {"question": "List all space missions that have involved international collaboration, along with the participating countries.", "schema": "CREATE TABLE space_missions (id INT, mission_name VARCHAR(50), launch_date DATE, country_of_origin VARCHAR(50)); INSERT INTO space_missions (id, mission_name, launch_date, country_of_origin) VALUES (1, 'Artemis I', '2022-08-29', 'USA'); INSERT INTO space_missions (id, mission_name, launch_date, country_of_origin) VALUES (2, 'ExoMars', '2016-03-14', 'Europe');", "sql": "SELECT mission_name, country_of_origin FROM space_missions WHERE country_of_origin <> 'USA' AND country_of_origin <> (SELECT country_of_origin FROM space_missions WHERE mission_name = 'Artemis I');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 197, "num_statements": 1} {"question": "Find the average rating of movies directed by female directors.", "schema": "CREATE TABLE movies (id INT, title VARCHAR(255), release_year INT, director_gender VARCHAR(10), rating DECIMAL(2,1)); INSERT INTO movies (id, title, release_year, director_gender, rating) VALUES (1, 'Movie1', 2020, 'Female', 8.2), (2, 'Movie2', 2019, 'Male', 7.1), (3, 'Movie3', 2018, 'Female', 9.0);", "sql": "SELECT AVG(rating) FROM movies WHERE director_gender = 'Female';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 64, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the 1st leg that had a 4-2 aggregate?", "schema": "CREATE TABLE table_name_33 (aggregate VARCHAR)", "sql": "SELECT 1 AS st_leg FROM table_name_33 WHERE aggregate = '4-2';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 62, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the course type on 22 May?", "schema": "CREATE TABLE table_name_13 (type VARCHAR, date VARCHAR)", "sql": "SELECT type FROM table_name_13 WHERE date = '22 may';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 53, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What kind of Replaced has a Outgoing manager of guillermo sanguinetti?", "schema": "CREATE TABLE table_name_95 (replaced_by VARCHAR, outgoing_manager VARCHAR)", "sql": "SELECT replaced_by FROM table_name_95 WHERE outgoing_manager = 'guillermo sanguinetti';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 87, "num_statements": 1} {"question": "Identify customers who have made transactions in both the 'Stocks' and 'Bonds' categories.", "schema": "CREATE TABLE transactions (id INT, customer_id INT, category VARCHAR(50), amount DECIMAL(10,2)); INSERT INTO transactions (id, customer_id, category, amount) VALUES (1, 101, 'Stocks', 500.00), (2, 102, 'Bonds', 350.00), (3, 103, 'Stocks', 700.00), (4, 104, 'Bonds', 600.00), (5, 105, 'Stocks', 400.00), (6, 105, 'Bonds', 800.00);", "sql": "SELECT customer_id FROM transactions WHERE category = 'Stocks' INTERSECT SELECT customer_id FROM transactions WHERE category = 'Bonds';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 135, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the total erp w of class c3, which has a frequency mhz less than 89.9?", "schema": "CREATE TABLE table_name_58 (erp_w VARCHAR, class VARCHAR, frequency_mhz VARCHAR)", "sql": "SELECT COUNT(erp_w) FROM table_name_58 WHERE class = 'c3' AND frequency_mhz < 89.9;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 83, "num_statements": 1} {"question": "List all ports that have had cargo handled on a Sunday.", "schema": "CREATE TABLE ports (port_id INT, port_name VARCHAR(50), city VARCHAR(50), country VARCHAR(50)); CREATE TABLE cargo (cargo_id INT, port_id INT, weight FLOAT, handling_date DATE);", "sql": "SELECT DISTINCT port_name FROM cargo c JOIN ports p ON c.port_id = p.port_id WHERE DAYOFWEEK(handling_date) = 1;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 112, "num_statements": 1} {"question": "Write the SQL definition from PostgreSQL contrib extension 'postgres_fdw' (item 360).", "schema": null, "sql": "-- Testing USING OPERATOR() in ORDER BY within aggregate.\n-- For this, we need user defined operators along with operator family and\n-- operator class. Create those and then add them in extension. Note that\n-- user defined objects are considered unshippable unless they are part of\n-- the extension.\ncreate operator public.<^ (\n leftarg = int4,\n rightarg = int4,\n procedure = int4eq\n);", "explanation": "SQL definition from the 'postgres_fdw' PostgreSQL contrib extension.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "ddl_advanced", "is_postgresql_specific": true, "sql_length": 387, "num_statements": 1} {"question": "Write the SQL definition from PostgreSQL contrib extension 'contrib' (item 1).", "schema": null, "sql": "/* contrib/pageinspect/pageinspect--1.2--1.3.sql */\n\n-- complain if script is sourced in psql, rather than via ALTER EXTENSION\n\\echo Use \"ALTER EXTENSION pageinspect UPDATE TO '1.3'\" to load this file. \\quit\n\n--\n-- brin_page_type()\n--\nCREATE FUNCTION brin_page_type(IN page bytea)\nRETURNS text\nAS 'MODULE_PATHNAME', 'brin_page_type'\nLANGUAGE C STRICT;", "explanation": "SQL definition from the 'contrib' PostgreSQL contrib extension.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 351, "num_statements": 1} {"question": "What is the maximum ocean acidification level in the Arctic Ocean?", "schema": "CREATE TABLE ocean_acidification (location text, level numeric); INSERT INTO ocean_acidification (location, level) VALUES ('Arctic Ocean', 8.2), ('Atlantic Ocean', 7.9);", "sql": "SELECT MAX(level) FROM ocean_acidification WHERE location = 'Arctic Ocean';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 75, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What skip has denmark as the country?", "schema": "CREATE TABLE table_name_58 (skip VARCHAR, country VARCHAR)", "sql": "SELECT skip FROM table_name_58 WHERE country = 'denmark';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 57, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'xml' (example 136).", "schema": null, "sql": "CREATE VIEW xmlview3 AS SELECT xmlelement(name element, xmlattributes (1 as \":one:\", 'deuce' as two), 'content&');", "explanation": "DDL from PostgreSQL core regression test for Xml.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "ddl_view", "is_postgresql_specific": false, "sql_length": 114, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the score in a place of t5 in the United States?", "schema": "CREATE TABLE table_name_25 (score VARCHAR, place VARCHAR, nation VARCHAR)", "sql": "SELECT score FROM table_name_25 WHERE place = 't5' AND nation = 'united states';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 80, "num_statements": 1} {"question": "Write the PL/pgSQL object from PostgreSQL regression test 'triggers' (example 359).", "schema": null, "sql": "drop trigger child2_insert_trig on child2;", "explanation": "PL/pgSQL object from PostgreSQL core test for Triggers.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 42, "num_statements": 1} {"question": "How many countries are in the 'countries' table?", "schema": "CREATE TABLE countries (id INT PRIMARY KEY, name VARCHAR(255), region VARCHAR(255)); INSERT INTO countries (id, name, region) VALUES (1, 'Canada', 'North America'), (2, 'Mexico', 'North America'), (3, 'Brazil', 'South America'), (4, 'Argentina', 'South America'), (5, 'India', 'Asia'), (6, 'China', 'Asia');", "sql": "SELECT COUNT(*) FROM countries;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 31, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'numeric' (example 238).", "schema": null, "sql": "INSERT INTO num_exp_mul VALUES (5,6,'1539707782.76899778633766');", "explanation": "DML from PostgreSQL core regression test for Numeric.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 65, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the most tier 1 capital for irish nationwide", "schema": "CREATE TABLE table_22368322_2 (_€_million VARCHAR, tier_1_capital INTEGER, institution VARCHAR)", "sql": "SELECT MAX(tier_1_capital), _€_million FROM table_22368322_2 WHERE institution = 'Irish Nationwide';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 100, "num_statements": 1} {"question": "What is the average budget for climate mitigation projects in Europe?", "schema": "CREATE TABLE climate_mitigation_projects (project_id INT, project_name VARCHAR(255), location VARCHAR(255), budget DECIMAL(10,2)); INSERT INTO climate_mitigation_projects (project_id, project_name, location, budget) VALUES (1, 'Carbon Capture in Germany', 'Germany', 4000000.00), (2, 'Forest Conservation in France', 'France', 3000000.00), (3, 'Clean Transportation in UK', 'UK', 5000000.00);", "sql": "SELECT AVG(budget) FROM climate_mitigation_projects WHERE location = 'Europe';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 78, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'rules' (example 346).", "schema": null, "sql": "insert into rules_foo values(1001);", "explanation": "DML from PostgreSQL core regression test for Rules.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 35, "num_statements": 1} {"question": "Update the amount of effective_altruism with id 1 to 2000000", "schema": "CREATE TABLE effective_altruism (id INT PRIMARY KEY, name VARCHAR(100), amount INT, cause VARCHAR(20));", "sql": "UPDATE effective_altruism SET amount = 2000000 WHERE id = 1;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 60, "num_statements": 1} {"question": "How many mining equipment units are there in the 'equipment_inventory' table, broken down by type?", "schema": "CREATE TABLE equipment_inventory (id INT, equipment_type VARCHAR(50), quantity INT); INSERT INTO equipment_inventory (id, equipment_type, quantity) VALUES (1, 'Excavator', 10), (2, 'Drill', 15), (3, 'Haul Truck', 20);", "sql": "SELECT equipment_type, quantity FROM equipment_inventory;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 57, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What's the lowest rank of a player who played in 2012?", "schema": "CREATE TABLE table_name_50 (rank INTEGER, event VARCHAR)", "sql": "SELECT MIN(rank) FROM table_name_50 WHERE event = '2012';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 57, "num_statements": 1} {"question": "Which communities have the highest engagement levels in language preservation in South Asia?", "schema": "CREATE TABLE Communities (community_id INT PRIMARY KEY, community_name VARCHAR(255), region VARCHAR(255), engagement_level INT); INSERT INTO Communities (community_id, community_name, region, engagement_level) VALUES (2, 'Siddi', 'South Asia', 5);", "sql": "SELECT c.community_name, c.region, l.language, l.script, l.speakers, c.engagement_level FROM Communities c INNER JOIN Languages l ON c.region = l.region WHERE c.engagement_level = (SELECT MAX(engagement_level) FROM Communities WHERE region = 'South Asia');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 256, "num_statements": 1} {"question": "Delete all fish records with a weight less than 1kg from the fish_inventory table", "schema": "CREATE TABLE fish_inventory (fish_id INT, species VARCHAR(50), weight FLOAT, location VARCHAR(50));", "sql": "DELETE FROM fish_inventory WHERE weight < 1.0;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_delete", "is_postgresql_specific": false, "sql_length": 46, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What p;layer attended Concordia College?", "schema": "CREATE TABLE table_name_40 (player VARCHAR, college VARCHAR)", "sql": "SELECT player FROM table_name_40 WHERE college = 'concordia';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 61, "num_statements": 1} {"question": "PostgreSQL regression test 'sqljson_jsontable': Write the SELECT query (example 63).", "schema": null, "sql": "SELECT * FROM JSON_TABLE(jsonb '\"a\"', '$' COLUMNS (a jsonb EXISTS PATH '$.a'));", "explanation": "Regression test for Sqljson Jsontable in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT * FROM JSON_TABLE(jsonb '\"a\"', '$' COLUMNS (a jsonb EXISTS PATH '$.a'))) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": true, "sql_length": 79, "num_statements": 1} {"question": "What is the average number of donations per unique donor in the past year?", "schema": "CREATE TABLE donor_donations (donor_id INT, donation_year INT); INSERT INTO donor_donations (donor_id, donation_year) VALUES (1, 2021), (2, 2021), (3, 2021), (4, 2021), (5, 2021);", "sql": "SELECT AVG(cnt) AS avg_donations_per_donor FROM (SELECT donor_id, COUNT(*) AS cnt FROM donor_donations WHERE donation_year = YEAR(CURRENT_DATE) - 1 GROUP BY donor_id) subquery;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 176, "num_statements": 1} {"question": "What is the minimum number of military aircraft maintenance requests recorded for the Navy in the year 2022?", "schema": "CREATE TABLE maintenance_requests (request_id INT, service_branch VARCHAR(255), request_date DATE); INSERT INTO maintenance_requests (request_id, service_branch, request_date) VALUES (1, 'Air Force', '2022-01-01'), (2, 'Navy', '2022-02-02'), (3, 'Air Force', '2022-03-03');", "sql": "SELECT MIN(COUNT(*)) FROM maintenance_requests WHERE service_branch = 'Navy' AND EXTRACT(YEAR FROM request_date) = 2022 GROUP BY service_branch;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 144, "num_statements": 1} {"question": "Show an example of PostgreSQL PSQL-REF (example 5).", "schema": null, "sql": "INSERT INTO tbls1 VALUES ($1, $2) \\parse stmt1 \\bind_named stmt1 'first value' 'second value' \\g;", "explanation": "PostgreSQL PSQL-REF command.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 97, "num_statements": 1} {"question": "What was the total revenue for each salesperson in the month of January?", "schema": "CREATE TABLE salesperson (id INT, name TEXT, revenue FLOAT); INSERT INTO salesperson (id, name, revenue) VALUES (1, 'John', 5000.00), (2, 'Jane', 7000.00);", "sql": "SELECT name, SUM(revenue) FROM salesperson WHERE EXTRACT(MONTH FROM date) = 1 GROUP BY name;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 92, "num_statements": 1} {"question": "What is the average donation amount for each cause in Q3 2021?", "schema": "CREATE TABLE donations (id INT, cause_id INT, amount DECIMAL(10,2), donation_date DATE); INSERT INTO donations (id, cause_id, amount, donation_date) VALUES (1, 1, 50.00, '2021-07-01'), (2, 2, 100.00, '2021-07-15'), (3, 1, 75.00, '2021-08-03'), (4, 3, 200.00, '2021-09-01'), (5, 2, 150.00, '2021-07-30'), (6, 1, 125.00, '2021-08-25');", "sql": "SELECT cause_id, AVG(amount) as avg_donation_amount FROM donations WHERE donation_date BETWEEN '2021-07-01' AND '2021-09-30' GROUP BY cause_id;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 143, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the date successor seated for delegate seat established", "schema": "CREATE TABLE table_224837_4 (date_successor_seated VARCHAR, reason_for_change VARCHAR)", "sql": "SELECT date_successor_seated FROM table_224837_4 WHERE reason_for_change = 'Delegate seat established';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 103, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'domain' (example 50).", "schema": null, "sql": "create domain domainchar4arr varchar(4)[2][3];", "explanation": "DDL from PostgreSQL core regression test for Domain.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_advanced", "is_postgresql_specific": true, "sql_length": 46, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'truncate' (example 15).", "schema": null, "sql": "CREATE TABLE trunc_d (a int REFERENCES trunc_c);", "explanation": "DDL from PostgreSQL core regression test for Truncate.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_table", "is_postgresql_specific": false, "sql_length": 48, "num_statements": 1} {"question": "How many public outreach events have been held at each excavation site, and what was the average attendance?", "schema": "CREATE TABLE ExcavationSites (SiteID int, SiteName varchar(50), Location varchar(50)); CREATE TABLE PublicOutreach (EventID int, SiteID int, EventType varchar(20), Attendance int);", "sql": "SELECT ExcavationSites.SiteName, AVG(PublicOutreach.Attendance) AS AverageAttendance, COUNT(PublicOutreach.EventID) AS NumberOfEvents FROM ExcavationSites INNER JOIN PublicOutreach ON ExcavationSites.SiteID = PublicOutreach.SiteID GROUP BY ExcavationSites.SiteName;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 265, "num_statements": 1} {"question": "What's the total donation amount per quarter?", "schema": "CREATE TABLE Donations (DonationID INT, DonorID INT, DonationDate DATE, Amount FLOAT); INSERT INTO Donations (DonationID, DonorID, DonationDate, Amount) VALUES (1, 1, '2022-01-01', 100.00), (2, 2, '2022-02-15', 150.00);", "sql": "SELECT DATE_PART('quarter', DonationDate) AS Quarter, SUM(Amount) FROM Donations GROUP BY Quarter;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 98, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: what is the years when playoffs is 115?", "schema": "CREATE TABLE table_name_51 (years VARCHAR, playoffs VARCHAR)", "sql": "SELECT years FROM table_name_51 WHERE playoffs = 115;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 53, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the attendance on May 13?", "schema": "CREATE TABLE table_name_76 (attendance VARCHAR, date VARCHAR)", "sql": "SELECT attendance FROM table_name_76 WHERE date = 'may 13';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 59, "num_statements": 1} {"question": "PostgreSQL regression test 'pg_ndistinct': Write the SELECT query (example 70).", "schema": null, "sql": "SELECT * FROM pg_input_error_info('[{\"attributes\" : [2,3], \"ndistinct\" : 4},\n {\"attributes\" : [2,3], \"ndistinct\" : 4}]', 'pg_ndistinct');", "explanation": "Regression test for Pg Ndistinct in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT * FROM pg_input_error_info('[{\"attributes\" : [2,3], \"ndistinct\" : 4},\n {\"attributes\" : [2,3], \"ndistinct\" : 4}]', 'pg_ndistinct')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 145, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What's the English name of the month abbreviated as มิ.ย.?", "schema": "CREATE TABLE table_180802_2 (english_name VARCHAR, abbr VARCHAR)", "sql": "SELECT english_name FROM table_180802_2 WHERE abbr = 'มิ.ย.';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 61, "num_statements": 1} {"question": "What is the total number of players who have participated in esports events in each country?", "schema": "CREATE TABLE EsportsParticipation (PlayerID INT, Country VARCHAR(50), EventID INT); INSERT INTO EsportsParticipation (PlayerID, Country, EventID) VALUES (1, 'USA', 1), (2, 'Canada', 2), (3, 'Mexico', 3), (4, 'Germany', 4), (5, 'France', 5), (6, 'USA', 6), (7, 'Canada', 7), (8, 'Mexico', 8), (9, 'USA', 9), (10, 'Brazil', 10);", "sql": "SELECT Country, COUNT(DISTINCT PlayerID) FROM EsportsParticipation GROUP BY Country;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 84, "num_statements": 1} {"question": "What is the explainability rating for the AI system named 'AI Judge'?", "schema": "CREATE TABLE explainable_ai (ai_system TEXT, rating FLOAT); INSERT INTO explainable_ai (ai_system, rating) VALUES ('AI Judge', 0.75), ('AI Translator', 0.90), ('AI Artist', 0.60);", "sql": "SELECT rating FROM explainable_ai WHERE ai_system = 'AI Judge';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 63, "num_statements": 1} {"question": "List the number of investments made in 'renewable energy' and 'green infrastructure' strategies, by year.", "schema": "CREATE TABLE investments_strategies_3 (id INT, investment_year INT, strategy VARCHAR(30), investment_amount FLOAT); INSERT INTO investments_strategies_3 (id, investment_year, strategy, investment_amount) VALUES (1, 2019, 'renewable energy', 120000), (2, 2020, 'green infrastructure', 185000), (3, 2018, 'renewable energy', 175000);", "sql": "SELECT investment_year, COUNT(*) FROM investments_strategies_3 WHERE strategy IN ('renewable energy', 'green infrastructure') GROUP BY investment_year;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 151, "num_statements": 1} {"question": "List all mobile subscribers who have exceeded their monthly data limit in the last month.", "schema": "CREATE TABLE mobile_subscribers (subscriber_id INT, data_limit DECIMAL(5,2), data_usage DECIMAL(5,2), last_update DATE); INSERT INTO mobile_subscribers (subscriber_id, data_limit, data_usage, last_update) VALUES (1, 3.0, 3.2, '2022-01-15'), (2, 4.0, 3.8, '2022-01-07'), (3, 5.0, 4.9, '2022-01-20');", "sql": "SELECT subscriber_id FROM mobile_subscribers WHERE data_usage > data_limit AND last_update >= DATE_SUB(CURRENT_DATE, INTERVAL 1 MONTH);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 135, "num_statements": 1} {"question": "Show the total military innovation budget by fiscal year", "schema": "CREATE TABLE military_innovation (id INT, fiscal_year INT, budget DECIMAL(10, 2)); INSERT INTO military_innovation (id, fiscal_year, budget) VALUES (1, 2018, 50000), (2, 2019, 55000), (3, 2020, 60000), (4, 2021, 65000);", "sql": "SELECT fiscal_year, SUM(budget) as total_budget FROM military_innovation GROUP BY fiscal_year;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 94, "num_statements": 1} {"question": "Update the funding amount for startup with id 1 in the 'funding' table", "schema": "funding(id, startup_id, funding_round, funding_amount, date_announced)", "sql": "UPDATE funding SET funding_amount = 1000000 WHERE startup_id = 1;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 65, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is Rank Points, when Event is \"WC Milan\", and when Shooter is \"Lalita Yauhleuskaya ( AUS )\"?", "schema": "CREATE TABLE table_name_77 (rank_points VARCHAR, event VARCHAR, shooter VARCHAR)", "sql": "SELECT rank_points FROM table_name_77 WHERE event = 'wc milan' AND shooter = 'lalita yauhleuskaya ( aus )';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 107, "num_statements": 1} {"question": "Identify the total budget allocated for public health and education services in the city of Seattle and Austin, removing any duplicates.", "schema": "CREATE TABLE HealthEducation (service VARCHAR(20), city VARCHAR(20), budget INT); INSERT INTO HealthEducation (service, city, budget) VALUES ('Public Health', 'Seattle', 6000000), ('Education', 'Seattle', 5000000), ('Public Health', 'Austin', 6500000), ('Education', 'Austin', 4500000);", "sql": "SELECT DISTINCT(service), SUM(budget) FROM HealthEducation WHERE city IN ('Seattle', 'Austin') GROUP BY service;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 112, "num_statements": 1} {"question": "Insert a new policy with policy ID 3, policy type 'Homeowners', and effective date '2022-01-01'", "schema": "CREATE TABLE policy (policy_id INT, policy_type VARCHAR(20), effective_date DATE);", "sql": "INSERT INTO policy (policy_id, policy_type, effective_date) VALUES (3, 'Homeowners', '2022-01-01');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 99, "num_statements": 1} {"question": "PostgreSQL regression test 'jsonb_jsonpath': Write the SELECT query (example 113).", "schema": null, "sql": "select jsonb_path_query('{\"a\": {\"b\": 1}}', 'lax $.**{0 to last}');", "explanation": "Regression test for Jsonb Jsonpath in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select jsonb_path_query('{\"a\": {\"b\": 1}}', 'lax $.**{0 to last}')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "advanced", "category": "query_select", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'pg_stat_statements' (example 73).", "schema": null, "sql": "SELECT * FROM test_squash_bigint WHERE data IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);", "explanation": "Example query from the 'pg_stat_statements' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 83, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Where did Trinidad and Tobago play?", "schema": "CREATE TABLE table_name_99 (place VARCHAR, country VARCHAR)", "sql": "SELECT place FROM table_name_99 WHERE country = 'trinidad and tobago';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 70, "num_statements": 1} {"question": "What is the average age of children supported by 'Save the Children' in 'South America'?", "schema": "CREATE TABLE children (id INT, name VARCHAR(255), age INT, location VARCHAR(255), supported_by VARCHAR(255), support_date DATE); INSERT INTO children (id, name, age, location, supported_by, support_date) VALUES (1, 'James Smith', 12, 'South America', 'Save the Children', '2022-01-01');", "sql": "SELECT AVG(age) FROM children WHERE location = 'South America' AND supported_by = 'Save the Children';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 102, "num_statements": 1} {"question": "pgTAP test for Pgtap--Unpackaged--0.91.0 (assertion 315).", "schema": null, "sql": "ALTER EXTENSION pgtap ADD FUNCTION has_type( NAME, NAME );", "explanation": "SQL assertion from pgTAP test suite for Pgtap--Unpackaged--0.91.0.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 58, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many games played when the points for is 369?", "schema": "CREATE TABLE table_name_89 (played VARCHAR, points_for VARCHAR)", "sql": "SELECT played FROM table_name_89 WHERE points_for = '369';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 58, "num_statements": 1} {"question": "PostgreSQL regression test 'jsonb': Write the SELECT query (example 969).", "schema": null, "sql": "select 'null'::jsonb::int8;", "explanation": "Regression test for Jsonb in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select 'null'::jsonb::int8) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": true, "sql_length": 27, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What Etymology ranked 12?", "schema": "CREATE TABLE table_name_58 (etymology VARCHAR, rank VARCHAR)", "sql": "SELECT etymology FROM table_name_58 WHERE rank = 12;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 52, "num_statements": 1} {"question": "Show a SQL definition from the timescaledb project (plan_hypertable_inline, item 4).", "schema": null, "sql": "CREATE OR REPLACE FUNCTION test_f(_ts bigint)\nRETURNS SETOF test LANGUAGE SQL STABLE\nas $f$\n SELECT DISTINCT ON (a) * FROM test WHERE b >= _ts AND b <= _ts + 2\n$f$;", "explanation": "SQL definition from the open-source timescaledb PostgreSQL project.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "plpgsql_function", "is_postgresql_specific": true, "sql_length": 166, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'numeric_big' (example 209).", "schema": null, "sql": "INSERT INTO num_exp_div VALUES (4,6,'113543048739697485358574290.758354267447744932153707340542459183720907885610125346262898114677742971240785031722334497858930434531517077525413654346644836353208132641713415396062580605566225794048569430676355036264762949452090151450855446984773994337170590068740235544320694721909983307239491151139099779296496785240814600627140543144068640768857707110930453204162312973998304574796413938461971472337040811785231390930046688391955000749644938061585377150632133417156866197053052425576957646564943278156977176976876921235395711611898108821587442609611001702344783440618040704066809035404237786023075676374788819144406909313755996914145273176359246052899650387182222905558751208368173052381982668563471143298720677965028880626152749773712037769548408324298835212547215352657271696665387200792785056233953536347605130973626194099064678842085');", "explanation": "DML from PostgreSQL core regression test for Numeric Big.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 878, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'tablespace' (example 98).", "schema": null, "sql": "CREATE TABLE testschema.dflt (a int PRIMARY KEY) PARTITION BY LIST (a) TABLESPACE regress_tblspace;", "explanation": "DDL from PostgreSQL core regression test for Tablespace.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "ddl_table", "is_postgresql_specific": true, "sql_length": 99, "num_statements": 1} {"question": "What is the average number of labor rights violations for each union in the Americas in the last year?", "schema": "CREATE TABLE unions (id INT, name TEXT, location TEXT); INSERT INTO unions (id, name, location) VALUES (1, 'Union X', 'USA'), (2, 'Union Y', 'Canada'); CREATE TABLE violations (id INT, union_id INT, date DATE, violation_count INT); INSERT INTO violations (id, union_id, date, violation_count) VALUES (1, 1, '2021-02-15', 3), (2, 2, '2021-03-01', 5);", "sql": "SELECT u.name, AVG(v.violation_count) FROM unions u JOIN violations v ON u.id = v.union_id WHERE u.location LIKE 'Americas%' AND v.date >= DATEADD(year, -1, CURRENT_DATE) GROUP BY u.name;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 187, "num_statements": 1} {"question": "Find the average donation amount for each program.", "schema": "CREATE TABLE donations (donation_id INT, donor_id INT, program_id INT, amount_donated DECIMAL(10, 2)); INSERT INTO donations VALUES (1, 1, 1, 500.00), (2, 2, 2, 350.00), (3, 1, 1, 200.00); CREATE TABLE programs (program_id INT, program_name TEXT, budget DECIMAL(10, 2)); INSERT INTO programs VALUES (1, 'Education', 10000.00), (2, 'Health', 15000.00); CREATE TABLE donors (donor_id INT, name TEXT); INSERT INTO donors VALUES (1, 'Jamila Johnson'), (2, 'Jose Hernandez');", "sql": "SELECT programs.program_name, AVG(donations.amount_donated) AS avg_donation FROM donations INNER JOIN programs ON donations.program_id = programs.program_id GROUP BY programs.program_name;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 188, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'ltree' (example 25).", "schema": null, "sql": "SELECT index('a.1.2.3.4.5.6','1.2');", "explanation": "Example query from the 'ltree' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 36, "num_statements": 1} {"question": "pgTAP test for Functap (assertion 78).", "schema": null, "sql": "SELECT * FROM check_test(\n function_lang_is( 'why', 'sql', 'whatever' ),\n false,\n 'function_lang_is(non-func, sql, desc)',\n 'whatever',\n ' Function why() does not exist'\n);", "explanation": "SQL assertion from pgTAP test suite for Functap.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 190, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What shows for Unit with a status of nomen oblitum?", "schema": "CREATE TABLE table_name_16 (unit VARCHAR, status VARCHAR)", "sql": "SELECT unit FROM table_name_16 WHERE status = 'nomen oblitum';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 62, "num_statements": 1} {"question": "What is the average play time for each 'Role-playing' game on PC, by publisher?", "schema": "CREATE TABLE Games (Id INT, Name VARCHAR(100), Genre VARCHAR(50), Platform VARCHAR(50), Publisher VARCHAR(100), PlayTime FLOAT); INSERT INTO Games VALUES (1, 'GameG', 'Action', 'PC', 'Publisher1', 20.5), (2, 'GameH', 'Role-playing', 'VR', 'Publisher2', 35.2), (3, 'GameI', 'Action', 'Console', 'Publisher1', 18.4), (4, 'GameJ', 'Role-playing', 'PC', 'Publisher3', 45.6), (5, 'GameK', 'Role-playing', 'Console', 'Publisher3', 30.5), (6, 'GameL', 'Action', 'VR', 'Publisher2', 52.1), (7, 'GameM', 'Role-playing', 'PC', 'Publisher1', 25.8);", "sql": "SELECT Publisher, AVG(PlayTime) AS Avg_PlayTime FROM Games WHERE Genre = 'Role-playing' AND Platform = 'PC' GROUP BY Publisher;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 127, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: what is the rank for athlete hauffe, seifert, kaeufer, adamski?", "schema": "CREATE TABLE table_name_88 (rank INTEGER, athlete VARCHAR)", "sql": "SELECT SUM(rank) FROM table_name_88 WHERE athlete = 'hauffe, seifert, kaeufer, adamski';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 88, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who is the leading scorer of the Villa De Los Barrios visitor?", "schema": "CREATE TABLE table_name_56 (leading_scorer VARCHAR, visitor VARCHAR)", "sql": "SELECT leading_scorer FROM table_name_56 WHERE visitor = 'villa de los barrios';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 80, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the score of the game in which Danny Granger (30) did the high points?", "schema": "CREATE TABLE table_27756164_2 (score VARCHAR, high_points VARCHAR)", "sql": "SELECT score FROM table_27756164_2 WHERE high_points = 'Danny Granger (30)';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 76, "num_statements": 1} {"question": "Delete all records from the 'oil_reservoirs' table where the oil_volume_bbls is less than 500000000", "schema": "CREATE TABLE oil_reservoirs (reservoir_id INT PRIMARY KEY, reservoir_name VARCHAR(255), discovered_year INT, oil_volume_bbls BIGINT);", "sql": "DELETE FROM oil_reservoirs WHERE oil_volume_bbls < 500000000;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_delete", "is_postgresql_specific": false, "sql_length": 61, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the outgoing manager for position in table being 11th", "schema": "CREATE TABLE table_17039232_3 (outgoing_manager VARCHAR, position_in_table VARCHAR)", "sql": "SELECT outgoing_manager FROM table_17039232_3 WHERE position_in_table = '11th';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 79, "num_statements": 1} {"question": "Update the ticket price for basketball matches in the 'basketball_tickets' table in January to 50?", "schema": "CREATE TABLE basketball_tickets (ticket_id INT, match_id INT, price DECIMAL(5,2), date DATE);", "sql": "UPDATE basketball_tickets SET price = 50 WHERE MONTH(date) = 1;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 63, "num_statements": 1} {"question": "What is the average price of ethically sourced clothing items?", "schema": "CREATE TABLE products (product_id INT, category VARCHAR(20), is_ethically_sourced BOOLEAN, price INT); INSERT INTO products (product_id, category, is_ethically_sourced, price) VALUES (1, 'clothing', true, 50), (2, 'electronics', false, 200), (3, 'clothing', true, 75);", "sql": "SELECT AVG(products.price) FROM products WHERE products.category = 'clothing' AND products.is_ethically_sourced = true;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 119, "num_statements": 1} {"question": "Delete all records in the shared_bikes table in Chicago.", "schema": "CREATE TABLE shared_bikes (bike_id INT, city VARCHAR(20), is_electric BOOLEAN); INSERT INTO shared_bikes (bike_id, city, is_electric) VALUES (1, 'New York', true), (2, 'Chicago', true), (3, 'New York', false);", "sql": "DELETE FROM shared_bikes WHERE city = 'Chicago';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_delete", "is_postgresql_specific": false, "sql_length": 48, "num_statements": 1} {"question": "What is the average salary of government employees in Washington DC, and how many of them are there?", "schema": "CREATE TABLE employees (name VARCHAR(255), city VARCHAR(255), salary DECIMAL(10,2), government BOOLEAN); INSERT INTO employees (name, city, salary, government) VALUES ('John Doe', 'Washington DC', 80000.00, TRUE), ('Jane Smith', 'Washington DC', 90000.00, TRUE);", "sql": "SELECT AVG(salary) FROM employees WHERE city = 'Washington DC' AND government = TRUE; SELECT COUNT(*) FROM employees WHERE city = 'Washington DC' AND government = TRUE;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 168, "num_statements": 2} {"question": "What is the percentage of new hires who identify as LGBTQ+ in the HR department in the past year?", "schema": "CREATE TABLE Employees (EmployeeID INT, HireDate DATE, Community VARCHAR(25), Department VARCHAR(25)); INSERT INTO Employees (EmployeeID, HireDate, Community, Department) VALUES (1, '2022-01-01', 'LGBTQ+', 'HR'), (2, '2022-02-15', 'Allied', 'Marketing'), (3, '2022-02-15', 'LGBTQ+', 'IT'), (4, '2021-12-01', 'LGBTQ+', 'HR');", "sql": "SELECT Department, COUNT(*) * 100.0 / (SELECT COUNT(*) FROM Employees WHERE HireDate >= DATEADD(year, -1, GETDATE())) AS Percentage FROM Employees WHERE Community = 'LGBTQ+' AND Department = 'HR' GROUP BY Department;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 216, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who won on 2007-04-14", "schema": "CREATE TABLE table_name_66 (result VARCHAR, date VARCHAR)", "sql": "SELECT result FROM table_name_66 WHERE date = '2007-04-14';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 59, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many Points have an Average smaller than 1, a Played larger than 38, and a Team of gimnasia de la plata?", "schema": "CREATE TABLE table_name_73 (points INTEGER, team VARCHAR, average VARCHAR, played VARCHAR)", "sql": "SELECT SUM(points) FROM table_name_73 WHERE average < 1 AND played > 38 AND team = 'gimnasia de la plata';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 106, "num_statements": 1} {"question": "List the types of military innovation from South Korea.", "schema": "CREATE TABLE sk_innovation (id INT, country VARCHAR(50), type VARCHAR(50)); INSERT INTO sk_innovation (id, country, type) VALUES (1, 'South Korea', 'Drone Swarms'), (2, 'South Korea', 'Stealth Technology'), (3, 'South Korea', 'Artificial Intelligence'), (4, 'South Korea', 'Cyber Warfare');", "sql": "SELECT DISTINCT type FROM sk_innovation WHERE country = 'South Korea';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 70, "num_statements": 1} {"question": "How many units of each garment type were sold in 2021?", "schema": "CREATE TABLE sales (sale_id INT, product_id INT, sale_date DATE, units INT); CREATE TABLE products (product_id INT, product_name VARCHAR(50), product_type VARCHAR(50));", "sql": "SELECT p.product_type, SUM(s.units) as total_units FROM sales s JOIN products p ON s.product_id = p.product_id WHERE s.sale_date BETWEEN '2021-01-01' AND '2021-12-31' GROUP BY p.product_type;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 191, "num_statements": 1} {"question": "PostgreSQL regression test 'stats_ext': Write the SELECT query (example 620).", "schema": null, "sql": "SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_partial WHERE a = 10 AND b = 10 AND c = 10');", "explanation": "Regression test for Stats Ext in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_partial WHERE a = 10 AND b = 10 AND c = 10')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 105, "num_statements": 1} {"question": "What percentage of tourists visiting Sydney speak English?", "schema": "CREATE TABLE language_stats (id INT, city VARCHAR(20), country VARCHAR(10), language VARCHAR(10), num_tourists INT); INSERT INTO language_stats (id, city, country, language, num_tourists) VALUES (1, 'Sydney', 'Australia', 'English', 50000), (2, 'Sydney', 'China', 'Mandarin', 20000), (3, 'Sydney', 'USA', 'English', 30000);", "sql": "SELECT (SUM(CASE WHEN language = 'English' THEN num_tourists ELSE 0 END) * 100.0 / SUM(num_tourists)) AS percentage FROM language_stats WHERE city = 'Sydney';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 158, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which Free polite has a Genitive 3 of *n(i)-ami?", "schema": "CREATE TABLE table_name_40 (free VARCHAR, genitive_3 VARCHAR)", "sql": "SELECT free AS polite FROM table_name_40 WHERE genitive_3 = '*n(i)-ami';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 72, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many times is the score 98–90?", "schema": "CREATE TABLE table_11959669_6 (location_attendance VARCHAR, score VARCHAR)", "sql": "SELECT COUNT(location_attendance) FROM table_11959669_6 WHERE score = '98–90';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 78, "num_statements": 1} {"question": "Show the total impact investment in the Healthcare sector for the year 2022.", "schema": "CREATE TABLE impact_investments (id INT, investment_id INT, sector VARCHAR(255), investment_amount FLOAT, investment_date DATE); INSERT INTO impact_investments (id, investment_id, sector, investment_amount, investment_date) VALUES (1, 3, 'Healthcare', 500000.0, '2022-03-22');", "sql": "SELECT SUM(investment_amount) FROM impact_investments WHERE sector = 'Healthcare' AND investment_date BETWEEN '2022-01-01' AND '2022-12-31';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 140, "num_statements": 1} {"question": "Delete community health worker records that have not been updated in the last 2 years.", "schema": "CREATE TABLE community_health_workers (id INT, name TEXT, last_update DATE); INSERT INTO community_health_workers (id, name, last_update) VALUES (1, 'Sophia Lee', '2019-01-01'), (2, 'Daniel Park', '2020-01-01');", "sql": "DELETE FROM community_health_workers WHERE last_update < DATE_SUB(CURDATE(), INTERVAL 2 YEAR);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_delete", "is_postgresql_specific": false, "sql_length": 94, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which Champion has a Coeff of 1.000?", "schema": "CREATE TABLE table_name_85 (champions VARCHAR, coeff VARCHAR)", "sql": "SELECT champions FROM table_name_85 WHERE coeff = '1.000';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 58, "num_statements": 1} {"question": "What is the total energy produced by solar power plants in Australia?", "schema": "CREATE TABLE solar_production (country VARCHAR(20), energy NUMERIC(12,2)); INSERT INTO solar_production (country, energy) VALUES ('Australia', 1500.0), ('Australia', 1600.5), ('Australia', 1400.0), ('Australia', 1750.2);", "sql": "SELECT SUM(energy) FROM solar_production WHERE country = 'Australia';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 69, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'generated_stored' (example 20).", "schema": null, "sql": "CREATE TABLE gtest_err_7b (a int PRIMARY KEY, b int GENERATED ALWAYS AS (row_number() OVER (ORDER BY a)) STORED);", "explanation": "DDL from PostgreSQL core regression test for Generated Stored.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "ddl_table", "is_postgresql_specific": true, "sql_length": 113, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who's ranked less than 2?", "schema": "CREATE TABLE table_name_32 (player VARCHAR, rank INTEGER)", "sql": "SELECT player FROM table_name_32 WHERE rank < 2;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 48, "num_statements": 1} {"question": "What is the total oil production in the Gulf of Mexico in 2019?", "schema": "CREATE TABLE production_figures (well_id INT, year INT, oil_production INT, gas_production INT); INSERT INTO production_figures (well_id, year, oil_production, gas_production) VALUES (1, 2019, 120000, 50000); INSERT INTO production_figures (well_id, year, oil_production, gas_production) VALUES (2, 2018, 130000, 60000); INSERT INTO production_figures (well_id, year, oil_production, gas_production) VALUES (3, 2019, 110000, 45000);", "sql": "SELECT SUM(oil_production) FROM production_figures WHERE year = 2019 AND region = 'Gulf of Mexico';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 99, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the 3rd Party in the Election of 1922?", "schema": "CREATE TABLE table_name_35 (election VARCHAR)", "sql": "SELECT 3 AS rd_party FROM table_name_35 WHERE election = '1922';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 64, "num_statements": 1} {"question": "Identify the number of military equipment repairs for each type in the last month", "schema": "CREATE TABLE EquipmentRepairs (id INT, repair_date DATE, equipment_type VARCHAR(50), repair_cost FLOAT); INSERT INTO EquipmentRepairs (id, repair_date, equipment_type, repair_cost) VALUES (1, '2022-02-15', 'Artillery', 5000);", "sql": "SELECT equipment_type, COUNT(*) FROM EquipmentRepairs WHERE repair_date >= CURDATE() - INTERVAL 1 MONTH GROUP BY equipment_type;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 128, "num_statements": 1} {"question": "Show the number of garments made from ethical materials, grouped by garment type.", "schema": "CREATE TABLE garment_type_cost (id INT, garment_type VARCHAR(255), production_cost DECIMAL(10,2));", "sql": "SELECT garment_type, COUNT(*) AS garment_count FROM garment_type_cost GROUP BY garment_type;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 92, "num_statements": 1} {"question": "Which suppliers are located in Canada and have a sustainability rating of 4 or higher?", "schema": "CREATE TABLE Suppliers (id INT PRIMARY KEY, supplier_name VARCHAR(255), country VARCHAR(100), sustainability_rating INT); INSERT INTO Suppliers (id, supplier_name, country, sustainability_rating) VALUES (1, 'Green Farms', 'Canada', 4), (2, 'Tropical Fruits', 'Brazil', 3), (3, 'Ocean Harvest', 'Norway', 5);", "sql": "SELECT supplier_name FROM Suppliers WHERE country = 'Canada' AND sustainability_rating >= 4;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 92, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'aggregates' (example 305).", "schema": null, "sql": "create or replace view agg_view1 as\n select aggfns(a,b,c order by c using ~<~)\n from (values (1,3,'foo'),(0,null,null),(2,2,'bar'),(3,1,'baz')) v(a,b,c);", "explanation": "DDL from PostgreSQL core regression test for Aggregates.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 157, "num_statements": 1} {"question": "What is the minimum AI adoption score for hotels in 'Africa'?", "schema": "CREATE TABLE ai_adoption (hotel_id INT, score INT);", "sql": "SELECT MIN(score) FROM ai_adoption WHERE country = 'Africa';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 60, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the results of the game at Doha?", "schema": "CREATE TABLE table_name_51 (result VARCHAR, venue VARCHAR)", "sql": "SELECT result FROM table_name_51 WHERE venue = 'doha';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 54, "num_statements": 1} {"question": "How many Freedom of Information Act (FOIA) requests were submitted per state in 'foia_requests' table?", "schema": "CREATE TABLE foia_requests (request_id INT, request_date DATE, request_state VARCHAR(255));", "sql": "SELECT request_state, COUNT(request_id) AS foia_requests_per_state FROM foia_requests GROUP BY request_state;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 109, "num_statements": 1} {"question": "What is the total number of customers and their total assets value for each investment strategy?", "schema": "CREATE TABLE customers (id INT, name TEXT, age INT, country TEXT, assets FLOAT, investment_strategy TEXT); INSERT INTO customers (id, name, age, country, assets, investment_strategy) VALUES (1, 'John Doe', 45, 'USA', 250000.00, 'Conservative'); INSERT INTO customers (id, name, age, country, assets, investment_strategy) VALUES (2, 'Jane Smith', 34, 'Canada', 320000.00, 'Moderate'); INSERT INTO customers (id, name, age, country, assets, investment_strategy) VALUES (3, 'Alice Johnson', 29, 'UK', 450000.00, 'Aggressive'); INSERT INTO customers (id, name, age, country, assets, investment_strategy) VALUES (4, 'Bob Brown', 51, 'UK', 150000.00, 'Conservative'); INSERT INTO customers (id, name, age, country, assets, investment_strategy) VALUES (5, 'Charlie Davis', 48, 'USA', 800000.00, 'Aggressive'); INSERT INTO customers (id, name, age, country, assets, investment_strategy) VALUES (6, 'David Kim', 38, 'Singapore', 520000.00, 'Moderate');", "sql": "SELECT investment_strategy, COUNT(*), SUM(assets) FROM customers GROUP BY investment_strategy;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 94, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which rounds do Arciero Wines sponsor a March 85c chassis?", "schema": "CREATE TABLE table_name_7 (rounds VARCHAR, sponsor_s_ VARCHAR, chassis VARCHAR)", "sql": "SELECT rounds FROM table_name_7 WHERE sponsor_s_ = 'arciero wines' AND chassis = 'march 85c';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 93, "num_statements": 1} {"question": "What is the total number of electric bikes in the city of Seattle?", "schema": "CREATE TABLE if not exists bike_share (id INT, city VARCHAR(20), bike_type VARCHAR(20), quantity INT);INSERT INTO bike_share (id, city, bike_type, quantity) VALUES (1, 'Seattle', 'electric_bike', 500), (2, 'Portland', 'electric_bike', 350), (3, 'Seattle', 'classic_bike', 800);", "sql": "SELECT SUM(quantity) FROM bike_share WHERE city = 'Seattle' AND bike_type = 'electric_bike';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 92, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many won the Men's Open if the players are from Sweden?", "schema": "CREATE TABLE table_182298_5 (mens_open VARCHAR, country VARCHAR)", "sql": "SELECT COUNT(mens_open) FROM table_182298_5 WHERE country = 'Sweden';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 69, "num_statements": 1} {"question": "What is the total number of songs for each artist?", "schema": "CREATE TABLE Artist (ArtistID INT, ArtistName VARCHAR(50)); CREATE TABLE Song (SongID INT, SongName VARCHAR(50), ArtistID INT); INSERT INTO Artist (ArtistID, ArtistName) VALUES (1, 'Taylor Swift'), (2, 'BTS'), (3, 'Adele'); INSERT INTO Song (SongID, SongName, ArtistID) VALUES (1, 'Shake it Off', 1), (2, 'Blank Space', 1), (3, 'Dynamite', 2), (4, 'Butter', 2), (5, 'Rolling in the Deep', 3);", "sql": "SELECT ArtistName, COUNT(SongName) OVER (PARTITION BY ArtistID) AS SongCount FROM Artist JOIN Song ON Artist.ArtistID = Song.ArtistID;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "query_window_function", "is_postgresql_specific": true, "sql_length": 134, "num_statements": 1} {"question": "PostgreSQL regression test 'timestamptz': Write the SELECT query (example 261).", "schema": null, "sql": "SELECT '2011-03-27 01:59:59 Europe/Moscow'::timestamptz;", "explanation": "Regression test for Timestamptz in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT '2011-03-27 01:59:59 Europe/Moscow'::timestamptz) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 56, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the dominant religion 2002 for меленци", "schema": "CREATE TABLE table_2562572_35 (dominant_religion__2002_ VARCHAR, cyrillic_name_other_names VARCHAR)", "sql": "SELECT dominant_religion__2002_ FROM table_2562572_35 WHERE cyrillic_name_other_names = 'Меленци';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 98, "num_statements": 1} {"question": "How many media representation initiatives are there in the EU?", "schema": "CREATE TABLE Countries (Country TEXT, MediaRepresentationInitiatives INT); INSERT INTO Countries (Country, MediaRepresentationInitiatives) VALUES ('Germany', 4), ('France', 3), ('Italy', 2), ('Spain', 1);", "sql": "SELECT SUM(MediaRepresentationInitiatives) FROM Countries WHERE Country IN ('Germany', 'France', 'Italy', 'Spain', 'Belgium', 'Netherlands', 'Luxembourg', 'Ireland', 'Austria', 'Portugal', 'Finland', 'Sweden', 'Denmark', 'Estonia', 'Latvia', 'Lithuania', 'Slovakia', 'Slovenia', 'Czech Republic', 'Hungary', 'Poland', 'Romania', 'Bulgaria', 'Croatia', 'Malta', 'Cyprus');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 371, "num_statements": 1} {"question": "Calculate the average ticket price for concerts in Canada.", "schema": "CREATE TABLE concerts (id INT, artist_id INT, location TEXT, price DECIMAL);", "sql": "SELECT AVG(price) FROM concerts WHERE location LIKE '%Canada%';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 63, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many picks are there with an affiliation is the University of California Norcal Lamorinda United?", "schema": "CREATE TABLE table_25518547_2 (pick__number VARCHAR, affiliation VARCHAR)", "sql": "SELECT COUNT(pick__number) FROM table_25518547_2 WHERE affiliation = 'University of California NorCal Lamorinda United';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 120, "num_statements": 1} {"question": "What is the maximum R&D expenditure per year for drugs that were approved after 2017?", "schema": "CREATE TABLE rd_expenditure (id INT PRIMARY KEY, drug_id INT, year INT, amount DECIMAL(10,2)); CREATE TABLE drugs (id INT PRIMARY KEY, name VARCHAR(255), manufacturer VARCHAR(255), approval_date DATE);", "sql": "SELECT MAX(amount) as max_annual_rd_expenditure FROM rd_expenditure JOIN drugs ON rd_expenditure.drug_id = drugs.id WHERE approval_date > '2017-01-01' GROUP BY rd_expenditure.year;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 180, "num_statements": 1} {"question": "Who is the manufacturer of 'DrugD'?", "schema": "CREATE TABLE drug_info (drug_name TEXT, manufacturer TEXT);", "sql": "SELECT manufacturer FROM drug_info WHERE drug_name = 'DrugD';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 61, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the Score of the match on a clay surface with an outcome of winner, at the tournament ciudad juárez, and an Opponent in the final of estefania craciún?", "schema": "CREATE TABLE table_name_48 (score VARCHAR, opponent_in_the_final VARCHAR, tournament VARCHAR, surface VARCHAR, outcome VARCHAR)", "sql": "SELECT score FROM table_name_48 WHERE surface = 'clay' AND outcome = 'winner' AND tournament = 'ciudad juárez' AND opponent_in_the_final = 'estefania craciún';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 159, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What season did they play queensland at wicket 4?", "schema": "CREATE TABLE table_name_81 (season VARCHAR, opponent VARCHAR, wicket VARCHAR)", "sql": "SELECT season FROM table_name_81 WHERE opponent = 'queensland' AND wicket = '4';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 80, "num_statements": 1} {"question": "What is the total number of restorative justice programs, by state, with a start date within the last 5 years?", "schema": "CREATE TABLE restorative_justice_programs (program_id INT, state VARCHAR(20), start_date DATE); INSERT INTO restorative_justice_programs (program_id, state, start_date) VALUES (1, 'California', '2017-01-01'), (2, 'Texas', '2018-06-15'), (3, 'New York', '2019-02-03');", "sql": "SELECT restorative_justice_programs.state, COUNT(*) as num_programs FROM restorative_justice_programs WHERE restorative_justice_programs.start_date >= DATE_SUB(CURRENT_DATE, INTERVAL 5 YEAR) GROUP BY restorative_justice_programs.state;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 235, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which score happened on 11 february 1996?", "schema": "CREATE TABLE table_name_37 (score VARCHAR, date VARCHAR)", "sql": "SELECT score FROM table_name_37 WHERE date = '11 february 1996';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 64, "num_statements": 1} {"question": "How many agricultural innovation projects were completed in the last 3 years, grouped by completion year?", "schema": "CREATE TABLE agri_innovation (id INT, project_name TEXT, completion_date DATE); INSERT INTO agri_innovation (id, project_name, completion_date) VALUES (1, 'Precision Agriculture Test', '2020-05-01'), (2, 'Drip Irrigation Implementation', '2021-08-15'), (3, 'Vertical Farming Study', '2022-02-28');", "sql": "SELECT YEAR(completion_date) AS completion_year, COUNT(*) AS projects_completed FROM agri_innovation WHERE completion_date >= DATE('2019-01-01') GROUP BY completion_year;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 170, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What team picked 80?", "schema": "CREATE TABLE table_name_94 (team VARCHAR, pick VARCHAR)", "sql": "SELECT team FROM table_name_94 WHERE pick = 80;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 47, "num_statements": 1} {"question": "What is the average age of players who use VR technology, by country?", "schema": "CREATE TABLE PlayerDemographics (PlayerID INT, Age INT, Gender VARCHAR(10), Country VARCHAR(50), VRTechnology BOOLEAN); INSERT INTO PlayerDemographics (PlayerID, Age, Gender, Country, VRTechnology) VALUES (1, 25, 'Male', 'USA', TRUE), (2, 30, 'Female', 'Canada', FALSE), (3, 22, 'Male', 'Mexico', TRUE);", "sql": "SELECT Country, AVG(Age) FROM PlayerDemographics WHERE VRTechnology = TRUE GROUP BY Country;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 92, "num_statements": 1} {"question": "What are the names of vehicles that failed safety tests in Brazil in 2019?", "schema": "CREATE TABLE VehicleSafetyTestsBrazil (vehicle_id INT, model VARCHAR(100), passed BOOLEAN, country VARCHAR(50), year INT); INSERT INTO VehicleSafetyTestsBrazil (vehicle_id, model, passed, country, year) VALUES (1, 'Model S', false, 'Brazil', 2019), (2, 'Gol', true, 'Brazil', 2019);", "sql": "SELECT model FROM VehicleSafetyTestsBrazil WHERE passed = false AND country = 'Brazil' AND year = 2019;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 103, "num_statements": 1} {"question": "What is the total value of virtual tour engagement in 'North America' in the last month?", "schema": "CREATE TABLE virtual_tours (virtual_tour_id INT, virtual_tour_location TEXT, engagement_date DATE, engagement_value INT); INSERT INTO virtual_tours (virtual_tour_id, virtual_tour_location, engagement_date, engagement_value) VALUES (1, 'Virtual Tour North America', '2022-02-01', 200), (2, 'Virtual Tour North America', '2022-02-05', 150);", "sql": "SELECT SUM(engagement_value) FROM virtual_tours WHERE engagement_date >= DATEADD(month, -1, GETDATE()) AND virtual_tour_location = 'North America';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 147, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What time was the fastest lap during Stoh's 200 in 1982?", "schema": "CREATE TABLE table_name_74 (fastest_lap VARCHAR, name VARCHAR)", "sql": "SELECT fastest_lap FROM table_name_74 WHERE name = 'stoh's 200';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 64, "num_statements": 1} {"question": "What is the total number of posts, likes, and shares for each sector in the ai_companies table?", "schema": "CREATE TABLE ai_companies (id INT, name VARCHAR(20), location VARCHAR(20), sector VARCHAR(20), employees INT, ethical_ai BOOLEAN); INSERT INTO ai_companies (id, name, location, sector, employees, ethical_ai) VALUES (3, 'DEF Tech', 'Canada', 'Natural Language Processing', 40, true);", "sql": "SELECT sector, COUNT(*) as posts, SUM(likes) as total_likes, SUM(shares) as total_shares FROM social_media_posts JOIN ai_companies ON social_media_posts.user_id = ai_companies.id GROUP BY sector;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 195, "num_statements": 1} {"question": "What is the maximum duration of a single weightlifting workout?", "schema": "CREATE TABLE Workouts (WorkoutID INT, MemberID INT, Duration INT, WorkoutType VARCHAR(20)); INSERT INTO Workouts (WorkoutID, MemberID, Duration, WorkoutType) VALUES (1, 1, 60, 'Yoga'), (2, 2, 90, 'Weightlifting'), (3, 3, 60, 'Yoga'), (4, 1, 45, 'Running'), (5, 2, 120, 'Weightlifting');", "sql": "SELECT MAX(Duration) FROM Workouts WHERE WorkoutType = 'Weightlifting';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 71, "num_statements": 1} {"question": "What is the distribution of severity scores for vulnerabilities in the VulnAssess table?", "schema": "CREATE TABLE VulnAssess (systemName VARCHAR(50), severityScore INT); INSERT INTO VulnAssess (systemName, severityScore) VALUES ('SystemA', 7), ('SystemB', 5), ('SystemC', 3), ('SystemD', 6), ('SystemE', 8);", "sql": "SELECT severityScore, COUNT(*) FROM VulnAssess GROUP BY severityScore;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 70, "num_statements": 1} {"question": "Find the number of students who dropped out in the last 3 months from the \"SchoolA\" database", "schema": "CREATE TABLE SchoolA (student_id INT, dropout_date DATE); INSERT INTO SchoolA (student_id, dropout_date) VALUES (1, '2022-01-01'), (2, '2022-02-15'), (3, '2021-12-20');", "sql": "SELECT COUNT(*) FROM SchoolA WHERE dropout_date >= DATE_SUB(CURDATE(), INTERVAL 3 MONTH);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 89, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'multirangetypes' (example 560).", "schema": null, "sql": "create temp table test2(f1 multitextrange1[]); -- fail\n\ndrop table test1;", "explanation": "DDL from PostgreSQL core regression test for Multirangetypes.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 74, "num_statements": 2} {"question": "Generate PostgreSQL SQL for: How many languages for the 2001 (74th) awards?", "schema": "CREATE TABLE table_16254861_1 (language_s_ VARCHAR, year__ceremony_ VARCHAR)", "sql": "SELECT COUNT(language_s_) FROM table_16254861_1 WHERE year__ceremony_ = '2001 (74th)';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 86, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Show order ids and the number of products in each order.", "schema": "CREATE TABLE Order_items (order_id VARCHAR, product_id VARCHAR)", "sql": "SELECT order_id, COUNT(DISTINCT product_id) FROM Order_items GROUP BY order_id;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 79, "num_statements": 1} {"question": "Create a view to display average response time by response type", "schema": "CREATE TABLE emergency_response (response_type VARCHAR(255), response_time TIME, location VARCHAR(255));", "sql": "CREATE VIEW avg_response_time AS SELECT response_type, AVG(TIME_TO_SEC(response_time))/60 as avg_response_time FROM emergency_response GROUP BY response_type;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "ddl_view", "is_postgresql_specific": false, "sql_length": 158, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What team had a high rebound of perkins (9) and a game smaller than 78?", "schema": "CREATE TABLE table_name_39 (team VARCHAR, game VARCHAR, high_rebounds VARCHAR)", "sql": "SELECT team FROM table_name_39 WHERE game < 78 AND high_rebounds = 'perkins (9)';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 81, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What Brands Hatch race had Niki Lauda as its Fastest Lap?", "schema": "CREATE TABLE table_name_78 (race VARCHAR, fastest_lap VARCHAR, location VARCHAR)", "sql": "SELECT race FROM table_name_78 WHERE fastest_lap = 'niki lauda' AND location = 'brands hatch';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 94, "num_statements": 1} {"question": "What is the total CO2 offset for each carbon offset project?", "schema": "CREATE TABLE CarbonOffsetProjects (id INT, name VARCHAR(50), co2_offset FLOAT); INSERT INTO CarbonOffsetProjects (id, name, co2_offset) VALUES (1, 'ProjectA', 1000), (2, 'ProjectB', 2000), (3, 'ProjectC', 3000);", "sql": "SELECT name, SUM(co2_offset) FROM CarbonOffsetProjects GROUP BY name;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 69, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'identity' (example 113).", "schema": null, "sql": "INSERT INTO itest7c DEFAULT VALUES;", "explanation": "DML from PostgreSQL core regression test for Identity.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 35, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is Res., when Round is greater than 2, and when Event is \"Midwest Cage Championships 25: Inferno\"?", "schema": "CREATE TABLE table_name_92 (res VARCHAR, round VARCHAR, event VARCHAR)", "sql": "SELECT res FROM table_name_92 WHERE round > 2 AND event = 'midwest cage championships 25: inferno';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 99, "num_statements": 1} {"question": "List the names of soccer teams in the Premier League that have more than 50% of their players born outside of the UK.", "schema": "CREATE TABLE IF NOT EXISTS players (id INT, name VARCHAR(50), position VARCHAR(50), team VARCHAR(50), country VARCHAR(50)); CREATE VIEW IF NOT EXISTS uk_players AS SELECT team, COUNT(*) AS count FROM players WHERE country = 'UK' GROUP BY team;", "sql": "SELECT team FROM players JOIN uk_players ON players.team = uk_players.team WHERE players.team IN (SELECT team FROM uk_players WHERE count < COUNT(*) * 0.5) AND players.country != 'UK' GROUP BY team HAVING COUNT(*) > (SELECT COUNT(*)/2 FROM players WHERE team = players.team);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 275, "num_statements": 1} {"question": "What is the maximum depth of all the oceans?'", "schema": "CREATE TABLE ocean_depths (ocean TEXT, max_depth FLOAT); INSERT INTO ocean_depths (ocean, max_depth) VALUES ('Pacific', 36000.0); INSERT INTO ocean_depths (ocean, max_depth) VALUES ('Atlantic', 32000.0);", "sql": "SELECT ocean, MAX(max_depth) FROM ocean_depths;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 47, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: In the 2004 season with a 0-2 score what was the name of the venue?", "schema": "CREATE TABLE table_name_5 (venue VARCHAR, score VARCHAR, season VARCHAR)", "sql": "SELECT venue FROM table_name_5 WHERE score = '0-2' AND season = 2004;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 69, "num_statements": 1} {"question": "What is the number of employees by department and gender?", "schema": "CREATE TABLE Employees (EmployeeID int, FirstName varchar(50), LastName varchar(50), Department varchar(50), Gender varchar(50), Salary decimal(10,2)); INSERT INTO Employees (EmployeeID, FirstName, LastName, Department, Gender, Salary) VALUES (1, 'John', 'Doe', 'IT', 'Male', 75000); INSERT INTO Employees (EmployeeID, FirstName, LastName, Department, Gender, Salary) VALUES (2, 'Jane', 'Doe', 'HR', 'Female', 80000);", "sql": "SELECT Department, Gender, COUNT(*) as TotalEmployees FROM Employees GROUP BY Department, Gender;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 97, "num_statements": 1} {"question": "What is the average temperature anomaly for the year 2020?", "schema": "CREATE TABLE climate_data (id INT, year INT, temperature_anomaly DECIMAL); INSERT INTO climate_data (id, year, temperature_anomaly) VALUES (1, 2020, 1.2); INSERT INTO climate_data (id, year, temperature_anomaly) VALUES (2, 2019, 0.8);", "sql": "SELECT AVG(temperature_anomaly) FROM climate_data WHERE year = 2020;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 68, "num_statements": 1} {"question": "What is the average claim amount for policyholders with 'life insurance' policies in the 'west' region?", "schema": "CREATE TABLE policyholders (id INT, policy_type VARCHAR(20), region VARCHAR(10), claim_amount INT); INSERT INTO policyholders (id, policy_type, region, claim_amount) VALUES (1, 'life insurance', 'west', 5000), (2, 'health insurance', 'east', 3000), (3, 'life insurance', 'west', 1000);", "sql": "SELECT AVG(claim_amount) FROM policyholders WHERE policy_type = 'life insurance' AND region = 'west';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 101, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who was the visitor at 5:00 pm?", "schema": "CREATE TABLE table_name_42 (visitor VARCHAR, time VARCHAR)", "sql": "SELECT visitor FROM table_name_42 WHERE time = '5:00 pm';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 57, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the highest number of votes for the French Professor?", "schema": "CREATE TABLE table_name_20 (votes INTEGER, occupation VARCHAR)", "sql": "SELECT MAX(votes) FROM table_name_20 WHERE occupation = 'french professor';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 75, "num_statements": 1} {"question": "What is the total funding received by each space agency for space exploration programs?", "schema": "CREATE TABLE space_agencies (id INT, name VARCHAR(255), total_funding FLOAT, PRIMARY KEY(id)); INSERT INTO space_agencies (id, name, total_funding) VALUES (1, 'Agency1', 1000000), (2, 'Agency2', 2000000), (3, 'Agency3', 1500000);", "sql": "SELECT space_agencies.name, SUM(space_agencies.total_funding) FROM space_agencies GROUP BY space_agencies.name;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 111, "num_statements": 1} {"question": "Show a SQL definition from the pg_partman project (pg_partman--0.2.0--0.3.0, item 1).", "schema": null, "sql": "CREATE TABLE part_grants (\n parent_table text PRIMARY KEY REFERENCES @extschema@.part_config (parent_table) ON DELETE CASCADE ON UPDATE CASCADE,\n grants text,\n roles text\n);", "explanation": "SQL definition from the open-source pg_partman PostgreSQL project.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "ddl_table", "is_postgresql_specific": false, "sql_length": 182, "num_statements": 1} {"question": "pgTAP test for Pgtap--Unpackaged--0.91.0 (assertion 404).", "schema": null, "sql": "ALTER EXTENSION pgtap ADD FUNCTION has_rightop ( NAME, NAME, NAME, TEXT );", "explanation": "SQL assertion from pgTAP test suite for Pgtap--Unpackaged--0.91.0.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 74, "num_statements": 1} {"question": "What is the count of 'Sustainable Fashion' related events held in 'Africa' in the year 2022?", "schema": "CREATE TABLE africa_events (id INT, event_type VARCHAR(30), event_year INT);INSERT INTO africa_events (id, event_type, event_year) VALUES (1, 'Sustainable Fashion', 2022), (2, 'Fashion Trend', 2021), (3, 'Sustainable Fashion', 2023), (4, 'Fashion Trend', 2020);", "sql": "SELECT COUNT(*) FROM africa_events WHERE event_type = 'Sustainable Fashion' AND event_year = 2022;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 98, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many losses did the Michigan State Spartans have?", "schema": "CREATE TABLE table_1672976_2 (loss INTEGER, institution VARCHAR)", "sql": "SELECT MAX(loss) FROM table_1672976_2 WHERE institution = 'Michigan State Spartans';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 84, "num_statements": 1} {"question": "What are the names of the algorithms that address algorithmic fairness and are part of the 'fairness_algorithms' table?", "schema": "CREATE TABLE fairness_algorithms (algorithm_id INT, algorithm_name VARCHAR(50), algorithm_type VARCHAR(20), description TEXT); INSERT INTO fairness_algorithms (algorithm_id, algorithm_name, algorithm_type, description) VALUES (1, 'Demographic Parity', 'pre-processing', 'A method that adjusts the training data to meet demographic parity.'), (2, 'Equal Opportunity', 'post-processing', 'A method that adjusts predictions to meet equal opportunity.'), (3, 'Reweighing', 'pre-processing', 'A method that reweights the training instances to meet fairness criteria.');", "sql": "SELECT algorithm_name FROM fairness_algorithms;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 47, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the total for the team with 0 bronze and 3 silver?", "schema": "CREATE TABLE table_name_41 (total INTEGER, bronze VARCHAR, silver VARCHAR)", "sql": "SELECT SUM(total) FROM table_name_41 WHERE bronze = 0 AND silver = 3;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 69, "num_statements": 1} {"question": "PostgreSQL regression test 'sequence': Write the SELECT query (example 66).", "schema": null, "sql": "SELECT * FROM foo_seq_new;", "explanation": "Regression test for Sequence in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT * FROM foo_seq_new) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 26, "num_statements": 1} {"question": "PostgreSQL regression test 'sqljson': Write the SELECT query (example 44).", "schema": null, "sql": "SELECT JSON_SERIALIZE('{ \"a\" : 1 } ' RETURNING bytea);", "explanation": "Regression test for Sqljson in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT JSON_SERIALIZE('{ \"a\" : 1 } ' RETURNING bytea)) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": true, "sql_length": 54, "num_statements": 1} {"question": "Delete companies founded before a specific year", "schema": "CREATE TABLE companies (id INT, name TEXT, founded DATE); INSERT INTO companies (id, name, founded) VALUES (1, 'Foobar Inc', '2017-01-01'), (2, 'Gizmos Inc', '2019-06-15'), (3, 'Widgets Inc', '2015-09-27'), (4, 'Doodads Inc', '2018-03-12'), (5, 'Thingamajigs Inc', '2021-01-01');", "sql": "DELETE FROM companies WHERE founded < '2018-01-01';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_delete", "is_postgresql_specific": false, "sql_length": 51, "num_statements": 1} {"question": "What is the maximum maritime law penalty in the Caribbean in USD?", "schema": "CREATE TABLE maritime_laws (law_id INT, law_name VARCHAR(50), region VARCHAR(50), penalty_amount INT);", "sql": "SELECT MAX(penalty_amount) FROM maritime_laws WHERE region = 'Caribbean';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 73, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is Date of Appointment, when Outgoing Manager is \"Giray Bulak\"?", "schema": "CREATE TABLE table_name_69 (date_of_appointment VARCHAR, outgoing_manager VARCHAR)", "sql": "SELECT date_of_appointment FROM table_name_69 WHERE outgoing_manager = 'giray bulak';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 85, "num_statements": 1} {"question": "What is the average number of publications by graduate students in the English program?", "schema": "CREATE TABLE GraduateStudents(Id INT, Name VARCHAR(100), Program VARCHAR(50), Publications INT); INSERT INTO GraduateStudents(Id, Name, Program, Publications) VALUES (1, 'Sam', 'English', 3), (2, 'Tina', 'English', 4);", "sql": "SELECT AVG(Publications) FROM GraduateStudents WHERE Program = 'English';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 73, "num_statements": 1} {"question": "What is the total weight of cannabis flower sold by dispensaries in Washington in Q2 2022?", "schema": "CREATE TABLE Dispensaries (id INT, name TEXT, state TEXT);CREATE TABLE Sales (id INT, dispensary_id INT, weight DECIMAL, sale_date DATE, product_type TEXT); INSERT INTO Dispensaries (id, name, state) VALUES (1, 'Dispensary A', 'Washington'); INSERT INTO Sales (id, dispensary_id, weight, sale_date, product_type) VALUES (1, 1, 100, '2022-04-01', 'flower');", "sql": "SELECT SUM(s.weight) FROM Dispensaries d INNER JOIN Sales s ON d.id = s.dispensary_id WHERE d.state = 'Washington' AND s.product_type = 'flower' AND s.sale_date BETWEEN '2022-04-01' AND '2022-06-30';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 199, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'earthdistance' (example 87).", "schema": null, "sql": "SELECT geo_distance('(0,0)'::point,'(0,90)'::point)::numeric(20,5);", "explanation": "Example query from the 'earthdistance' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 67, "num_statements": 1} {"question": "Rank teams by their average ticket sales, in descending order.", "schema": "CREATE TABLE team_performance (team_id INT, sales INT);", "sql": "SELECT team_id, RANK() OVER (ORDER BY AVG(sales) DESC) as rank FROM team_performance GROUP BY team_id;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "plpgsql", "is_postgresql_specific": true, "sql_length": 102, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What's the season number of the episode with series number 84?", "schema": "CREATE TABLE table_2226817_6 (no_in_season INTEGER, no_in_series VARCHAR)", "sql": "SELECT MIN(no_in_season) FROM table_2226817_6 WHERE no_in_series = 84;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 70, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many original air dates were there for episode 17?", "schema": "CREATE TABLE table_15430813_1 (original_air_date VARCHAR, no_in_season VARCHAR)", "sql": "SELECT COUNT(original_air_date) FROM table_15430813_1 WHERE no_in_season = 17;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 78, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'citext' (example 479).", "schema": null, "sql": "SELECT 'a'::citext ~<=~ 'B'::citext AS t;", "explanation": "Example query from the 'citext' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 41, "num_statements": 1} {"question": "List all farmers who cultivate 'Rice' and their corresponding regions.", "schema": "CREATE TABLE farmer (id INT PRIMARY KEY, name VARCHAR(50), crop_id INT, region_id INT); CREATE TABLE crop (id INT PRIMARY KEY, name VARCHAR(50)); CREATE TABLE region (id INT PRIMARY KEY, name VARCHAR(50)); INSERT INTO crop (id, name) VALUES (1, 'Rice'); INSERT INTO region (id, name) VALUES (1, 'Delta Region'); INSERT INTO farmer (id, name, crop_id, region_id) VALUES (1, 'John Doe', 1, 1);", "sql": "SELECT f.name, r.name AS region_name FROM farmer f INNER JOIN crop c ON f.crop_id = c.id INNER JOIN region r ON f.region_id = r.id WHERE c.name = 'Rice';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 153, "num_statements": 1} {"question": "Delete the 'Antarctica' habitat record from the 'habitats' table", "schema": "CREATE TABLE habitats (id INT, name VARCHAR(50), location VARCHAR(50), size FLOAT);", "sql": "DELETE FROM habitats WHERE location = 'Antarctica';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_delete", "is_postgresql_specific": false, "sql_length": 51, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: which episode was Transmitted on wednesday if the episode of \"438 magic: the gathering mini masters tournament\" was transmitted on thursday?", "schema": "CREATE TABLE table_18173916_8 (wednesday VARCHAR, thursday VARCHAR)", "sql": "SELECT wednesday FROM table_18173916_8 WHERE thursday = '438 Magic: The Gathering Mini Masters Tournament';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 107, "num_statements": 1} {"question": "What is the difference in the number of male and female players who prefer using VR technology for gaming?", "schema": "CREATE TABLE PlayerGenders (PlayerID INT, Gender VARCHAR(50)); INSERT INTO PlayerGenders (PlayerID, Gender) VALUES (1, 'Male'), (2, 'Female'), (3, 'Male'), (4, 'Male'), (5, 'Female'); CREATE TABLE PlayerPreferences (PlayerID INT, Preference VARCHAR(50)); INSERT INTO PlayerPreferences (PlayerID, Preference) VALUES (1, 'VR'), (2, 'Non-VR'), (3, 'VR'), (4, 'VR'), (5, 'Non-VR');", "sql": "(SELECT COUNT(*) FROM PlayerPreferences JOIN PlayerGenders ON PlayerPreferences.PlayerID = PlayerGenders.PlayerID WHERE PlayerPreferences.Preference = 'VR' AND PlayerGenders.Gender = 'Male' EXCEPT SELECT COUNT(*) FROM PlayerPreferences JOIN PlayerGenders ON PlayerPreferences.PlayerID = PlayerGenders.PlayerID WHERE PlayerPreferences.Preference = 'VR' AND PlayerGenders.Gender = 'Female');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 389, "num_statements": 1} {"question": "What is the average age of teachers who have completed at least one professional development course in the past year?", "schema": "CREATE TABLE teachers (teacher_id INT, age INT, num_courses_completed INT); INSERT INTO teachers (teacher_id, age, num_courses_completed) VALUES (1, 35, 2), (2, 45, 0), (3, 30, 1), (4, 50, 3);", "sql": "SELECT AVG(age) FROM teachers WHERE num_courses_completed >= (SELECT COUNT(course_id) FROM courses WHERE completion_date > DATE_SUB(CURRENT_DATE, INTERVAL 1 YEAR));", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 164, "num_statements": 1} {"question": "What is the name of the communication campaign that started last in the 'climate_communication' table?", "schema": "CREATE TABLE climate_communication (campaign_name TEXT, start_date DATE); INSERT INTO climate_communication (campaign_name, start_date) VALUES ('Climate Action', '2021-01-01'), ('Green Tomorrow', '2022-01-01'), ('Green Horizons', '2023-01-01');", "sql": "SELECT campaign_name FROM climate_communication ORDER BY start_date DESC LIMIT 1;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 81, "num_statements": 1} {"question": "What is the distribution of sentiment scores for creative AI applications in different countries?", "schema": "CREATE TABLE creative_ai (id INT, country VARCHAR, timestamp TIMESTAMP, sentiment FLOAT);", "sql": "SELECT country, PERCENT_RANK() OVER (PARTITION BY country ORDER BY timestamp, sentiment) FROM creative_ai;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "query_window_function", "is_postgresql_specific": true, "sql_length": 106, "num_statements": 1} {"question": "What is the minimum donation amount and the number of donors who made donations equal to the minimum amount?", "schema": "CREATE TABLE Donors (DonorID INT, DonorName TEXT, DonationAmount DECIMAL(10,2)); INSERT INTO Donors (DonorID, DonorName, DonationAmount) VALUES (1, 'John Doe', 50.00); INSERT INTO Donors (DonorID, DonorName, DonationAmount) VALUES (2, 'Jane Smith', 100.00); INSERT INTO Donors (DonorID, DonorName, DonationAmount) VALUES (3, 'Bob Johnson', 50.00);", "sql": "SELECT MIN(DonationAmount), COUNT(*) FROM Donors GROUP BY DonationAmount HAVING DonationAmount = (SELECT MIN(DonationAmount) FROM Donors);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 138, "num_statements": 1} {"question": "What is the average delivery time for each transportation mode, computed across all shipments?", "schema": "CREATE TABLE shipments (id INT, transportation_mode VARCHAR(50), delivery_time INT); INSERT INTO shipments (id, transportation_mode, delivery_time) VALUES (1, 'Air', 5), (2, 'Sea', 7), (3, 'Rail', 6), (4, 'Air', 4);", "sql": "SELECT transportation_mode, AVG(delivery_time) as avg_delivery_time FROM shipments GROUP BY transportation_mode;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 112, "num_statements": 1} {"question": "PostgreSQL regression test 'numeric': Write the SELECT query (example 639).", "schema": null, "sql": "SELECT width_bucket(5.0::float8, 3.0::float8, 4.0::float8, -5);", "explanation": "Regression test for Numeric in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT width_bucket(5.0::float8, 3.0::float8, 4.0::float8, -5)) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 63, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'create_index' (example 29).", "schema": null, "sql": "CREATE INDEX grect2ind ON fast_emp4000 USING gist (home_base);", "explanation": "DDL from PostgreSQL core regression test for Create Index.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "ddl_index", "is_postgresql_specific": false, "sql_length": 62, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the incumbent for first elected 1956", "schema": "CREATE TABLE table_1341930_40 (incumbent VARCHAR, first_elected VARCHAR)", "sql": "SELECT incumbent FROM table_1341930_40 WHERE first_elected = 1956;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "What is the minimum cargo weight and the number of voyages for vessels with 'OOCL' prefix in the Atlantic Ocean in 2018?", "schema": "CREATE TABLE Vessels (ID INT, Name TEXT, Cargo_Weight INT, Voyages INT, Prefix TEXT, Year INT);CREATE VIEW Atlantic_Ocean_Vessels AS SELECT * FROM Vessels WHERE Region = 'Atlantic Ocean';", "sql": "SELECT MIN(Cargo_Weight), SUM(Voyages) FROM Atlantic_Ocean_Vessels WHERE Prefix = 'OOCL' AND Year = 2018;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 105, "num_statements": 1} {"question": "PL/pgSQL test: Plperl Trigger (example 41).", "schema": null, "sql": "INSERT INTO trigger_test (i, v) values (10000, 'top');", "explanation": "PL/pgSQL example from PostgreSQL source test for Plperl Trigger.", "validation_query": null, "source": "plpgsql_source", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 54, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the aggregate number of yes votes where no votes is littler than 299939.1619948521 and % yes is 66.49%", "schema": "CREATE TABLE table_256286_41 (yes_votes VARCHAR, no_votes VARCHAR, _percentage_yes VARCHAR)", "sql": "SELECT COUNT(yes_votes) FROM table_256286_41 WHERE no_votes < 299939.1619948521 AND _percentage_yes = '66.49%';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 111, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the English title for season 6?", "schema": "CREATE TABLE table_name_91 (title__english_ VARCHAR, number_of_season VARCHAR)", "sql": "SELECT title__english_ FROM table_name_91 WHERE number_of_season = 6;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 69, "num_statements": 1} {"question": "PostgreSQL regression test 'explain': Write the SELECT query (example 50).", "schema": null, "sql": "select explain_filter('explain (verbose) select * from int8_tbl i8');", "explanation": "Regression test for Explain in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select explain_filter('explain (verbose) select * from int8_tbl i8')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "admin_maintenance", "is_postgresql_specific": false, "sql_length": 69, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the Korean dialect in the daegu capital?", "schema": "CREATE TABLE table_name_71 (korean_dialect VARCHAR, capital VARCHAR)", "sql": "SELECT korean_dialect FROM table_name_71 WHERE capital = 'daegu';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 65, "num_statements": 1} {"question": "Which climate adaptation projects were completed in Southeast Asia in 2017 and what was their combined cost?", "schema": "CREATE TABLE climate_adaptation (year INT, region VARCHAR(50), project VARCHAR(50), cost FLOAT); INSERT INTO climate_adaptation (year, region, project, cost) VALUES (2017, 'Southeast Asia', 'Adaptation Project 1', 150000);", "sql": "SELECT region, SUM(cost) FROM climate_adaptation WHERE year = 2017 AND region = 'Southeast Asia' GROUP BY region;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 113, "num_statements": 1} {"question": "How many times has the term 'disinformation' appeared in articles published by media outlet C in the last 5 years?", "schema": "CREATE TABLE articles (id INT, title VARCHAR(255), publication_date DATE, outlet_name VARCHAR(50)); INSERT INTO articles (id, title, publication_date, outlet_name) VALUES (1, 'Article1', '2021-01-01', 'OutletC'), (2, 'Article2', '2020-12-31', 'OutletC');", "sql": "SELECT COUNT(*) as disinformation_mentions FROM articles WHERE outlet_name = 'OutletC' AND publication_date >= DATE_SUB(CURDATE(), INTERVAL 5 YEAR) AND title LIKE '%disinformation%';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 182, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: \" what's the engine with turbo being yes (mitsubishi td04-15g ) \"", "schema": "CREATE TABLE table_11167610_1 (engine VARCHAR, turbo VARCHAR)", "sql": "SELECT engine FROM table_11167610_1 WHERE turbo = 'Yes (Mitsubishi TD04-15g )';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 79, "num_statements": 1} {"question": "pgTAP test for Index (assertion 66).", "schema": null, "sql": "SELECT * FROM check_test(\n index_is_unique( 'idx_bar' ),\n false,\n 'index_is_unique() fail index only',\n 'Index idx_bar should be unique',\n ''\n);", "explanation": "SQL assertion from pgTAP test suite for Index.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 159, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What home team has a score of 5 - 5?", "schema": "CREATE TABLE table_name_30 (home_team VARCHAR, score VARCHAR)", "sql": "SELECT home_team FROM table_name_30 WHERE score = '5 - 5';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 58, "num_statements": 1} {"question": "What is the total revenue for stores in each country, with a 10% discount applied?", "schema": "CREATE TABLE regions (id INT, country VARCHAR(255), sales_volume FLOAT); INSERT INTO regions (id, country, sales_volume) VALUES (3, 'Germany', 7000.00); INSERT INTO regions (id, country, sales_volume) VALUES (4, 'Spain', 4000.00);", "sql": "SELECT r.country, SUM(s.revenue * 0.9) as total_revenue FROM sales s JOIN regions r ON s.store LIKE CONCAT('%', r.country, '%') GROUP BY r.country;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 147, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'publication' (example 91).", "schema": null, "sql": "CREATE TABLE testpub_parted2 (LIKE testpub_parted);", "explanation": "DDL from PostgreSQL core regression test for Publication.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_table", "is_postgresql_specific": false, "sql_length": 51, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: When was terry cook picked?", "schema": "CREATE TABLE table_name_92 (pick INTEGER, player VARCHAR)", "sql": "SELECT AVG(pick) FROM table_name_92 WHERE player = 'terry cook';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 64, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What years are listed for the Tahunanui school with an authority of state?", "schema": "CREATE TABLE table_name_31 (years VARCHAR, authority VARCHAR, name VARCHAR)", "sql": "SELECT years FROM table_name_31 WHERE authority = 'state' AND name = 'tahunanui school';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 88, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which Year is the highest one that has a Next Highest Spender of aarp, and a US Cham Spending of $39,805,000, and a US Cham Rank larger than 1?", "schema": "CREATE TABLE table_name_16 (year INTEGER, us_cham_rank VARCHAR, next_highest_spender VARCHAR, us_cham_spending VARCHAR)", "sql": "SELECT MAX(year) FROM table_name_16 WHERE next_highest_spender = 'aarp' AND us_cham_spending = '$39,805,000' AND us_cham_rank > 1;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 130, "num_statements": 1} {"question": "Identify the most popular pick-up and drop-off locations for bus trips", "schema": "CREATE TABLE bus_trip (trip_id INT, pickup_location VARCHAR(50), dropoff_location VARCHAR(50), trip_date DATE);", "sql": "SELECT pickup_location AS most_popular_pickup, dropoff_location AS most_popular_dropoff, COUNT(*) AS trip_count FROM bus_trip GROUP BY pickup_location, dropoff_location ORDER BY trip_count DESC LIMIT 1;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 202, "num_statements": 1} {"question": "Which esports events had more than 10000 attendees?", "schema": "CREATE TABLE esports_events (id INT, name VARCHAR(50), attendees INT); INSERT INTO esports_events (id, name, attendees) VALUES (1, 'ESL One', 15000), (2, 'DreamHack', 12000), (3, 'PGL Major', 25000);", "sql": "SELECT name FROM esports_events WHERE attendees > 10000;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 56, "num_statements": 1} {"question": "What is the total number of Green_Infrastructure projects in 'City O' and 'City P'?", "schema": "CREATE TABLE Green_Infrastructure (id INT, project_name VARCHAR(50), location VARCHAR(50), cost FLOAT); INSERT INTO Green_Infrastructure (id, project_name, location, cost) VALUES (1, 'Urban Farming', 'City O', 2000000); INSERT INTO Green_Infrastructure (id, project_name, location, cost) VALUES (2, 'Community Gardens', 'City P', 3000000);", "sql": "SELECT COUNT(*) FROM Green_Infrastructure WHERE location IN ('City O', 'City P');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 81, "num_statements": 1} {"question": "PL/pgSQL test: Plpython Spi (example 27).", "schema": null, "sql": "CREATE FUNCTION result_empty_test() RETURNS void\nAS $$\nresult = plpy.execute(\"select 1 where false\")\n\nplpy.info(result[:])\n\n$$ LANGUAGE plpython3u;", "explanation": "PL/pgSQL example from PostgreSQL source test for Plpython Spi.", "validation_query": null, "source": "plpgsql_source", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 147, "num_statements": 1} {"question": "What is the average session length for each game genre, sorted by average session length?", "schema": "CREATE TABLE game_sessions(id INT, user_id INT, game_name VARCHAR(50), start_time DATETIME, end_time DATETIME); CREATE TABLE games(id INT, name VARCHAR(50), genre VARCHAR(50));", "sql": "SELECT genres.genre, AVG(TIMESTAMPDIFF(SECOND, start_time, end_time)) as avg_session_length FROM game_sessions JOIN games ON game_sessions.game_name = games.name JOIN (SELECT DISTINCT game_name, genre FROM game_sessions JOIN games ON game_sessions.game_name = games.name) genres ON games.name = genres.game_name GROUP BY genres.genre ORDER BY avg_session_length DESC;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 367, "num_statements": 1} {"question": "What is the average order value per customer for the last week, with orders ranked by total price in descending order?", "schema": "CREATE TABLE orders (order_id INT, customer_id INT, order_date DATE, total_price DECIMAL(10,2));", "sql": "SELECT customer_id, AVG(total_price) as avg_order_value FROM orders WHERE order_date >= DATEADD(day, -7, GETDATE()) GROUP BY customer_id ORDER BY AVG(total_price) DESC;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 168, "num_statements": 1} {"question": "What is the maximum number of hours volunteered in a single week by a volunteer, and who was the volunteer?", "schema": "CREATE TABLE volunteer_hours (id INT, volunteer_id INT, hours DECIMAL, week INT); INSERT INTO volunteer_hours (id, volunteer_id, hours, week) VALUES (1, 1, 5.0, 1), (2, 2, 10.0, 1), (3, 3, 7.5, 1), (4, 1, 4.0, 2), (5, 3, 8.0, 2); CREATE TABLE volunteers (id INT, name TEXT); INSERT INTO volunteers (id, name) VALUES (1, 'Samir'), (2, 'Sophia'), (3, 'Taro');", "sql": "SELECT MAX(hours) AS max_hours, volunteer_id FROM volunteer_hours GROUP BY volunteer_id; SELECT name, volunteer_id FROM volunteers WHERE volunteer_id IN (SELECT volunteer_id FROM (SELECT MAX(hours) AS max_hours, volunteer_id FROM volunteer_hours GROUP BY volunteer_id) AS subquery WHERE max_hours = (SELECT MAX(hours) FROM volunteer_hours));", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 341, "num_statements": 2} {"question": "PostgreSQL regression test 'numeric': Write the SELECT query (example 700).", "schema": null, "sql": "SELECT to_char('100e9'::numeric, 'RN');", "explanation": "Regression test for Numeric in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT to_char('100e9'::numeric, 'RN')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 39, "num_statements": 1} {"question": "Find the names and positions of employees who earn more than the average salary?", "schema": "CREATE TABLE employee_salaries (id INT, name VARCHAR(50), position VARCHAR(50), salary INT); CREATE VIEW avg_salary AS SELECT AVG(salary) AS avg_salary FROM employee_salaries;", "sql": "SELECT name, position FROM employee_salaries WHERE salary > (SELECT avg_salary FROM avg_salary);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 96, "num_statements": 1} {"question": "Find the number of applicants by job title and the percentage of applicants who were hired", "schema": "CREATE TABLE job_applications(app_id INT, job_id INT, hired BOOLEAN); INSERT INTO job_applications VALUES (1, 1, TRUE), (2, 1, FALSE), (3, 2, TRUE), (4, 2, TRUE), (5, 3, FALSE);", "sql": "SELECT j.job_title, COUNT(a.app_id) as num_applicants, (COUNT(CASE WHEN a.hired THEN 1 END) / COUNT(a.app_id)) * 100 as pct_hired FROM job_applications a JOIN job_openings j ON a.job_id = j.job_id GROUP BY j.job_title;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 219, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who had the most rebounds when the team record was 15-27?", "schema": "CREATE TABLE table_23285805_6 (high_rebounds VARCHAR, record VARCHAR)", "sql": "SELECT high_rebounds FROM table_23285805_6 WHERE record = '15-27';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the date of the tournament held in Indonesia?", "schema": "CREATE TABLE table_name_23 (date VARCHAR, country VARCHAR)", "sql": "SELECT date FROM table_name_23 WHERE country = 'indonesia';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 59, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Total larger than 3, and a Rank of 4, and a Silver larger than 0 has what average gold?", "schema": "CREATE TABLE table_name_2 (gold INTEGER, silver VARCHAR, total VARCHAR, rank VARCHAR)", "sql": "SELECT AVG(gold) FROM table_name_2 WHERE total > 3 AND rank = '4' AND silver > 0;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 81, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the position of club Melilla CF, with a goal difference smaller than -10?", "schema": "CREATE TABLE table_name_63 (position INTEGER, club VARCHAR, goal_difference VARCHAR)", "sql": "SELECT SUM(position) FROM table_name_63 WHERE club = 'melilla cf' AND goal_difference < -10;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 92, "num_statements": 1} {"question": "What is the total transaction amount for the 'Online' customer segment in the last quarter?", "schema": "CREATE TABLE customers (id INT, segment VARCHAR(20)); CREATE TABLE transactions (id INT, customer_id INT, amount DECIMAL(10,2), transaction_date DATE); INSERT INTO customers (id, segment) VALUES (1, 'Online'); INSERT INTO transactions (id, customer_id, amount, transaction_date) VALUES (1, 1, 500, '2022-04-01');", "sql": "SELECT SUM(amount) FROM transactions JOIN customers ON transactions.customer_id = customers.id WHERE customers.segment = 'Online' AND transaction_date >= DATE_SUB(CURDATE(), INTERVAL 3 MONTH);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 192, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the withdrawal rate for the school district with a graduation rate of 89.3%?", "schema": "CREATE TABLE table_21514460_1 (withdrawal_rate__2010_11_ VARCHAR, graduation_rate__2011_12_ VARCHAR)", "sql": "SELECT withdrawal_rate__2010_11_ FROM table_21514460_1 WHERE graduation_rate__2011_12_ = '89.3%';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 97, "num_statements": 1} {"question": "What is the running total of 'tech_for_social_good_hours' for each volunteer, for the 'tech4good' database, ordered by volunteer_id and date?", "schema": "CREATE TABLE tech4good (id INT, volunteer_id INT, tech_for_social_good_date DATE, tech_for_social_good_hours INT); INSERT INTO tech4good (id, volunteer_id, tech_for_social_good_date, tech_for_social_good_hours) VALUES (1, 1001, '2022-01-01', 6); INSERT INTO tech4good (id, volunteer_id, tech_for_social_good_date, tech_for_social_good_hours) VALUES (2, 1001, '2022-01-03', 9); INSERT INTO tech4good (id, volunteer_id, tech_for_social_good_date, tech_for_social_good_hours) VALUES (3, 1002, '2022-01-02', 4);", "sql": "SELECT volunteer_id, tech_for_social_good_date, tech_for_social_good_hours, SUM(tech_for_social_good_hours) OVER (PARTITION BY volunteer_id ORDER BY tech_for_social_good_date) as running_total FROM tech4good;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "query_window_function", "is_postgresql_specific": true, "sql_length": 208, "num_statements": 1} {"question": "Find the total number of intelligence operations involving submarines.", "schema": "CREATE SCHEMA if not exists intel_ops (Operation VARCHAR(255), Equipment VARCHAR(255)); INSERT INTO intel_ops VALUES ('Op1', 'Submarine'), ('Op2', 'Satellite');", "sql": "SELECT COUNT(*) FROM intel_ops WHERE Equipment = 'Submarine';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 61, "num_statements": 1} {"question": "Write the SQL definition from PostgreSQL contrib extension 'contrib' (item 113).", "schema": null, "sql": "CREATE FUNCTION isnle(isbn, ean13)\n\tRETURNS boolean\n\tAS 'int8le'\n\tLANGUAGE 'internal'\n\tIMMUTABLE STRICT\n\tPARALLEL SAFE;", "explanation": "SQL definition from the 'contrib' PostgreSQL contrib extension.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 119, "num_statements": 1} {"question": "What is the earliest discovery date for an exoplanet?", "schema": "CREATE TABLE exoplanets (id INT, name VARCHAR(255), discovery_date DATE, discovery_method VARCHAR(255));", "sql": "SELECT MIN(exoplanets.discovery_date) FROM exoplanets;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 54, "num_statements": 1} {"question": "What is the percentage of climate finance allocated to the Indigenous communities?", "schema": "CREATE TABLE finance_distribution (group VARCHAR(255), funding FLOAT);", "sql": "SELECT (SUM(CASE WHEN group = 'Indigenous communities' THEN funding ELSE 0 END) / SUM(funding)) * 100 FROM finance_distribution;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 128, "num_statements": 1} {"question": "How many instances of disinformation were detected in a specific time period?", "schema": "CREATE TABLE disinformation (id INT, detected_at TIMESTAMP, source VARCHAR, confirmed BOOLEAN); INSERT INTO disinformation (id, detected_at, source, confirmed) VALUES (1, '2021-01-01 12:00:00', 'WebsiteA', true); INSERT INTO disinformation (id, detected_at, source, confirmed) VALUES (2, '2021-01-02 13:00:00', 'WebsiteB', false);", "sql": "SELECT COUNT(*) FROM disinformation WHERE detected_at BETWEEN '2021-01-01' AND '2021-01-07' AND confirmed = true;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 113, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: what is the number of losses with wins less than 4 and goals against more than 39?", "schema": "CREATE TABLE table_name_28 (losses VARCHAR, goals_against VARCHAR, wins VARCHAR)", "sql": "SELECT COUNT(losses) FROM table_name_28 WHERE goals_against > 39 AND wins < 4;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 78, "num_statements": 1} {"question": "PostgreSQL regression test 'jsonb_jsonpath': Write the SELECT query (example 579).", "schema": null, "sql": "select jsonb_path_query('\"12:34:56.789 +05:30\"', '$.time_tz(2)');", "explanation": "Regression test for Jsonb Jsonpath in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select jsonb_path_query('\"12:34:56.789 +05:30\"', '$.time_tz(2)')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "advanced", "category": "query_select", "is_postgresql_specific": false, "sql_length": 65, "num_statements": 1} {"question": "Update the weight of the 'Elephant' in the 'Forest' habitat to 5500.0.", "schema": "CREATE TABLE animals (id INT, animal_name VARCHAR(255), habitat_type VARCHAR(255), weight DECIMAL(5,2)); INSERT INTO animals (id, animal_name, habitat_type, weight) VALUES (1, 'Lion', 'Savannah', 190.0), (2, 'Elephant', 'Forest', 6000.0), (3, 'Hippo', 'Wetlands', 3300.0), (4, 'Giraffe', 'Savannah', 1600.0), (5, 'Duck', 'Wetlands', 15.0), (6, 'Bear', 'Mountains', 300.0);", "sql": "UPDATE animals SET weight = 5500.0 WHERE animal_name = 'Elephant' AND habitat_type = 'Forest';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 94, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the score in the game played on December 2, 1984?", "schema": "CREATE TABLE table_14863869_1 (record VARCHAR, date VARCHAR)", "sql": "SELECT record FROM table_14863869_1 WHERE date = 'December 2, 1984';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 68, "num_statements": 1} {"question": "What is the total investment for each product?", "schema": "CREATE TABLE investments (client_id INT, product VARCHAR(50), value DECIMAL(10,2)); INSERT INTO investments (client_id, product, value) VALUES (1, 'Stocks', 25000.00); INSERT INTO investments (client_id, product, value) VALUES (1, 'Bonds', 15000.00); INSERT INTO investments (client_id, product, value) VALUES (2, 'Stocks', 30000.00); INSERT INTO investments (client_id, product, value) VALUES (2, 'Mutual Funds', 10000.00);", "sql": "SELECT product, SUM(value) as total_investment FROM investments GROUP BY product;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 81, "num_statements": 1} {"question": "Determine the number of unique habitats where each species is present, and display the results in a table format with species and their respective number of habitats.", "schema": "CREATE TABLE AnimalHabitats (id INT PRIMARY KEY, species VARCHAR(50), habitat VARCHAR(50));", "sql": "SELECT AnimalHabitats.species, COUNT(DISTINCT AnimalHabitats.habitat) FROM AnimalHabitats GROUP BY AnimalHabitats.species;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 122, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the smallest number against when the draws are less than 0?", "schema": "CREATE TABLE table_name_21 (against INTEGER, draws INTEGER)", "sql": "SELECT MIN(against) FROM table_name_21 WHERE draws < 0;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 55, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the bleeding time in Bernard-soulier syndrome?", "schema": "CREATE TABLE table_1557752_1 (bleeding_time VARCHAR, condition VARCHAR)", "sql": "SELECT bleeding_time FROM table_1557752_1 WHERE condition = 'Bernard-Soulier syndrome';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 87, "num_statements": 1} {"question": "What is the average price of vegan dishes in each category?", "schema": "CREATE TABLE menu_categories (category_id INT, category VARCHAR(255));", "sql": "SELECT category, AVG(price) as avg_price FROM menus JOIN menu_categories ON menus.category = menu_categories.category WHERE menus.category IN ('vegan appetizers', 'vegan entrees', 'vegan desserts') GROUP BY category;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 216, "num_statements": 1} {"question": "What is the total funding from private and public sources for each program category?", "schema": "CREATE TABLE program_funding (program_category VARCHAR(15), funding_source VARCHAR(15), amount INT);", "sql": "SELECT program_category, SUM(CASE WHEN funding_source = 'private' THEN 1 ELSE 0 END) + SUM(CASE WHEN funding_source = 'public' THEN 1 ELSE 0 END) AS total_funding FROM program_funding GROUP BY program_category;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 210, "num_statements": 1} {"question": "PL/pgSQL test: Plpython Setof (example 9).", "schema": null, "sql": "SELECT test_setof_as_list(1, 'list');", "explanation": "PL/pgSQL example from PostgreSQL source test for Plpython Setof.", "validation_query": null, "source": "plpgsql_source", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 37, "num_statements": 1} {"question": "PostgreSQL regression test 'date': Write the SELECT query (example 58).", "schema": null, "sql": "SELECT date '1999-08-01';", "explanation": "Regression test for Date in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT date '1999-08-01') AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 25, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the NBA draft result of the player from Dunbar High School?", "schema": "CREATE TABLE table_name_61 (nba_draft VARCHAR, school VARCHAR)", "sql": "SELECT nba_draft FROM table_name_61 WHERE school = 'dunbar high school';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 72, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which headphone models have a driver-matched DB of 0.1 and a US MSRP of $49?", "schema": "CREATE TABLE table_1601027_1 (headphone_model VARCHAR, driver_matched_db VARCHAR, us_msrp VARCHAR)", "sql": "SELECT headphone_model FROM table_1601027_1 WHERE driver_matched_db = '0.1' AND us_msrp = '$49';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 96, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the lowest Draft, when Nationality is \"Canada\", when Player is \"Shane Doan Category:Articles with hCards\", and when Round is less than 1?", "schema": "CREATE TABLE table_name_7 (draft INTEGER, round VARCHAR, nationality VARCHAR, player VARCHAR)", "sql": "SELECT MIN(draft) FROM table_name_7 WHERE nationality = 'canada' AND player = 'shane doan category:articles with hcards' AND round < 1;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 135, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which Label has a Format of cd, a Country of Japan and a Catalog of vjcp-68403?", "schema": "CREATE TABLE table_name_89 (label VARCHAR, catalog VARCHAR, format VARCHAR, country VARCHAR)", "sql": "SELECT label FROM table_name_89 WHERE format = 'cd' AND country = 'japan' AND catalog = 'vjcp-68403';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 101, "num_statements": 1} {"question": "What is the distribution of user locations for articles about climate change?", "schema": "CREATE TABLE articles (title text, category text, user_location text); INSERT INTO articles (title, category, user_location) VALUES ('Article 1', 'climate change', 'USA'); INSERT INTO articles (title, category, user_location) VALUES ('Article 2', 'climate change', 'Canada');", "sql": "SELECT user_location, COUNT(*) as count FROM articles WHERE category = 'climate change' GROUP BY user_location;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 111, "num_statements": 1} {"question": "What is the total number of volunteers and total hours donated by them in '2022'?", "schema": "CREATE TABLE Volunteers (VolunteerID int, VolunteerName varchar(50), HoursDonated int, VolunteerYear int); INSERT INTO Volunteers (VolunteerID, VolunteerName, HoursDonated, VolunteerYear) VALUES (1, 'Grace Blue', 30, 2022), (2, 'Harry Yellow', 20, 2022), (3, 'Ivy Purple', 15, 2022), (4, 'Jack Orange', 25, 2022);", "sql": "SELECT COUNT(VolunteerName) as TotalVolunteers, SUM(HoursDonated) as TotalHours FROM Volunteers WHERE VolunteerYear = 2022;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 123, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'tsdicts' (example 75).", "schema": null, "sql": "CREATE TEXT SEARCH DICTIONARY hunspell_err (\n\t\t\t\t\t\tTemplate=ispell,\n\t\t\t\t\t\tDictFile=hunspell_sample_num,\n\t\t\t\t\t\tAffFile=hunspell_sample_long\n);", "explanation": "DDL from PostgreSQL core regression test for Tsdicts.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 141, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the average number of bronze medals when the team's silver are more than 1 but the total medals are less than 9?", "schema": "CREATE TABLE table_name_87 (bronze INTEGER, silver VARCHAR, total VARCHAR)", "sql": "SELECT AVG(bronze) FROM table_name_87 WHERE silver > 1 AND total < 9;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 69, "num_statements": 1} {"question": "List all communities that have both eco-friendly and inclusive housing policies.", "schema": "CREATE TABLE communities (community VARCHAR(255), eco_friendly BOOLEAN, inclusive_policy BOOLEAN); INSERT INTO communities (community, eco_friendly, inclusive_policy) VALUES ('CommunityA', true, true), ('CommunityB', false, true), ('CommunityC', true, false);", "sql": "SELECT community FROM communities WHERE eco_friendly = true AND inclusive_policy = true;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 88, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the result for Wisconsin?", "schema": "CREATE TABLE table_22603701_1 (result VARCHAR, college VARCHAR)", "sql": "SELECT result FROM table_22603701_1 WHERE college = 'Wisconsin';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 64, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'rowsecurity' (example 376).", "schema": null, "sql": "INSERT INTO bv1 VALUES (-1, 'xxx'); -- should fail view WCO\nINSERT INTO bv1 VALUES (11, 'xxx'); -- should fail RLS check\nINSERT INTO bv1 VALUES (12, 'xxx'); -- ok\n\nEXPLAIN (COSTS OFF) UPDATE bv1 SET b = 'yyy' WHERE a = 4 AND f_leak(b);", "explanation": "DML from PostgreSQL core regression test for Rowsecurity.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 235, "num_statements": 4} {"question": "Generate PostgreSQL SQL for: Find the ids of all the order items whose product id is 11.", "schema": "CREATE TABLE order_items (order_item_id VARCHAR, product_id VARCHAR)", "sql": "SELECT order_item_id FROM order_items WHERE product_id = 11;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 60, "num_statements": 1} {"question": "List the number of male and female patients diagnosed with any infectious disease in Seattle.", "schema": "CREATE TABLE Genders (GenderID INT, Age INT, Gender VARCHAR(10), City VARCHAR(20), Disease VARCHAR(20)); INSERT INTO Genders (GenderID, Age, Gender, City, Disease) VALUES (1, 50, 'Female', 'Seattle', 'Measles');", "sql": "SELECT Gender, COUNT(*) as PatientCount FROM Genders WHERE City = 'Seattle' GROUP BY Gender;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 92, "num_statements": 1} {"question": "Show an example of PostgreSQL WAIT FOR (example 5).", "schema": null, "sql": "postgres=# WAIT FOR LSN '0/306EE20' WITH (MODE 'primary_flush'); status --------- success (1 row);", "explanation": "PostgreSQL WAIT FOR command.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 98, "num_statements": 2} {"question": "Generate PostgreSQL SQL for: What was the constructor when the laps were larger than 54, and the time/retired was +1 lap on a grid of 20?", "schema": "CREATE TABLE table_name_28 (constructor VARCHAR, grid VARCHAR, laps VARCHAR, time_retired VARCHAR)", "sql": "SELECT constructor FROM table_name_28 WHERE laps > 54 AND time_retired = '+1 lap' AND grid = 20;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 96, "num_statements": 1} {"question": "Show the number of satellites in each type of orbit", "schema": "CREATE TABLE satellites_by_country (satellite_id INT, country VARCHAR(50), orbit_type VARCHAR(50)); INSERT INTO satellites_by_country (satellite_id, country, orbit_type) VALUES (1, 'USA', 'Geostationary'); INSERT INTO satellites_by_country (satellite_id, country, orbit_type) VALUES (2, 'Russia', 'Low Earth Orbit'); INSERT INTO satellites_by_country (satellite_id, country, orbit_type) VALUES (3, 'China', 'Geostationary');", "sql": "SELECT orbit_type, COUNT(*) as num_satellites FROM satellites_by_country GROUP BY orbit_type;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 93, "num_statements": 1} {"question": "Average risk rating for impact investments in the finance sector with an ESG score above 75?", "schema": "CREATE TABLE impact_investments_finance (id INT, sector VARCHAR(20), ESG_score FLOAT, risk_rating FLOAT); INSERT INTO impact_investments_finance (id, sector, ESG_score, risk_rating) VALUES (1, 'Finance', 80.0, 3.0), (2, 'Finance', 70.0, 4.0), (3, 'Finance', 85.0, 2.5);", "sql": "SELECT AVG(risk_rating) FROM impact_investments_finance WHERE sector = 'Finance' AND ESG_score > 75;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 100, "num_statements": 1} {"question": "List all defense projects that have experienced delays of more than 30 days, along with their original and current completion dates, and the names of the contractors responsible for each project.", "schema": "CREATE TABLE defense_projects (id INT, name VARCHAR(255), contractor_id INT, original_completion_date DATE, current_completion_date DATE); INSERT INTO defense_projects (id, name, contractor_id, original_completion_date, current_completion_date) VALUES (1, 'F-35 Joint Strike Fighter', 1, '2025-01-01', '2025-02-01'), (2, 'Boeing 777X', 2, '2024-01-01', '2024-01-15'), (3, 'Columbia-class Submarine', 3, '2028-01-01', '2028-05-01'); CREATE TABLE contractors (id INT, name VARCHAR(255)); INSERT INTO contractors (id, name) VALUES (1, 'Lockheed Martin'), (2, 'Boeing'), (3, 'General Dynamics');", "sql": "SELECT d.name, c.name as contractor_name, d.original_completion_date, d.current_completion_date, DATEDIFF(d.current_completion_date, d.original_completion_date) as delay_days FROM defense_projects d JOIN contractors c ON d.contractor_id = c.id WHERE DATEDIFF(d.current_completion_date, d.original_completion_date) > 30;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 319, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: who is the constructor when the driver is juan pablo montoya?", "schema": "CREATE TABLE table_name_41 (constructor VARCHAR, driver VARCHAR)", "sql": "SELECT constructor FROM table_name_41 WHERE driver = 'juan pablo montoya';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 74, "num_statements": 1} {"question": "What was the first spacecraft launched by each manufacturer?", "schema": "CREATE TABLE Spacecraft (id INT, name VARCHAR(50), manufacturer VARCHAR(50), launch_date DATE); INSERT INTO Spacecraft (id, name, manufacturer, launch_date) VALUES (1, 'Voyager 1', 'NASA', '1977-09-05'); INSERT INTO Spacecraft (id, name, manufacturer, launch_date) VALUES (2, 'Voyager 2', 'NASA', '1977-08-20'); INSERT INTO Spacecraft (id, name, manufacturer, launch_date) VALUES (3, 'Sputnik 1', 'Roscosmos', '1957-10-04');", "sql": "SELECT manufacturer, MIN(launch_date) as first_launch_date FROM Spacecraft GROUP BY manufacturer;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 97, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: what character did Laara sadiq play", "schema": "CREATE TABLE table_name_50 (character_name VARCHAR, voice_actor__english_1998___pioneer_ VARCHAR)", "sql": "SELECT character_name FROM table_name_50 WHERE voice_actor__english_1998___pioneer_ = 'laara sadiq';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 100, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the nominated work for 1996 and festival of black maria film and video festival", "schema": "CREATE TABLE table_name_7 (nominated_work VARCHAR, year VARCHAR, festival VARCHAR)", "sql": "SELECT nominated_work FROM table_name_7 WHERE year = 1996 AND festival = 'black maria film and video festival';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 111, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the date of game 75?", "schema": "CREATE TABLE table_name_93 (date VARCHAR, game VARCHAR)", "sql": "SELECT date FROM table_name_93 WHERE game = 75;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 47, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the result for years prior to 2005?", "schema": "CREATE TABLE table_name_83 (result VARCHAR, year INTEGER)", "sql": "SELECT result FROM table_name_83 WHERE year < 2005;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 51, "num_statements": 1} {"question": "What is the number of hospital visits for patients with diabetes by age group in Florida in 2019?", "schema": "CREATE TABLE hospital_visits_2 (id INT, patient_id INT, age_group TEXT, state TEXT, diagnosis TEXT, visit_date DATE); INSERT INTO hospital_visits_2 (id, patient_id, age_group, state, diagnosis, visit_date) VALUES (1, 789, '5-9', 'Florida', 'diabetes', '2019-03-04');", "sql": "SELECT age_group, SUM(CASE WHEN diagnosis = 'diabetes' THEN 1 ELSE 0 END) as num_visits FROM hospital_visits_2 WHERE state = 'Florida' AND visit_date >= '2019-01-01' AND visit_date < '2020-01-01' GROUP BY age_group;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 215, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'event_trigger' (example 8).", "schema": null, "sql": "CREATE AGGREGATE schema_two.newton\n (BASETYPE = int, SFUNC = schema_two.add, STYPE = int);", "explanation": "DDL from PostgreSQL core regression test for Event Trigger.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "ddl_advanced", "is_postgresql_specific": true, "sql_length": 91, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the capital of the district who's simplified name is 信州区?", "schema": "CREATE TABLE table_1300525_1 (pinyin VARCHAR, simplified VARCHAR)", "sql": "SELECT COUNT(pinyin) FROM table_1300525_1 WHERE simplified = '信州区';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 67, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'cube' (example 144).", "schema": null, "sql": "SELECT cube_dim('(0,0,0)'::cube);", "explanation": "Example query from the 'cube' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "query_select", "is_postgresql_specific": true, "sql_length": 33, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the IATA for Ringway Airport in the United Kingdom?", "schema": "CREATE TABLE table_name_1 (iata VARCHAR, country VARCHAR, airport VARCHAR)", "sql": "SELECT iata FROM table_name_1 WHERE country = 'united kingdom' AND airport = 'ringway airport';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 95, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'copyselect' (example 1).", "schema": null, "sql": "create table test1 (id serial, t text);", "explanation": "DDL from PostgreSQL core regression test for Copyselect.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_table", "is_postgresql_specific": true, "sql_length": 39, "num_statements": 1} {"question": "Find the total number of transactions per month for the last year.", "schema": "CREATE TABLE transactions (transaction_id INT, customer_id INT, transaction_date DATE); INSERT INTO transactions (transaction_id, customer_id, transaction_date) VALUES (1, 1, '2021-01-01'); INSERT INTO transactions (transaction_id, customer_id, transaction_date) VALUES (2, 2, '2021-02-01');", "sql": "SELECT DATEPART(YEAR, transaction_date) as year, DATEPART(MONTH, transaction_date) as month, COUNT(*) as total_transactions FROM transactions WHERE transaction_date BETWEEN DATEADD(year, -1, GETDATE()) AND GETDATE() GROUP BY DATEPART(YEAR, transaction_date), DATEPART(MONTH, transaction_date);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 293, "num_statements": 1} {"question": "What is the total production by crop type for indigenous food systems?", "schema": "CREATE TABLE crops (id INT, farmer_id INT, crop_name VARCHAR(255), yield INT, sale_price FLOAT, system_type VARCHAR(255)); INSERT INTO crops (id, farmer_id, crop_name, yield, sale_price, system_type) VALUES (1, 1, 'Corn', 80, 2.50, 'Indigenous'); INSERT INTO crops (id, farmer_id, crop_name, yield, sale_price, system_type) VALUES (2, 1, 'Soybeans', 120, 3.25, 'Indigenous'); INSERT INTO crops (id, farmer_id, crop_name, yield, sale_price, system_type) VALUES (3, 2, 'Corn', 90, 2.75, 'Agroecology'); INSERT INTO crops (id, farmer_id, crop_name, yield, sale_price, system_type) VALUES (4, 2, 'Wheat', 100, 3.00, 'Urban Agriculture');", "sql": "SELECT crop_name, SUM(yield) as total_production FROM crops WHERE system_type = 'Indigenous' GROUP BY crop_name;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 112, "num_statements": 1} {"question": "List the number of mental health parity incidents in each province for the last 6 months.", "schema": "CREATE TABLE MentalHealthParity (IncidentID INT, IncidentDate DATE, Province VARCHAR(255)); INSERT INTO MentalHealthParity (IncidentID, IncidentDate, Province) VALUES (1, '2022-01-01', 'Ontario'); INSERT INTO MentalHealthParity (IncidentID, IncidentDate, Province) VALUES (2, '2022-02-15', 'Quebec'); INSERT INTO MentalHealthParity (IncidentID, IncidentDate, Province) VALUES (3, '2022-03-05', 'British Columbia'); INSERT INTO MentalHealthParity (IncidentID, IncidentDate, Province) VALUES (4, '2022-04-10', 'Alberta');", "sql": "SELECT Province, COUNT(*) FROM MentalHealthParity WHERE IncidentDate >= DATEADD(month, -6, GETDATE()) GROUP BY Province;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 120, "num_statements": 1} {"question": "What is the average depth for marine species in the Actinopterygii order, grouped by their family?", "schema": "CREATE TABLE marine_species (species_id INT, species_name VARCHAR(100), max_depth FLOAT, order_name VARCHAR(50), family VARCHAR(50));", "sql": "SELECT family, AVG(max_depth) FROM marine_species WHERE order_name = 'Actinopterygii' GROUP BY family;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 102, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What's the number of the episode seen by 2.99 millions of people in the US, where performer 2 was Heather Anne Campbell?", "schema": "CREATE TABLE table_23294081_11 (_number INTEGER, performer_2 VARCHAR, us_viewers__millions_ VARCHAR)", "sql": "SELECT MIN(_number) FROM table_23294081_11 WHERE performer_2 = 'Heather Anne Campbell' AND us_viewers__millions_ = '2.99';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "advanced", "category": "plpgsql", "is_postgresql_specific": false, "sql_length": 122, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which song was picked that was originally performed by Marisa Monte?", "schema": "CREATE TABLE table_27616663_1 (song_choice VARCHAR, original_artist VARCHAR)", "sql": "SELECT song_choice FROM table_27616663_1 WHERE original_artist = 'Marisa Monte';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 80, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'seg' (example 119).", "schema": null, "sql": "SELECT '0 .. 1'::seg << '0'::seg AS bool;", "explanation": "Example query from the 'seg' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 41, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the Overall of 15 Club team with a Nationality of Canada?", "schema": "CREATE TABLE table_name_37 (club_team VARCHAR, nationality VARCHAR, overall VARCHAR)", "sql": "SELECT club_team FROM table_name_37 WHERE nationality = 'canada' AND overall = '15';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 84, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'indexing' (example 162).", "schema": null, "sql": "create table idxpart (a int, b int, c int) partition by range(a);", "explanation": "DDL from PostgreSQL core regression test for Indexing.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "ddl_table", "is_postgresql_specific": true, "sql_length": 65, "num_statements": 1} {"question": "Insert a new record into the fair_trade table with the following information: 'Cooperative E', 'Africa', 'Yes'", "schema": "CREATE TABLE fair_trade (id INT PRIMARY KEY, cooperative VARCHAR(255), region VARCHAR(255), certified VARCHAR(5));", "sql": "INSERT INTO fair_trade (cooperative, region, certified) VALUES ('Cooperative E', 'Africa', 'Yes');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 98, "num_statements": 1} {"question": "Write the PL/pgSQL object from PostgreSQL regression test 'plpgsql' (example 414).", "schema": null, "sql": "$$ language plpgsql;\n\nselect exc_using(5);\n\ndrop function exc_using(int);\n\n-- test FOR-over-cursor\n\ncreate or replace function forc01() returns void as $$\ndeclare\n c cursor(r1 integer, r2 integer)\n for select * from generate_series(r1,r2) i;", "explanation": "PL/pgSQL object from PostgreSQL core test for Plpgsql.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "plpgsql_function", "is_postgresql_specific": true, "sql_length": 248, "num_statements": 4} {"question": "Generate PostgreSQL SQL for: Which position did Redden play?", "schema": "CREATE TABLE table_14342210_6 (position VARCHAR, player VARCHAR)", "sql": "SELECT position FROM table_14342210_6 WHERE player = 'Redden';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 62, "num_statements": 1} {"question": "PostgreSQL regression test 'groupingsets': Write the SELECT query (example 59).", "schema": null, "sql": "select (select grouping(ss.x))\nfrom int8_tbl i1\ncross join lateral (select (select i1.q1) as x) ss\ngroup by ss.x;", "explanation": "Regression test for Groupingsets in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select (select grouping(ss.x))\nfrom int8_tbl i1\ncross join lateral (select (select i1.q1) as x) ss\ngroup by ss.x) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": true, "sql_length": 113, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What venue has don bradman (nsw) as the player?", "schema": "CREATE TABLE table_name_7 (venue VARCHAR, player VARCHAR)", "sql": "SELECT venue FROM table_name_7 WHERE player = 'don bradman (nsw)';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What day did Geelong play as the away team?", "schema": "CREATE TABLE table_name_73 (date VARCHAR, away_team VARCHAR)", "sql": "SELECT date FROM table_name_73 WHERE away_team = 'geelong';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 59, "num_statements": 1} {"question": "What is the total installed capacity of wind turbines in China?", "schema": "CREATE TABLE wind_turbines (id INT, name TEXT, country TEXT, installed_capacity INT); INSERT INTO wind_turbines (id, name, country, installed_capacity) VALUES (1, 'Gansu Wind Farm', 'China', 7965), (2, 'Jiuquan Wind Power Base', 'China', 10000);", "sql": "SELECT SUM(installed_capacity) FROM wind_turbines WHERE country = 'China';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 74, "num_statements": 1} {"question": "How many ethical AI projects were completed by organizations in Europe?", "schema": "CREATE TABLE AIProjects (id INT, name VARCHAR(50), organization VARCHAR(50), region VARCHAR(50), completed BOOLEAN); INSERT INTO AIProjects (id, name, organization, region, completed) VALUES (1, 'AI for Accessibility', 'Equal Tech', 'Asia-Pacific', true), (2, 'Ethical AI Education', 'Tech Learning', 'North America', true), (3, 'Digital Divide Research', 'Global Connect', 'Europe', false);", "sql": "SELECT COUNT(*) FROM AIProjects WHERE region = 'Europe' AND completed = true;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 77, "num_statements": 1} {"question": "Identify the departments with the highest and lowest number of employees", "schema": "CREATE TABLE Departments (id INT, department_name VARCHAR(50), employee_id INT); CREATE TABLE Employees (id INT, salary DECIMAL(10, 2));", "sql": "SELECT department_name, COUNT(*) AS employees_count FROM Departments JOIN Employees ON Departments.employee_id = Employees.id GROUP BY department_name ORDER BY employees_count DESC, department_name LIMIT 1; SELECT department_name, COUNT(*) AS employees_count FROM Departments JOIN Employees ON Departments.employee_id = Employees.id GROUP BY department_name ORDER BY employees_count ASC, department_name LIMIT 1;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 412, "num_statements": 2} {"question": "Generate PostgreSQL SQL for: What date did the episode air that had n/a for it's bbc three weekly ranking?", "schema": "CREATE TABLE table_24399615_4 (airdate VARCHAR, bbc_three_weekly_ranking VARCHAR)", "sql": "SELECT airdate FROM table_24399615_4 WHERE bbc_three_weekly_ranking = 'N/A';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 76, "num_statements": 1} {"question": "What is the total amount of humanitarian assistance provided by year?", "schema": "CREATE TABLE humanitarian_assistance (id INT PRIMARY KEY, amount FLOAT, year INT, country VARCHAR(255));", "sql": "SELECT year, SUM(amount) as total_assistance FROM humanitarian_assistance GROUP BY year;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 88, "num_statements": 1} {"question": "Who are the legal aid providers in New York that have served the most clients in the last 3 years?", "schema": "CREATE TABLE legal_aid_providers (id INT, name VARCHAR(50), state VARCHAR(2)); INSERT INTO legal_aid_providers (id, name, state) VALUES (1, 'Legal Aid Society', 'NY'); CREATE TABLE clients (id INT, provider_id INT, year INT);", "sql": "SELECT legal_aid_providers.name, COUNT(clients.id) AS client_count FROM legal_aid_providers INNER JOIN clients ON legal_aid_providers.id = clients.provider_id WHERE clients.year BETWEEN YEAR(CURRENT_DATE()) - 3 AND YEAR(CURRENT_DATE()) GROUP BY legal_aid_providers.name ORDER BY client_count DESC;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 297, "num_statements": 1} {"question": "What is the average salinity of the ocean in each hemisphere?", "schema": "CREATE TABLE ocean_salinity (id INT, year INT, hemisphere VARCHAR(50), avg_salinity FLOAT); INSERT INTO ocean_salinity (id, year, hemisphere, avg_salinity) VALUES (1, 2020, 'Northern Hemisphere', 35); INSERT INTO ocean_salinity (id, year, hemisphere, avg_salinity) VALUES (2, 2020, 'Southern Hemisphere', 34.7);", "sql": "SELECT hemisphere, AVG(avg_salinity) FROM ocean_salinity GROUP BY hemisphere;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 77, "num_statements": 1} {"question": "What is the name of the AI algorithm with the lowest fairness score in the 'algorithmic_fairness' table?", "schema": "CREATE TABLE algorithmic_fairness (algorithm_id INT, name TEXT, fairness_score FLOAT); INSERT INTO algorithmic_fairness (algorithm_id, name, fairness_score) VALUES (1, 'AlgorithmA', 0.75), (2, 'AlgorithmB', 0.68), (3, 'AlgorithmC', 0.81);", "sql": "SELECT name FROM algorithmic_fairness ORDER BY fairness_score ASC LIMIT 1;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 74, "num_statements": 1} {"question": "pgTAP test for Functap (assertion 50).", "schema": null, "sql": "SELECT * FROM check_test(\n can( 'pg_catalog', ARRAY['lower', 'foo', 'bar'], 'whatever' ),\n false,\n 'fail can(schema) with desc',\n 'whatever',\n ' pg_catalog.foo() missing\n pg_catalog.bar() missing'\n);", "explanation": "SQL assertion from pgTAP test suite for Functap.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": true, "sql_length": 220, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the result of the 1990 world cup qualifying competition?", "schema": "CREATE TABLE table_name_55 (result VARCHAR, competition VARCHAR)", "sql": "SELECT result FROM table_name_55 WHERE competition = '1990 world cup qualifying';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 81, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: report the total number of degrees granted between 1998 and 2002.", "schema": "CREATE TABLE campuses (campus VARCHAR, id VARCHAR); CREATE TABLE degrees (degrees INTEGER, campus VARCHAR, year VARCHAR)", "sql": "SELECT T1.campus, SUM(T2.degrees) FROM campuses AS T1 JOIN degrees AS T2 ON T1.id = T2.campus WHERE T2.year >= 1998 AND T2.year <= 2002 GROUP BY T1.campus;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 155, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was Fuzzy Zoeller's score in the United States?", "schema": "CREATE TABLE table_name_82 (score VARCHAR, country VARCHAR, player VARCHAR)", "sql": "SELECT score FROM table_name_82 WHERE country = 'united states' AND player = 'fuzzy zoeller';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 93, "num_statements": 1} {"question": "PostgreSQL regression test 'timestamptz': Write the SELECT query (example 83).", "schema": null, "sql": "SELECT * FROM pg_input_error_info('2001-01-01 00:00 Nehwon/Lankhmar', 'timestamptz');", "explanation": "Regression test for Timestamptz in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT * FROM pg_input_error_info('2001-01-01 00:00 Nehwon/Lankhmar', 'timestamptz')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 85, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which constructor has a Grid smaller than 18, and a Driver of mika häkkinen?", "schema": "CREATE TABLE table_name_14 (constructor VARCHAR, grid VARCHAR, driver VARCHAR)", "sql": "SELECT constructor FROM table_name_14 WHERE grid < 18 AND driver = 'mika häkkinen';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 83, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What Engine had a chassis of March 85c?", "schema": "CREATE TABLE table_name_72 (engine VARCHAR, chassis VARCHAR)", "sql": "SELECT engine FROM table_name_72 WHERE chassis = 'march 85c';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 61, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many millions of viewers watched the episode directed by Christine Moore?", "schema": "CREATE TABLE table_23255941_1 (viewers__in_millions_ VARCHAR, directed_by VARCHAR)", "sql": "SELECT viewers__in_millions_ FROM table_23255941_1 WHERE directed_by = 'Christine Moore';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 89, "num_statements": 1} {"question": "Delete records in the water_usage table where the usage is less than 100 liters", "schema": "CREATE TABLE water_usage (id INT, location VARCHAR(50), usage FLOAT);", "sql": "DELETE FROM water_usage WHERE usage < 100;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_delete", "is_postgresql_specific": false, "sql_length": 42, "num_statements": 1} {"question": "What is the average daily ridership for each subway line in Seoul?", "schema": "CREATE TABLE subway (line_id INT, city VARCHAR(50), daily_ridership INT); INSERT INTO subway (line_id, city, daily_ridership) VALUES (1, 'Tokyo', 300000), (2, 'Tokyo', 450000), (3, 'Tokyo', 400000), (4, 'Tokyo', 500000), (5, 'Seoul', 250000), (6, 'Seoul', 300000);", "sql": "SELECT line_id, city, AVG(daily_ridership) FROM subway WHERE city = 'Seoul' GROUP BY line_id, city;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 99, "num_statements": 1} {"question": "How many volunteers engaged in each program, by month?", "schema": "CREATE TABLE programs (id INT, name VARCHAR(255)); INSERT INTO programs (id, name) VALUES (1, 'Education'), (2, 'Health'), (3, 'Environment'); CREATE TABLE volunteer_hours (id INT, program_id INT, volunteer_date DATE, hours INT); INSERT INTO volunteer_hours (id, program_id, volunteer_date, hours) VALUES (1, 1, '2022-01-01', 5), (2, 1, '2022-01-02', 3), (3, 2, '2022-01-03', 6), (4, 3, '2022-01-04', 4);", "sql": "SELECT program_id, DATE_TRUNC('month', volunteer_date) AS month, COUNT(*) OVER (PARTITION BY program_id, month) AS volunteers_per_month FROM volunteer_hours;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "query_window_function", "is_postgresql_specific": true, "sql_length": 157, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the orginal air date for episodes with production code 2acx12?", "schema": "CREATE TABLE table_28210383_1 (original_air_date VARCHAR, prod_code VARCHAR)", "sql": "SELECT original_air_date FROM table_28210383_1 WHERE prod_code = '2ACX12';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 74, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who is the top goalscorer for the season 2010-11?", "schema": "CREATE TABLE table_2429942_2 (top_goalscorer VARCHAR, season VARCHAR)", "sql": "SELECT top_goalscorer FROM table_2429942_2 WHERE season = '2010-11';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 68, "num_statements": 1} {"question": "What is the average depth of all marine protected areas in the Pacific Ocean?\"", "schema": "CREATE TABLE marine_protected_areas (id INT, name TEXT, area_size FLOAT, avg_depth FLOAT, ocean TEXT); INSERT INTO marine_protected_areas (id, name, area_size, avg_depth, ocean) VALUES (1, 'Galapagos Marine Reserve', 133000, 200, 'Pacific');", "sql": "SELECT AVG(avg_depth) FROM marine_protected_areas WHERE ocean = 'Pacific';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 74, "num_statements": 1} {"question": "How many car manufacturers are headquartered in South Korea?", "schema": "CREATE TABLE Manufacturers (Id INT, Name VARCHAR(100), Country VARCHAR(50)); INSERT INTO Manufacturers (Id, Name, Country) VALUES (1, 'Hyundai', 'South Korea'), (2, 'Kia', 'South Korea');", "sql": "SELECT COUNT(*) FROM Manufacturers WHERE Country = 'South Korea';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 65, "num_statements": 1} {"question": "List the countries, regions, and mobile towers for regions with an average data usage over 75 among broadband subscribers.", "schema": "CREATE TABLE countries (country_id INT PRIMARY KEY, country_name VARCHAR(255)); INSERT INTO countries (country_id, country_name) VALUES (1, 'USA'), (2, 'Canada'), (3, 'Mexico'); CREATE TABLE regions (region_id INT PRIMARY KEY, region_name VARCHAR(255), country_id INT); INSERT INTO regions (region_id, region_name, country_id) VALUES (1, 'East', 1), (2, 'West', 1), (3, 'Central', 2), (4, 'North', 3), (5, 'South', 3); CREATE TABLE mobile_towers (tower_id INT PRIMARY KEY, region_id INT); INSERT INTO mobile_towers (tower_id, region_id) VALUES (1, 1), (2, 2), (3, 3), (4, 4), (5, 5); CREATE TABLE broadband_subscribers (subscriber_id INT PRIMARY KEY, region_id INT, data_usage FLOAT); INSERT INTO broadband_subscribers (subscriber_id, region_id, data_usage) VALUES (1, 1, 80.0), (2, 2, 70.0), (3, 3, 90.0), (4, 4, 65.0), (5, 5, 100.0);", "sql": "SELECT c.country_name, r.region_name, m.tower_id FROM countries c JOIN regions r ON c.country_id = r.country_id JOIN mobile_towers m ON r.region_id = m.region_id JOIN broadband_subscribers b ON r.region_id = b.region_id GROUP BY r.region_id HAVING AVG(b.data_usage) > 75;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 271, "num_statements": 1} {"question": "How many artworks were added to museums each year?", "schema": "CREATE TABLE artworks (id INT, museum_id INT, year INT, quantity INT); INSERT INTO artworks (id, museum_id, year, quantity) VALUES (1, 1, 2015, 1200), (2, 1, 2016, 1500), (3, 2, 2014, 1000), (4, 2, 2015, 1300), (5, 3, 2013, 1600), (6, 3, 2014, 1800);", "sql": "SELECT year, SUM(quantity) FROM artworks GROUP BY year;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 55, "num_statements": 1} {"question": "What is the percentage of total donations made by women in 2021?", "schema": "CREATE TABLE Donors (DonorID int, DonorGender varchar(50), Country varchar(50), AmountDonated numeric(18,2), DonationDate date); INSERT INTO Donors (DonorID, DonorGender, Country, AmountDonated, DonationDate) VALUES (1, 'Female', 'USA', 5000, '2021-01-01'), (2, 'Male', 'Canada', 7000, '2021-02-01'), (3, 'Female', 'Mexico', 8000, '2021-03-01');", "sql": "SELECT (SUM(CASE WHEN DonorGender = 'Female' THEN AmountDonated ELSE 0 END) / SUM(AmountDonated)) * 100 as Percentage FROM Donors WHERE YEAR(DonationDate) = 2021;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 162, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many Mixed Doubles won when Oliver Pongratz won the Men's Singles?", "schema": "CREATE TABLE table_12164707_1 (mixed_doubles VARCHAR, mens_singles VARCHAR)", "sql": "SELECT COUNT(mixed_doubles) FROM table_12164707_1 WHERE mens_singles = 'Oliver Pongratz';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 89, "num_statements": 1} {"question": "What is the success rate of patients who completed a mindfulness-based stress reduction (MBSR) program, compared to those who did not?", "schema": "CREATE TABLE patients (patient_id INT, age INT, gender TEXT, state TEXT); INSERT INTO patients (patient_id, age, gender, state) VALUES (1, 35, 'Female', 'California'); INSERT INTO patients (patient_id, age, gender, state) VALUES (2, 45, 'Male', 'Texas'); CREATE TABLE treatments (treatment_id INT, patient_id INT, treatment TEXT, date DATE, completion_date DATE); INSERT INTO treatments (treatment_id, patient_id, treatment, date, completion_date) VALUES (1, 1, 'MBSR', '2021-01-01', '2021-03-01'); INSERT INTO treatments (treatment_id, patient_id, treatment, date) VALUES (2, 2, 'Medication', '2021-01-02');", "sql": "SELECT AVG(CASE WHEN treatments.completion_date IS NOT NULL THEN 1 ELSE 0 END) AS mbsr_completers_success_rate, AVG(CASE WHEN treatments.completion_date IS NULL THEN 1 ELSE 0 END) AS mbsr_non_completers_success_rate FROM treatments INNER JOIN patients ON treatments.patient_id = patients.patient_id WHERE treatments.treatment = 'MBSR';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 335, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What competition had its first match on December 20, 2007?", "schema": "CREATE TABLE table_name_15 (competition VARCHAR, first_match VARCHAR)", "sql": "SELECT competition FROM table_name_15 WHERE first_match = 'december 20, 2007';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 78, "num_statements": 1} {"question": "Show an example of PostgreSQL CREATE SCHEMA (example 4).", "schema": null, "sql": "CREATE SCHEMA hollywood CREATE TABLE films (title text, release date, awards text[]) CREATE VIEW winners AS SELECT title, release FROM films WHERE awards IS NOT NULL;", "explanation": "PostgreSQL CREATE SCHEMA command.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "ddl_table", "is_postgresql_specific": false, "sql_length": 166, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the smallest number of episodes in a season?", "schema": "CREATE TABLE table_2113721_7 (episodes INTEGER)", "sql": "SELECT MIN(episodes) FROM table_2113721_7;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 42, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the box score during a home game of the Adelaide 36ers?", "schema": "CREATE TABLE table_name_39 (Box VARCHAR, home_team VARCHAR)", "sql": "SELECT Box AS score FROM table_name_39 WHERE home_team = 'adelaide 36ers';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 74, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the pole position of the Portuguese Grand Prix?", "schema": "CREATE TABLE table_name_54 (pole_position VARCHAR, grand_prix VARCHAR)", "sql": "SELECT pole_position FROM table_name_54 WHERE grand_prix = 'portuguese grand prix';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 83, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is Country, when Place is \"T8\", and when Score is \"70-67=137\"?", "schema": "CREATE TABLE table_name_98 (country VARCHAR, place VARCHAR, score VARCHAR)", "sql": "SELECT country FROM table_name_98 WHERE place = 't8' AND score = 70 - 67 = 137;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 79, "num_statements": 1} {"question": "How many drugs were approved in 'Asia' in 2021?", "schema": "CREATE TABLE drug_approval (drug_name TEXT, year INT, region TEXT); INSERT INTO drug_approval (drug_name, year, region) VALUES ('DrugX', 2019, 'Europe'), ('DrugX', 2020, 'Asia'), ('DrugY', 2018, 'Asia'), ('DrugY', 2020, 'Asia'), ('DrugZ', 2021, 'Asia'), ('DrugA', 2021, 'Asia');", "sql": "SELECT COUNT(DISTINCT drug_name) FROM drug_approval WHERE year = 2021 AND region = 'Asia';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 90, "num_statements": 1} {"question": "List all campaigns in New York that started after 2018-01-01.", "schema": "CREATE TABLE campaigns (campaign_id INT, name TEXT, start_date DATE, location TEXT); INSERT INTO campaigns (campaign_id, name, start_date, location) VALUES (1, 'End Stigma', '2017-12-01', 'New York'); INSERT INTO campaigns (campaign_id, name, start_date, location) VALUES (2, 'Mental Health Matters', '2019-06-01', 'California');", "sql": "SELECT name, start_date FROM campaigns WHERE location = 'New York' AND start_date > '2018-01-01';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 97, "num_statements": 1} {"question": "Calculate the percentage of community engagement events in each city, ordered by the percentage in descending order.", "schema": "CREATE TABLE community_events (event_id INT, event_name TEXT, city TEXT, year INT); INSERT INTO community_events (event_id, event_name, city, year) VALUES (1, 'Cultural Festival', 'New York', 2020), (2, 'Traditional Music Concert', 'Los Angeles', 2019);", "sql": "SELECT city, ROUND(100.0 * COUNT(*) / (SELECT COUNT(*) FROM community_events) , 2) as percentage FROM community_events GROUP BY city ORDER BY percentage DESC;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 158, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: When was episode 02x02 first broadcast?", "schema": "CREATE TABLE table_29141354_2 (first_broadcast VARCHAR, episode VARCHAR)", "sql": "SELECT first_broadcast FROM table_29141354_2 WHERE episode = '02x02';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 69, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many validations are listed for the iin range 36?", "schema": "CREATE TABLE table_15905399_1 (validation VARCHAR, iin_ranges VARCHAR)", "sql": "SELECT COUNT(validation) FROM table_15905399_1 WHERE iin_ranges = '36';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 71, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What census ranking has an area greater than 578.28 km2?", "schema": "CREATE TABLE table_name_17 (census_ranking VARCHAR, area_km_2 INTEGER)", "sql": "SELECT census_ranking FROM table_name_17 WHERE area_km_2 > 578.28;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "What is the total revenue generated by indigenous artworks sold in North America, broken down by country?", "schema": "CREATE TABLE Artists (ArtistID INT, ArtistName VARCHAR(50), Country VARCHAR(50), Ethnicity VARCHAR(50)); CREATE TABLE ArtPieces (ArtPieceID INT, ArtistID INT, Price INT); CREATE TABLE Sales (SaleID INT, ArtPieceID INT, Year INT, Quantity INT); INSERT INTO Artists VALUES (1, 'Artist 1', 'USA', 'Indigenous'), (2, 'Artist 2', 'Canada', 'Indigenous'), (3, 'Artist 3', 'Mexico', 'Indigenous'); INSERT INTO ArtPieces VALUES (1, 1, 5000), (2, 1, 7000), (3, 2, 6000), (4, 2, 8000), (5, 3, 11000), (6, 3, 13000); INSERT INTO Sales VALUES (1, 1, 2021, 2), (2, 2, 2021, 1), (3, 3, 2021, 3), (4, 4, 2021, 2), (5, 5, 2021, 1), (6, 6, 2021, 3);", "sql": "SELECT A.Country, SUM(AP.Price * S.Quantity) AS TotalRevenue FROM Artists A INNER JOIN ArtPieces AP ON A.ArtistID = AP.ArtistID INNER JOIN Sales S ON AP.ArtPieceID = S.ArtPieceID WHERE A.Ethnicity = 'Indigenous' AND A.Country IN ('USA', 'Canada', 'Mexico') GROUP BY A.Country;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 276, "num_statements": 1} {"question": "What is the percentage of total complaints for each service category in Quebec in 2021?", "schema": "CREATE TABLE QuebecComplaints (service VARCHAR(30), complaints INT); INSERT INTO QuebecComplaints (service, complaints) VALUES ('Transportation', 1200), ('Utilities', 900), ('Education', 700), ('Healthcare', 1500);", "sql": "SELECT service, (complaints * 100.0 / (SELECT SUM(complaints) FROM QuebecComplaints)) AS percentage FROM QuebecComplaints;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 122, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many deciles have a Gender of coed, an Authority of state, and a Name of mount maunganui school?", "schema": "CREATE TABLE table_name_93 (decile INTEGER, name VARCHAR, gender VARCHAR, authority VARCHAR)", "sql": "SELECT SUM(decile) FROM table_name_93 WHERE gender = 'coed' AND authority = 'state' AND name = 'mount maunganui school';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 120, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: When the result is d, who are the opponents?", "schema": "CREATE TABLE table_name_19 (opponents VARCHAR, result VARCHAR)", "sql": "SELECT opponents FROM table_name_19 WHERE result = 'd';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 55, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many Joint Music Awards are there when the Total is larger than 18, in a Year after 2007?", "schema": "CREATE TABLE table_name_32 (joint_music_award VARCHAR, total VARCHAR, year VARCHAR)", "sql": "SELECT joint_music_award FROM table_name_32 WHERE total > 18 AND year > 2007;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 77, "num_statements": 1} {"question": "Which sustainable tourism activities in Spain are rated 5?", "schema": "CREATE TABLE activities (activity_id INT, activity_name VARCHAR(50), country VARCHAR(50), rating INT); INSERT INTO activities (activity_id, activity_name, country, rating) VALUES (1, 'Hiking Adventure', 'Spain', 5), (2, 'Bird Watching', 'Spain', 4), (3, 'Nature Photography', 'Spain', 5), (4, 'Bike Tour', 'Spain', 4);", "sql": "SELECT activity_name FROM activities WHERE country = 'Spain' AND rating = 5;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 76, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'constraints' (example 140).", "schema": null, "sql": "INSERT INTO UNIQUE_TBL (t) VALUES ('six');", "explanation": "DML from PostgreSQL core regression test for Constraints.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 42, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the to par number of the person who won in 2003?", "schema": "CREATE TABLE table_name_40 (to_par VARCHAR, year_s__won VARCHAR)", "sql": "SELECT to_par FROM table_name_40 WHERE year_s__won = '2003';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 60, "num_statements": 1} {"question": "What is the total number of expeditions for each organization?", "schema": "CREATE TABLE expedition (org VARCHAR(20), depth INT); INSERT INTO expedition VALUES ('Ocean Explorer', 2500), ('Ocean Explorer', 3000), ('Sea Discoverers', 2000), ('Marine Investigators', 4000), ('Marine Investigators', 4500);", "sql": "SELECT org, COUNT(*) FROM expedition GROUP BY org;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 50, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the average total when gold is 0, bronze is 0, and silver is smaller than 1?", "schema": "CREATE TABLE table_name_54 (total INTEGER, silver VARCHAR, gold VARCHAR, bronze VARCHAR)", "sql": "SELECT AVG(total) FROM table_name_54 WHERE gold = 0 AND bronze = 0 AND silver < 1;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 82, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the sum of the lap with a finish of 24?", "schema": "CREATE TABLE table_name_60 (laps INTEGER, finish VARCHAR)", "sql": "SELECT SUM(laps) FROM table_name_60 WHERE finish = '24';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 56, "num_statements": 1} {"question": "What is the minimum number of followers for users who have posted about #veganism in the past week?", "schema": "CREATE TABLE users (id INT, followers INT, posts TEXT);", "sql": "SELECT MIN(followers) FROM users WHERE posts LIKE '%#veganism%' AND posts IS NOT NULL AND posts != '';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 102, "num_statements": 1} {"question": "PostgreSQL regression test 'create_index': Write the SELECT query (example 151).", "schema": null, "sql": "SELECT * FROM array_index_op_test WHERE t && '{}' ORDER BY seqno;", "explanation": "Regression test for Create Index in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT * FROM array_index_op_test WHERE t && '{}' ORDER BY seqno) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 65, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the highest Kuala Lumpur value with a Durban value less than 1 and a Mar Del Plata value greater than 0?", "schema": "CREATE TABLE table_name_82 (kuala_lumpur INTEGER, durban VARCHAR, mar_del_plata VARCHAR)", "sql": "SELECT MAX(kuala_lumpur) FROM table_name_82 WHERE durban < 1 AND mar_del_plata > 0;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 83, "num_statements": 1} {"question": "What are the names, populations, and regions of species that have been sighted since 2019, excluding species from the Arctic and Antarctic?", "schema": "CREATE TABLE Species (id INT, name VARCHAR(100), population INT, region VARCHAR(100), last_sighting DATE); INSERT INTO Species (id, name, population, region, last_sighting) VALUES (1, 'Polar Bear', 25000, 'Arctic', '2018-01-01'); INSERT INTO Species (id, name, population, region, last_sighting) VALUES (2, 'Arctic Fox', 30000, 'Arctic', '2020-02-01'); INSERT INTO Species (id, name, population, region, last_sighting) VALUES (3, 'Reindeer', 40000, 'Norway', '2021-01-01'); INSERT INTO Species (id, name, population, region, last_sighting) VALUES (4, 'Penguin', 50000, 'Antarctic', '2019-01-01');", "sql": "SELECT name, population, region FROM Species WHERE last_sighting >= '2019-01-01' AND region NOT IN ('Arctic', 'Antarctic');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 123, "num_statements": 1} {"question": "Find the water usage data with the highest usage amount in the water_usage table", "schema": "CREATE TABLE water_usage ( date DATE, usage_category VARCHAR(20), region VARCHAR(20), usage_amount INT ); INSERT INTO water_usage (date, usage_category, region, usage_amount) VALUES ( '2022-07-01', 'Residential', 'Northeast', 15000), ('2022-07-02', 'Industrial', 'Midwest', 200000), ('2022-07-03', 'Agricultural', 'West', 800000);", "sql": "SELECT * FROM water_usage WHERE usage_amount = (SELECT MAX(usage_amount) FROM water_usage);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 91, "num_statements": 1} {"question": "What is the total amount of minerals extracted by each company, considering only environmentally friendly mining methods?", "schema": "CREATE TABLE mining_companies (company_id INT, company_name TEXT); INSERT INTO mining_companies (company_id, company_name) VALUES (1, 'CompanyX'), (2, 'CompanyY'); CREATE TABLE mining_methods (method_id INT, method_name TEXT, is_environmentally_friendly BOOLEAN); INSERT INTO mining_methods (method_id, method_name, is_environmentally_friendly) VALUES (1, 'Open-pit', true), (2, 'Underground', false), (3, 'In-situ', true); CREATE TABLE extraction_data (company_id INT, method_id INT, amount_extracted INT); INSERT INTO extraction_data (company_id, method_id, amount_extracted) VALUES (1, 1, 500), (1, 3, 300), (2, 1, 700), (2, 3, 400);", "sql": "SELECT mc.company_name, SUM(ed.amount_extracted) AS total_amount_extracted FROM extraction_data ed JOIN mining_companies mc ON ed.company_id = mc.company_id JOIN mining_methods mm ON ed.method_id = mm.method_id WHERE mm.is_environmentally_friendly = true GROUP BY mc.company_name;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 280, "num_statements": 1} {"question": "Determine the inventory turnover rate for each ingredient category", "schema": "CREATE TABLE Ingredients (ingredient_id INT, ingredient_name VARCHAR(255), ingredient_category VARCHAR(255), quantity INT, purchase_price DECIMAL(5,2)); INSERT INTO Ingredients (ingredient_id, ingredient_name, ingredient_category, quantity, purchase_price) VALUES (1, 'Chickpeas', 'Legumes', 50, 1.25), (2, 'Chicken Breast', 'Poultry', 100, 3.50); CREATE TABLE Sales (sales_id INT, ingredient_id INT, quantity INT); INSERT INTO Sales (sales_id, ingredient_id, quantity) VALUES (1, 1, 25), (2, 2, 80);", "sql": "SELECT ingredient_category, SUM(quantity) AS total_quantity, AVG(quantity) AS avg_quantity_sold, SUM(quantity) / (SELECT SUM(quantity) * purchase_price FROM Ingredients, Sales WHERE Ingredients.ingredient_id = Sales.ingredient_id GROUP BY Ingredients.ingredient_id) AS inventory_turnover_rate FROM Ingredients, Sales WHERE Ingredients.ingredient_id = Sales.ingredient_id GROUP BY ingredient_category;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 400, "num_statements": 1} {"question": "Write the PL/pgSQL object from PostgreSQL regression test 'create_aggregate' (example 28).", "schema": null, "sql": "-- test ordered-set aggs using built-in support functions\ncreate aggregate my_percentile_disc(float8 ORDER BY anyelement) (\n stype = internal,\n sfunc = ordered_set_transition,\n finalfunc = percentile_disc_final,\n finalfunc_extra = true,\n finalfunc_modify = read_write\n);", "explanation": "PL/pgSQL object from PostgreSQL core test for Create Aggregate.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "ddl_advanced", "is_postgresql_specific": true, "sql_length": 275, "num_statements": 1} {"question": "What is the total number of students and teachers in the 'Education' database?", "schema": "CREATE TABLE student (student_id INT); INSERT INTO student (student_id) VALUES (1), (2), (3); CREATE TABLE teacher (teacher_id INT); INSERT INTO teacher (teacher_id) VALUES (101), (102), (103);", "sql": "SELECT COUNT(*) FROM student; SELECT COUNT(*) FROM teacher;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 59, "num_statements": 2} {"question": "Generate PostgreSQL SQL for: Who is the mix artist for the closing party setlist where artist 1 is Dillinja and Skibadee?", "schema": "CREATE TABLE table_29264319_1 (mix_artist VARCHAR, setlist VARCHAR, artist_1 VARCHAR)", "sql": "SELECT mix_artist FROM table_29264319_1 WHERE setlist = 'Closing Party' AND artist_1 = 'Dillinja and Skibadee';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 111, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the sum of the round of the new york jets NFL club, which has a pick less than 166?", "schema": "CREATE TABLE table_name_60 (round INTEGER, nfl_club VARCHAR, pick VARCHAR)", "sql": "SELECT SUM(round) FROM table_name_60 WHERE nfl_club = 'new york jets' AND pick < 166;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 85, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What are the names of races held after 12:00:00 or before 09:00:00?", "schema": "CREATE TABLE races (name VARCHAR, TIME VARCHAR)", "sql": "SELECT name FROM races WHERE TIME > \"12:00:00\" OR TIME < \"09:00:00\";", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 68, "num_statements": 1} {"question": "Find the number of AI safety incidents and their severity, partitioned by incident type, ordered by severity in descending order?", "schema": "CREATE TABLE ai_safety_incidents (incident_id INT, incident_type VARCHAR(50), severity DECIMAL(3,2)); INSERT INTO ai_safety_incidents (incident_id, incident_type, severity) VALUES (1, 'Cybersecurity', 0.75), (2, 'Data Privacy', 0.85), (3, 'Algorithmic Bias', 0.95), (4, 'Ethical Concerns', 1.00);", "sql": "SELECT incident_type, COUNT(*) as num_incidents, AVG(severity) as avg_severity FROM ai_safety_incidents GROUP BY incident_type ORDER BY avg_severity DESC;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 154, "num_statements": 1} {"question": "List all decentralized applications that have had a regulatory action taken against them and their associated smart contracts in the South American region.", "schema": "CREATE TABLE decentralized_applications (dapp_id INT, dapp_name VARCHAR(50), region VARCHAR(50)); CREATE TABLE smart_contracts (contract_id INT, dapp_id INT, contract_name VARCHAR(50), region VARCHAR(50)); CREATE TABLE regulatory_actions (action_id INT, contract_id INT, action_date DATE);", "sql": "SELECT d.dapp_name, s.contract_name FROM decentralized_applications d INNER JOIN smart_contracts s ON d.dapp_id = s.dapp_id INNER JOIN regulatory_actions r ON s.contract_id = r.contract_id WHERE d.region = 'South America';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 222, "num_statements": 1} {"question": "PostgreSQL regression test 'aggregates': Write the SELECT query (example 23).", "schema": null, "sql": "SELECT var_pop(b) FROM aggtest;", "explanation": "Regression test for Aggregates in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT var_pop(b) FROM aggtest) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 31, "num_statements": 1} {"question": "Write the PL/pgSQL object from PostgreSQL regression test 'create_procedure' (example 5).", "schema": null, "sql": "$$;\n\nCALL ptest3('b');\n\nSELECT * FROM cp_test;\n\n\n-- output arguments\n\nCREATE PROCEDURE ptest4a(INOUT a int, INOUT b int)\nLANGUAGE SQL\nAS $$\nSELECT 1, 2;", "explanation": "PL/pgSQL object from PostgreSQL core test for Create Procedure.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 152, "num_statements": 4} {"question": "Generate PostgreSQL SQL for: Which Cuts made has a Tournament of totals, and Wins smaller than 11?", "schema": "CREATE TABLE table_name_25 (cuts_made INTEGER, tournament VARCHAR, wins VARCHAR)", "sql": "SELECT AVG(cuts_made) FROM table_name_25 WHERE tournament = 'totals' AND wins < 11;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 83, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What did the team do in the Open Cup in the 2nd, Northeast season of the USISL D-3 Pro League?", "schema": "CREATE TABLE table_1939214_1 (open_cup VARCHAR, league VARCHAR, regular_season VARCHAR)", "sql": "SELECT open_cup FROM table_1939214_1 WHERE league = 'USISL D-3 Pro league' AND regular_season = '2nd, Northeast';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 113, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What's Babson College's enrollment?", "schema": "CREATE TABLE table_1974782_1 (enrollment INTEGER, institution VARCHAR)", "sql": "SELECT MAX(enrollment) FROM table_1974782_1 WHERE institution = 'Babson College';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 81, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the Venue on 26 jan 2005", "schema": "CREATE TABLE table_name_12 (venue VARCHAR, date VARCHAR)", "sql": "SELECT venue FROM table_name_12 WHERE date = '26 jan 2005';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 59, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Where did Konchesky move to?", "schema": "CREATE TABLE table_name_34 (moving_to VARCHAR, name VARCHAR)", "sql": "SELECT moving_to FROM table_name_34 WHERE name = 'konchesky';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 61, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which Rank has a Scorer of lee sang-cheol?", "schema": "CREATE TABLE table_name_69 (rank VARCHAR, scorer VARCHAR)", "sql": "SELECT rank FROM table_name_69 WHERE scorer = 'lee sang-cheol';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 63, "num_statements": 1} {"question": "What were the total sales for a specific drug in Q1 2021?", "schema": "CREATE TABLE drug_sales(drug_id INT, sale_date DATE, amount DECIMAL(10,2)); INSERT INTO drug_sales(drug_id, sale_date, amount) VALUES (1, '2021-01-01', 1000), (1, '2021-01-15', 1500), (2, '2021-01-01', 2000), (2, '2021-01-15', 2500);", "sql": "SELECT drug_id, SUM(amount) as total_sales FROM drug_sales WHERE sale_date BETWEEN '2021-01-01' AND '2021-03-31' GROUP BY drug_id;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 130, "num_statements": 1} {"question": "Write the SQL definition from PostgreSQL contrib extension 'citext' (item 16).", "schema": null, "sql": "CREATE FUNCTION s.index_this_expr(s.citext, bool) RETURNS s.citext IMMUTABLE\n LANGUAGE SQL AS $$SELECT $1$$;\nREVOKE ALL ON FUNCTION public.setter FROM PUBLIC;\nREVOKE ALL ON FUNCTION s.const FROM PUBLIC;\nREVOKE ALL ON FUNCTION s.index_this_expr FROM PUBLIC;\n-- Even for an empty table, expression planning calls s.const & public.setter.\nGRANT EXECUTE ON FUNCTION public.setter TO regress_minimal;\nGRANT EXECUTE ON FUNCTION s.const TO regress_minimal;\n-- Function for index predicate.\nCREATE FUNCTION s.index_row_if(s.citext) RETURNS bool IMMUTABLE\n LANGUAGE SQL AS $$SELECT $1 IS NOT NULL$$;", "explanation": "SQL definition from the 'citext' PostgreSQL contrib extension.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "dcl_security", "is_postgresql_specific": false, "sql_length": 592, "num_statements": 7} {"question": "Generate PostgreSQL SQL for: What is the highest Goals Against, when Club is \"Pontevedra CF\", and when Played is less than 38?", "schema": "CREATE TABLE table_name_74 (goals_against INTEGER, club VARCHAR, played VARCHAR)", "sql": "SELECT MAX(goals_against) FROM table_name_74 WHERE club = 'pontevedra cf' AND played < 38;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 90, "num_statements": 1} {"question": "Delete all menu items with a price over 20 dollars", "schema": "CREATE TABLE menu_items (item_id INT, item_name VARCHAR(50), price DECIMAL(5,2)); INSERT INTO menu_items (item_id, item_name, price) VALUES (1, 'Steak', 25.99), (2, 'Salad', 12.49), (3, 'Pasta', 18.99);", "sql": "DELETE FROM menu_items WHERE price > 20.00;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_delete", "is_postgresql_specific": false, "sql_length": 43, "num_statements": 1} {"question": "What is the total weight of organic fruits exported to Germany?", "schema": "CREATE TABLE FruitExport(id INT, name TEXT, weight FLOAT, is_organic BOOLEAN, country TEXT); INSERT INTO FruitExport(id, name, weight, is_organic, country) VALUES (1, 'Bananas', 450.3, TRUE, 'Germany'), (2, 'Apples', 300.2, FALSE, 'Germany');", "sql": "SELECT SUM(weight) FROM FruitExport WHERE name = 'Bananas' AND is_organic = TRUE AND country = 'Germany';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 105, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: what is the competition when the date is 1995-08-06?", "schema": "CREATE TABLE table_name_35 (competition VARCHAR, date VARCHAR)", "sql": "SELECT competition FROM table_name_35 WHERE date = '1995-08-06';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 64, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Visitor of dallas, and a Date of june 12 had what highest attendance?", "schema": "CREATE TABLE table_name_16 (attendance INTEGER, visitor VARCHAR, date VARCHAR)", "sql": "SELECT MAX(attendance) FROM table_name_16 WHERE visitor = 'dallas' AND date = 'june 12';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 88, "num_statements": 1} {"question": "What is the average length of articles written by female authors in Africa?", "schema": "CREATE TABLE authors (author_id INT, gender VARCHAR(6), country VARCHAR(50)); CREATE TABLE articles (article_id INT, author_id INT, length_words INT); INSERT INTO authors VALUES (1, 'female', 'Nigeria'); INSERT INTO articles VALUES (1, 1, 1200);", "sql": "SELECT AVG(length_words) FROM authors INNER JOIN articles ON authors.author_id = articles.author_id WHERE authors.gender = 'female' AND country IN ('Nigeria', 'South Africa', 'Egypt');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 184, "num_statements": 1} {"question": "What is the average sustainable sourcing score for each restaurant category?", "schema": "CREATE SCHEMA FoodService;CREATE TABLE Sustainability (sustainability_id INT, restaurant_id INT, category VARCHAR(50), score INT); INSERT INTO Sustainability (sustainability_id, restaurant_id, category, score) VALUES (1, 1, 'dining', 85), (2, 1, 'dining', 90), (3, 2, 'takeout', 70), (4, 2, 'takeout', 75);", "sql": "SELECT category, AVG(score) as avg_score FROM Sustainability GROUP BY category;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 79, "num_statements": 1} {"question": "List all astronauts who have never been on a space mission", "schema": "CREATE TABLE Astronauts(ID INT, Name VARCHAR(50), FirstMissionDate DATE);", "sql": "SELECT Name FROM Astronauts WHERE FirstMissionDate IS NULL;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 59, "num_statements": 1} {"question": "What is the average number of likes and comments for posts in the past week?", "schema": "CREATE TABLE posts (id INT PRIMARY KEY, user_id INT, post_date DATETIME, content TEXT, likes INT, comments INT);", "sql": "SELECT AVG(likes + comments) FROM posts WHERE post_date >= DATEADD(week, -1, GETDATE());", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 88, "num_statements": 1} {"question": "Which locations have had more than 10 marine species observations in a given year, along with the number of observations?", "schema": "CREATE TABLE MarineSpeciesObservations (observation_id INT, species VARCHAR(255), location VARCHAR(255), year INT); INSERT INTO MarineSpeciesObservations (observation_id, species, location, year) VALUES (1, 'Dolphin', 'Pacific Ocean', 2020); INSERT INTO MarineSpeciesObservations (observation_id, species, location, year) VALUES (2, 'Shark', 'Atlantic Ocean', 2019);", "sql": "SELECT location, COUNT(*) FROM MarineSpeciesObservations GROUP BY location HAVING COUNT(*) > 10;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 96, "num_statements": 1} {"question": "What is the total number of space missions funded by NASA and ESA?", "schema": "CREATE TABLE space_missions (id INT, name VARCHAR(50), space_agency VARCHAR(50)); CREATE TABLE space_agencies (id INT, name VARCHAR(50), acronym VARCHAR(50));", "sql": "SELECT COUNT(*) FROM space_missions WHERE space_agency IN (SELECT name FROM space_agencies WHERE acronym IN ('NASA', 'ESA'));", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 125, "num_statements": 1} {"question": "Show the total visitor count for each department's temporary exhibitions, excluding the \"Art\" department.", "schema": "CREATE TABLE DepartmentExhibits (department_name TEXT, exhibit_name TEXT, visitor_count INTEGER); INSERT INTO DepartmentExhibits (department_name, exhibit_name, visitor_count) VALUES ('Art', 'Impressionism', 2000), ('Art', 'Cubism', 1500), ('History', 'Ancient Egypt', 3000); CREATE TABLE DepartmentTotals (department_name TEXT, total_visitors INTEGER); INSERT INTO DepartmentTotals (department_name, total_visitors) SELECT department_name, SUM(visitor_count) FROM DepartmentExhibits WHERE department_name != 'Art' GROUP BY department_name;", "sql": "SELECT department_name, SUM(visitor_count) FROM DepartmentExhibits WHERE department_name != 'Art' GROUP BY department_name;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 123, "num_statements": 1} {"question": "Which excavation sites have more artifacts from the Roman period than the Iron Age?", "schema": "CREATE TABLE SiteArtifacts (SiteID INT, ArtifactID INT, Period TEXT); INSERT INTO SiteArtifacts (SiteID, ArtifactID, Period) VALUES (1, 1, 'Roman'), (1, 2, 'Iron Age'), (2, 3, 'Roman');", "sql": "SELECT SiteID FROM SiteArtifacts WHERE Period = 'Roman' GROUP BY SiteID HAVING COUNT(*) > (SELECT COUNT(*) FROM SiteArtifacts WHERE SiteID = SiteArtifacts.SiteID AND Period = 'Iron Age');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 187, "num_statements": 1} {"question": "Which countries in South America have the highest number of eco-friendly accommodations?", "schema": "CREATE TABLE if NOT EXISTS accommodations (id INT, name TEXT, country TEXT, eco_friendly BOOLEAN); INSERT INTO accommodations (id, name, country, eco_friendly) VALUES (1, 'Eco Lodge', 'Argentina', true), (2, 'Green Retreat', 'Brazil', true), (3, 'Sustainable Hotel', 'Chile', true);", "sql": "SELECT country, COUNT(*) as count FROM accommodations WHERE eco_friendly = true GROUP BY country ORDER BY count DESC;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 117, "num_statements": 1} {"question": "What is the total assets under management (AUM) for the 'Value' portfolio as of the last day of the previous quarter?", "schema": "CREATE TABLE portfolios (portfolio VARCHAR(20), aum DECIMAL(15, 2)); INSERT INTO portfolios (portfolio, aum) VALUES ('Tech Growth', 5000000.00), ('Value', 7000000.00), ('Small Cap', 3000000.00);", "sql": "SELECT aum FROM portfolios WHERE portfolio = 'Value';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 53, "num_statements": 1} {"question": "What are the top three mining operations with the highest resource extraction?", "schema": "CREATE TABLE MiningOperations (OperationID INT, OperationName VARCHAR(20), Location VARCHAR(20), ResourcesExtracted INT, OperationDate DATE);", "sql": "SELECT OperationName, ResourcesExtracted FROM MiningOperations WHERE ROW_NUMBER() OVER(ORDER BY ResourcesExtracted DESC) <= 3;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_window_function", "is_postgresql_specific": true, "sql_length": 126, "num_statements": 1} {"question": "List the case IDs and billing amounts for cases that were not billed.", "schema": "CREATE TABLE Cases (CaseID int, BillingAmount decimal(5,2)); INSERT INTO Cases (CaseID, BillingAmount) VALUES (1, 250.00), (2, NULL), (3, 300.00), (4, 150.00), (5, NULL);", "sql": "SELECT CaseID, BillingAmount FROM Cases WHERE BillingAmount IS NULL;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 68, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is Record, when Game is 27?", "schema": "CREATE TABLE table_name_54 (record VARCHAR, game VARCHAR)", "sql": "SELECT record FROM table_name_54 WHERE game = 27;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 49, "num_statements": 1} {"question": "How many size 14 jeans were sold in the past month?", "schema": "CREATE TABLE jeans_sales (id INT PRIMARY KEY, size INT, sale_date DATE); INSERT INTO jeans_sales (id, size, sale_date) VALUES (1, 12, '2021-12-01'), (2, 14, '2021-12-05'), (3, 16, '2021-12-10');", "sql": "SELECT COUNT(*) FROM jeans_sales WHERE size = 14 AND sale_date >= DATE '2021-11-01' AND sale_date < DATE '2021-12-01';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 118, "num_statements": 1} {"question": "What is the maximum number of defense contracts signed by a single company in the United States?", "schema": "CREATE TABLE defense_contracts (dc_id INT, dc_company VARCHAR(50), dc_country VARCHAR(50)); INSERT INTO defense_contracts (dc_id, dc_company, dc_country) VALUES (1, 'Company A', 'United States'), (2, 'Company B', 'United States'), (3, 'Company C', 'Canada');", "sql": "SELECT MAX(dc_count) FROM (SELECT COUNT(*) AS dc_count FROM defense_contracts WHERE dc_country = 'United States' GROUP BY dc_company) AS subquery;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 146, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What are the hometowns of gymnasts and the corresponding number of gymnasts?", "schema": "CREATE TABLE gymnast (Gymnast_ID VARCHAR); CREATE TABLE people (Hometown VARCHAR, People_ID VARCHAR)", "sql": "SELECT T2.Hometown, COUNT(*) FROM gymnast AS T1 JOIN people AS T2 ON T1.Gymnast_ID = T2.People_ID GROUP BY T2.Hometown;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 119, "num_statements": 1} {"question": "What is the total number of organic products in the ORGANIC_PRODUCTS table?", "schema": "CREATE TABLE ORGANIC_PRODUCTS (id INT, name VARCHAR(50), category VARCHAR(50), quantity INT); INSERT INTO ORGANIC_PRODUCTS (id, name, category, quantity) VALUES (1, 'Quinoa', 'Grains', 50), (2, 'Tofu', 'Proteins', 30);", "sql": "SELECT SUM(quantity) FROM ORGANIC_PRODUCTS;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 43, "num_statements": 1} {"question": "Display the research grant amounts for all grants awarded to the 'Physics' department.", "schema": "CREATE TABLE grants (grant_id INT, dept_name VARCHAR(255), grant_amount FLOAT); INSERT INTO grants (grant_id, dept_name, grant_amount) VALUES (1, 'Physics', 50000), (2, 'Computer_Science', 75000), (3, 'Physics', 60000);", "sql": "SELECT grant_amount FROM grants WHERE dept_name = 'Physics';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 60, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the listed owner for mike garvey", "schema": "CREATE TABLE table_24535095_2 (listed_owner_s_ VARCHAR, crew_chief VARCHAR)", "sql": "SELECT listed_owner_s_ FROM table_24535095_2 WHERE crew_chief = 'Mike Garvey';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 78, "num_statements": 1} {"question": "How many threat intelligence reports were generated per month in 2021?", "schema": "CREATE TABLE Reports (Month VARCHAR(7), Count INT); INSERT INTO Reports (Month, Count) VALUES ('Jan-2021', 50), ('Feb-2021', 65), ('Mar-2021', 70), ('Apr-2021', 75), ('May-2021', 80), ('Jun-2021', 85), ('Jul-2021', 90), ('Aug-2021', 95), ('Sep-2021', 100), ('Oct-2021', 105), ('Nov-2021', 110), ('Dec-2021', 115);", "sql": "SELECT STR_TO_DATE(Month, '%b-%Y') AS Month, COUNT(*) FROM Reports GROUP BY Month;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 82, "num_statements": 1} {"question": "Avg. risk rating of sustainable investments in the EU", "schema": "CREATE TABLE sustainable_investment_risk(investment_id INT, risk_rating INT, investment_type VARCHAR(20));", "sql": "SELECT AVG(risk_rating) FROM sustainable_investment_risk WHERE investment_type = 'sustainable';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 95, "num_statements": 1} {"question": "What is the percentage of habitats that are fully protected, by region?", "schema": "CREATE TABLE habitat_data (habitat_id INT, habitat_type VARCHAR(255), region VARCHAR(255), protection_level VARCHAR(255)); INSERT INTO habitat_data (habitat_id, habitat_type, region, protection_level) VALUES (1, 'Forest', 'North', 'Full'), (2, 'Forest', 'North', 'Partial'), (3, 'Savannah', 'South', 'Full'), (4, 'Savannah', 'South', 'Partial'), (5, 'Wetlands', 'East', 'Full'), (6, 'Wetlands', 'East', 'Full'), (7, 'Mountains', 'West', 'Partial'), (8, 'Mountains', 'West', 'Partial');", "sql": "SELECT region, (COUNT(CASE WHEN protection_level = 'Full' THEN 1 END)::float/COUNT(habitat_id))*100 AS protection_percentage FROM habitat_data GROUP BY region;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 159, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which Republican ran against the American Labor candidate Matthew J. Merritt?", "schema": "CREATE TABLE table_name_5 (republican_ticket VARCHAR, american_labor_ticket VARCHAR)", "sql": "SELECT republican_ticket FROM table_name_5 WHERE american_labor_ticket = 'matthew j. merritt';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 94, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who is the home team when Sheffield United is the away team?", "schema": "CREATE TABLE table_name_38 (home_team VARCHAR, away_team VARCHAR)", "sql": "SELECT home_team FROM table_name_38 WHERE away_team = 'sheffield united';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 73, "num_statements": 1} {"question": "What is the total number of ad impressions in 'advertising_performance' table for the last month?", "schema": "CREATE TABLE advertising_performance (ad_id INT, user_id INT, ad_impressions INT, ad_date DATE);", "sql": "SELECT ad_date, SUM(ad_impressions) FROM advertising_performance WHERE ad_date >= CURDATE() - INTERVAL 1 MONTH GROUP BY ad_date;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "plpgsql", "is_postgresql_specific": false, "sql_length": 128, "num_statements": 1} {"question": "PostgreSQL regression test 'jsonpath_encoding': Write the SELECT query (example 12).", "schema": null, "sql": "select '$.\"null \\\\u0000 escape\"'::jsonpath as not_an_escape;", "explanation": "Regression test for Jsonpath Encoding in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select '$.\"null \\\\u0000 escape\"'::jsonpath as not_an_escape) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 60, "num_statements": 1} {"question": "PostgreSQL regression test 'jsonb': Write the SELECT query (example 19).", "schema": null, "sql": "SELECT test_json -> 2 FROM test_jsonb WHERE json_type = 'object';", "explanation": "Regression test for Jsonb in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT test_json -> 2 FROM test_jsonb WHERE json_type = 'object') AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 65, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the venue of the match on 5 April 2000?", "schema": "CREATE TABLE table_name_45 (venue VARCHAR, date VARCHAR)", "sql": "SELECT venue FROM table_name_45 WHERE date = '5 april 2000';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 60, "num_statements": 1} {"question": "What is the average construction labor cost per hour in the state of California?", "schema": "CREATE TABLE labor_costs (cost_id INT, labor_cost DECIMAL, state TEXT); INSERT INTO labor_costs VALUES (1, 45.5, 'California'), (2, 35.0, 'Texas'), (3, 50.0, 'New York'), (4, 40.0, 'California');", "sql": "SELECT AVG(labor_cost) FROM labor_costs WHERE state = 'California';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 67, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which department has more than 1 head at a time? List the id, name and the number of heads.", "schema": "CREATE TABLE management (department_id VARCHAR); CREATE TABLE department (department_id VARCHAR, name VARCHAR)", "sql": "SELECT T1.department_id, T1.name, COUNT(*) FROM management AS T2 JOIN department AS T1 ON T1.department_id = T2.department_id GROUP BY T1.department_id HAVING COUNT(*) > 1;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 172, "num_statements": 1} {"question": "What is the ratio of male to female healthcare providers in the \"rural_clinics_2\" table?", "schema": "CREATE TABLE rural_clinics_2 (id INT, name TEXT, age INT, gender TEXT); INSERT INTO rural_clinics_2 (id, name, age, gender) VALUES (1, 'Clinic C', 50, 'Male'), (2, 'Clinic D', 40, 'Female'), (3, 'Clinic E', 55, 'Male');", "sql": "SELECT ROUND(COUNT(CASE WHEN gender = 'Male' THEN 1 END)/COUNT(CASE WHEN gender = 'Female' THEN 1 END), 2) FROM rural_clinics_2;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 128, "num_statements": 1} {"question": "List all the broadband subscribers with their subscription start dates in the Northeast region who have been active for less than 3 months.", "schema": "CREATE TABLE subscribers(id INT, subscription_start_date DATE, region VARCHAR(10), subscription_type VARCHAR(10)); INSERT INTO subscribers VALUES (1, '2022-08-15', 'Northeast', 'broadband');", "sql": "SELECT subscribers.id, subscribers.subscription_start_date FROM subscribers WHERE subscribers.region = 'Northeast' AND subscribers.subscription_type = 'broadband' AND DATEDIFF(CURDATE(), subscribers.subscription_start_date) < 90;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 229, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many million u.s. viewers saw the episode that was directed by roger young and written by debra j. fisher?", "schema": "CREATE TABLE table_28037619_2 (us_viewers__million_ VARCHAR, directed_by VARCHAR, written_by VARCHAR)", "sql": "SELECT us_viewers__million_ FROM table_28037619_2 WHERE directed_by = 'Roger Young' AND written_by = 'Debra J. Fisher';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 119, "num_statements": 1} {"question": "What is the average number of families assisted per day by each community center in Q3 of 2021?", "schema": "CREATE TABLE Community_Centers (cc_name TEXT, families_assisted INTEGER, assist_date DATE); INSERT INTO Community_Centers (cc_name, families_assisted, assist_date) VALUES ('Center A', 10, '2021-07-04'); INSERT INTO Community_Centers (cc_name, families_assisted, assist_date) VALUES ('Center B', 15, '2021-08-18');", "sql": "SELECT cc_name, AVG(families_assisted/DATEDIFF('2021-10-01', assist_date)) FROM Community_Centers WHERE assist_date BETWEEN '2021-07-01' AND '2021-09-30' GROUP BY cc_name;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 171, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who was perfomer one on 30 January 1988?", "schema": "CREATE TABLE table_14934885_1 (performer_1 VARCHAR, date VARCHAR)", "sql": "SELECT performer_1 FROM table_14934885_1 WHERE date = '30 January 1988';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "advanced", "category": "plpgsql", "is_postgresql_specific": false, "sql_length": 72, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the record when the score was 2–0?", "schema": "CREATE TABLE table_name_45 (record VARCHAR, score VARCHAR)", "sql": "SELECT record FROM table_name_45 WHERE score = '2–0';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 53, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Where was home with a record of 7–5–2?", "schema": "CREATE TABLE table_name_77 (home VARCHAR, record VARCHAR)", "sql": "SELECT home FROM table_name_77 WHERE record = '7–5–2';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 54, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the result when the time was 4:15?", "schema": "CREATE TABLE table_name_66 (res VARCHAR, time VARCHAR)", "sql": "SELECT res FROM table_name_66 WHERE time = '4:15';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 50, "num_statements": 1} {"question": "What is the average operational cost of monitoring stations in the 'monitoring_stations' table, grouped by country?", "schema": "CREATE TABLE monitoring_stations (station_id INT, station_name VARCHAR(50), country VARCHAR(50), operational_cost FLOAT); INSERT INTO monitoring_stations (station_id, station_name, country, operational_cost) VALUES (1, 'Station A', 'Australia', 50000.0), (2, 'Station B', 'New Zealand', 60000.0), (3, 'Station C', 'Australia', 55000.0);", "sql": "SELECT country, AVG(operational_cost) FROM monitoring_stations GROUP BY country;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 80, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'btree_gin' (example 29).", "schema": null, "sql": "SELECT * FROM test_float4 WHERE i<='1e300'::float8 ORDER BY i;", "explanation": "Example query from the 'btree_gin' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 62, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: how many geo id with 48.578664 are", "schema": "CREATE TABLE table_18600760_19 (geo_id VARCHAR, latitude VARCHAR)", "sql": "SELECT COUNT(geo_id) FROM table_18600760_19 WHERE latitude = '48.578664';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 73, "num_statements": 1} {"question": "Display the number of unique traditional art forms in each country.", "schema": "CREATE TABLE Country_Art_Count (country TEXT, art_count INT); INSERT INTO Country_Art_Count (country, art_count) VALUES ('Japan', 2); INSERT INTO Country_Art_Count (country, art_count) VALUES ('India', 3);", "sql": "SELECT country, COUNT(*) AS unique_art_forms FROM (SELECT DISTINCT name, country FROM Traditional_Arts) AS unique_arts GROUP BY country;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 136, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: After how many opponents was the overall record 1-0-0?", "schema": "CREATE TABLE table_20928682_1 (opponents VARCHAR, record VARCHAR)", "sql": "SELECT COUNT(opponents) FROM table_20928682_1 WHERE record = '1-0-0';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 69, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'test_decoding' (example 29).", "schema": null, "sql": "INSERT INTO toasted_several(toasted_key) VALUES(repeat('9876543210', 10000));", "explanation": "Example query from the 'test_decoding' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 77, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'amcheck' (example 18).", "schema": null, "sql": "SELECT sum(reads) AS stats_bulkreads_before\n FROM pg_stat_io WHERE context = 'bulkread' \\gset\nBEGIN;", "explanation": "Example query from the 'amcheck' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "plpgsql", "is_postgresql_specific": false, "sql_length": 101, "num_statements": 1} {"question": "What is the maximum speed of the fastest autonomous vehicle?", "schema": "CREATE TABLE AutonomousVehicles (Id INT, Name VARCHAR(100), MaxSpeed FLOAT); INSERT INTO AutonomousVehicles (Id, Name, MaxSpeed) VALUES (1, 'Apollo', 240), (2, 'Buddy', 150), (3, 'RoboCar', 180);", "sql": "SELECT MAX(MaxSpeed) FROM AutonomousVehicles;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 45, "num_statements": 1} {"question": "Calculate the maximum and minimum salary for each job title in the \"employee_data\" and \"jobs\" tables", "schema": "CREATE TABLE employee_data (id INT, job_title VARCHAR(20), salary DECIMAL(10,2)); INSERT INTO employee_data (id, job_title, salary) VALUES (1, 'Engineer', 80000.00), (2, 'Engineer', 85000.00), (3, 'Technician', 60000.00), (4, 'Technician', 65000.00); CREATE TABLE jobs (id INT, title VARCHAR(20)); INSERT INTO jobs (id, title) VALUES (1, 'Engineer'), (2, 'Technician');", "sql": "SELECT j.title, MAX(ed.salary) as max_salary, MIN(ed.salary) as min_salary FROM employee_data ed JOIN jobs j ON ed.job_title = j.title GROUP BY j.title;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 152, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the domestic figure when cargo tonnes equal 25 866?", "schema": "CREATE TABLE table_name_61 (domestic VARCHAR, cargo__tonnes_ VARCHAR)", "sql": "SELECT domestic FROM table_name_61 WHERE cargo__tonnes_ = '25 866';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 67, "num_statements": 1} {"question": "What are the names of the projects in the 'Transportation' table?", "schema": "CREATE TABLE Transportation (project_id INT, project_name VARCHAR(50), location VARCHAR(50)); INSERT INTO Transportation (project_id, project_name, location) VALUES (1, 'Bridge Replacement', 'Texas'); INSERT INTO Transportation (project_id, project_name, location) VALUES (2, 'Road Construction', 'Florida');", "sql": "SELECT project_name FROM Transportation;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 40, "num_statements": 1} {"question": "PostgreSQL regression test 'interval': Write the SELECT query (example 288).", "schema": null, "sql": "select interval 'P00-0.01-00T2562047788:00:54.775807';", "explanation": "Regression test for Interval in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select interval 'P00-0.01-00T2562047788:00:54.775807') AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 54, "num_statements": 1} {"question": "PostgreSQL regression test 'rangetypes': Write the SELECT query (example 61).", "schema": null, "sql": "select * from numrange_test where nr < numrange(0.0, 1.0,'[]');", "explanation": "Regression test for Rangetypes in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select * from numrange_test where nr < numrange(0.0, 1.0,'[]')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 63, "num_statements": 1} {"question": "Identify the number of genetically modified ingredients in dishes served at 'The Green Table'.", "schema": "CREATE TABLE dishes_gm (dish_id INT, name VARCHAR(50), gm_ingredients INT); INSERT INTO dishes_gm VALUES (1, 'Veggie Burger', 1); INSERT INTO dishes_gm VALUES (2, 'Sweet Potato Fries', 0); CREATE TABLE served_at (dish_id INT, location VARCHAR(50)); INSERT INTO served_at VALUES (1, 'The Green Table'); INSERT INTO served_at VALUES (2, 'The Green Table');", "sql": "SELECT COUNT(dg.gm_ingredients) FROM dishes_gm dg JOIN served_at sa ON dg.dish_id = sa.dish_id WHERE sa.location = 'The Green Table';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 133, "num_statements": 1} {"question": "Which organizations received donations from donors located in a specific city, based on the 'donations', 'donors', and 'organizations' tables?", "schema": "CREATE TABLE organizations (id INT, organization_name TEXT, organization_city TEXT);CREATE TABLE donors (id INT, name TEXT, email TEXT, donor_city TEXT);CREATE TABLE donations (id INT, donor_id INT, organization_id INT, amount DECIMAL(10,2), donation_date DATE);", "sql": "SELECT organizations.organization_name FROM organizations INNER JOIN donations ON organizations.id = donations.organization_id INNER JOIN donors ON donations.donor_id = donors.id WHERE donors.donor_city = 'San Francisco';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 221, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'stats_import' (example 126).", "schema": null, "sql": "CREATE STATISTICS stats_import.test_stat_ndistinct_exprs (ndistinct)\n ON lower(name), upper(name)\n FROM stats_import.test;", "explanation": "DDL from PostgreSQL core regression test for Stats Import.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 124, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'jsonb_plpython' (example 30).", "schema": null, "sql": "SELECT roundtrip('[1]'::jsonb);", "explanation": "Example query from the 'jsonb_plpython' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": true, "sql_length": 31, "num_statements": 1} {"question": "Write the SQL definition from PostgreSQL contrib extension 'contrib' (item 10).", "schema": null, "sql": "CREATE FUNCTION crosstab(text,text)\nRETURNS setof record\nAS 'MODULE_PATHNAME','crosstab_hash'\nLANGUAGE C STABLE STRICT;", "explanation": "SQL definition from the 'contrib' PostgreSQL contrib extension.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 119, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which church was built in 1600?", "schema": "CREATE TABLE table_name_38 (church_name VARCHAR, year_built VARCHAR)", "sql": "SELECT church_name FROM table_name_38 WHERE year_built = 1600;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 62, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which Founded has a League of women's flat track derby association, and a Club of omaha rollergirls?", "schema": "CREATE TABLE table_name_14 (founded INTEGER, league VARCHAR, club VARCHAR)", "sql": "SELECT AVG(founded) FROM table_name_14 WHERE league = 'women's flat track derby association' AND club = 'omaha rollergirls';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 124, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Andy Roddick is the opponent in the final on what surface?", "schema": "CREATE TABLE table_26202788_7 (surface VARCHAR, opponent_in_the_final VARCHAR)", "sql": "SELECT surface FROM table_26202788_7 WHERE opponent_in_the_final = 'Andy Roddick';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 82, "num_statements": 1} {"question": "Show an example of PostgreSQL SELECT (example 2).", "schema": null, "sql": "SELECT name FROM distributors ORDER BY code;", "explanation": "PostgreSQL SELECT command.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 44, "num_statements": 1} {"question": "How many cruelty-free ingredients are used in total across all products?", "schema": "CREATE TABLE Product (id INT, productName VARCHAR(50), price DECIMAL(5,2)); INSERT INTO Product (id, productName, price) VALUES (4, 'Blush', 14.99), (5, 'Foundation', 29.99), (6, 'Lip Liner', 16.99); CREATE TABLE Ingredient (id INT, productId INT, ingredient VARCHAR(50), sourceCountry VARCHAR(50), crueltyFree BOOLEAN); INSERT INTO Ingredient (id, productId, ingredient, sourceCountry, crueltyFree) VALUES (6, 4, 'Shea Butter', 'Ghana', true), (7, 4, 'Rosehip Oil', 'Chile', true), (8, 5, 'Vitamin E', 'Argentina', true), (9, 5, 'Zinc Oxide', 'Australia', true), (10, 6, 'Jojoba Oil', 'Peru', true);", "sql": "SELECT SUM(I.crueltyFree) as totalCrueltyFreeIngredients FROM Ingredient I;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 75, "num_statements": 1} {"question": "What is the average age of patients with diabetes in the Northern rural areas of Canada?", "schema": "CREATE TABLE patients (id INT, age INT, has_diabetes BOOLEAN); INSERT INTO patients (id, age, has_diabetes) VALUES (1, 50, true), (2, 60, false); CREATE TABLE locations (id INT, region VARCHAR, is_rural BOOLEAN); INSERT INTO locations (id, region, is_rural) VALUES (1, 'Northern', true), (2, 'Southern', false);", "sql": "SELECT AVG(patients.age) FROM patients INNER JOIN locations ON patients.id = locations.id WHERE patients.has_diabetes = true AND locations.region = 'Northern';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 159, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the rural population percentage in 1979?", "schema": "CREATE TABLE table_16645_1 (__percentage VARCHAR, rural INTEGER, year__january_ VARCHAR)", "sql": "SELECT MIN(rural), __percentage FROM table_16645_1 WHERE year__january_ = 1979;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 79, "num_statements": 1} {"question": "What is the maximum fare for passengers using the 'South' bus route in the last week?", "schema": "CREATE TABLE Routes (RouteID int, RouteName varchar(255), Region varchar(255)); INSERT INTO Routes (RouteID, RouteName, Region) VALUES (1, 'North', 'East'), (2, 'South', 'Central'), (3, 'West', 'West'), (4, 'Red Line', 'East'), (5, 'Green Line', 'North'), (6, 'Blue Line', 'West'), (7, 'South', 'South'); CREATE TABLE Trips (TripID int, RouteID int, Fare double, TripDateTime datetime);", "sql": "SELECT MAX(Fare) FROM Routes JOIN Trips ON Routes.RouteID = Trips.RouteID WHERE Routes.RouteName = 'South' AND Trips.TripDateTime >= DATEADD(week, -1, GETDATE());", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 162, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many years were there with 348 attempts?", "schema": "CREATE TABLE table_1037590_1 (yards VARCHAR, attempts VARCHAR)", "sql": "SELECT COUNT(yards) FROM table_1037590_1 WHERE attempts = 348;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 62, "num_statements": 1} {"question": "How many unique visitors have attended events at 'Art Gallery' in 2021?", "schema": "CREATE TABLE if not exists venue (id INT, name VARCHAR(50)); CREATE TABLE if not exists event_calendar (id INT, venue_id INT, event_date DATE); INSERT INTO venue (id, name) VALUES (1, 'Art Gallery'); INSERT INTO event_calendar (id, venue_id, event_date) VALUES (1, 1, '2021-01-01'), (2, 1, '2021-03-12'), (3, 1, '2022-05-28');", "sql": "SELECT COUNT(DISTINCT e.id) FROM event_calendar ec JOIN (SELECT DISTINCT id FROM event_attendees) e ON ec.id = e.id WHERE ec.venue_id = (SELECT id FROM venue WHERE name = 'Art Gallery') AND ec.event_date BETWEEN '2021-01-01' AND '2021-12-31';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 242, "num_statements": 1} {"question": "Show a SQL definition from the citus project (merge_repartition2, item 19).", "schema": null, "sql": "CREATE OR REPLACE FUNCTION setup_data() RETURNS VOID SET search_path TO merge_repartition2_schema AS $$\n INSERT INTO pg_source SELECT i, i+1, 1 FROM generate_series(1, 100000) i;\n INSERT INTO pg_target SELECT i, 1 FROM generate_series(50001, 100000) i;\n INSERT INTO citus_source SELECT i, i+1, 1 FROM generate_series(1, 100000) i;\n INSERT INTO citus_target SELECT i, 1 FROM generate_series(50001, 100000) i;\n$$\nLANGUAGE SQL;", "explanation": "SQL definition from the open-source citus PostgreSQL project.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "plpgsql_function", "is_postgresql_specific": true, "sql_length": 436, "num_statements": 5} {"question": "Write the DML statement from PostgreSQL regression test 'inherit' (example 16).", "schema": null, "sql": "INSERT INTO b(aa) VALUES('bbbbbbbb');", "explanation": "DML from PostgreSQL core regression test for Inherit.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 37, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the number of points when the draws are more than 1?", "schema": "CREATE TABLE table_name_57 (points VARCHAR, draw INTEGER)", "sql": "SELECT COUNT(points) FROM table_name_57 WHERE draw > 1;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 55, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which Opposition has a Year smaller than 1977, and a City of manchester?", "schema": "CREATE TABLE table_name_53 (opposition VARCHAR, year VARCHAR, city VARCHAR)", "sql": "SELECT opposition FROM table_name_53 WHERE year < 1977 AND city = 'manchester';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 79, "num_statements": 1} {"question": "Add a new teacher with a unique teacher_id, teacher_name, and assignment to a school, updating the corresponding school record accordingly.", "schema": "CREATE TABLE schools (school_id INT, school_name TEXT); CREATE TABLE teachers (teacher_id INT, teacher_name TEXT, school_id INT);", "sql": "INSERT INTO teachers (teacher_id, teacher_name, school_id) VALUES (98765, 'Mx. Lopez', 4321); UPDATE schools s SET s.teacher_id = 98765 WHERE EXISTS (SELECT * FROM teachers t WHERE t.school_id = s.school_id AND s.teacher_id != 98765);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 234, "num_statements": 2} {"question": "Insert new records for 5 users who joined in the past week.", "schema": "CREATE TABLE users (id INT, name TEXT, signup_date DATE); INSERT INTO users (id, name, signup_date) VALUES (1, 'Alice', '2022-01-01'), (2, 'Bob', '2022-01-02'), (3, 'Charlie', '2022-01-03');", "sql": "INSERT INTO users (id, name, signup_date) VALUES (4, 'David', '2022-01-04'), (5, 'Eve', '2022-01-05'), (6, 'Frank', '2022-01-06');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 130, "num_statements": 1} {"question": "Delete the concert with ID 1.", "schema": "CREATE TABLE Concerts (ConcertID INT, ArtistID INT, Venue VARCHAR(100), TicketsSold INT); INSERT INTO Concerts (ConcertID, ArtistID, Venue, TicketsSold) VALUES (1, 1, 'New York', 5000); INSERT INTO Concerts (ConcertID, ArtistID, Venue, TicketsSold) VALUES (2, 5, 'Seoul', 8000);", "sql": "DELETE FROM Concerts WHERE ConcertID = 1;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_delete", "is_postgresql_specific": false, "sql_length": 41, "num_statements": 1} {"question": "What is the total budget allocated for policy advocacy in the last quarter?", "schema": "CREATE TABLE PolicyAdvocacy (PolicyAdvocacyID INT, PolicyName VARCHAR(50), Budget DECIMAL(5,2)); INSERT INTO PolicyAdvocacy (PolicyAdvocacyID, PolicyName, Budget) VALUES (1, 'Accessibility Laws', 5000.00); INSERT INTO PolicyAdvocacy (PolicyAdvocacyID, PolicyName, Budget) VALUES (2, 'Inclusion Programs', 7000.00);", "sql": "SELECT SUM(Budget) as TotalBudget FROM PolicyAdvocacy WHERE Date BETWEEN DATEADD(quarter, -1, GETDATE()) AND GETDATE();", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 119, "num_statements": 1} {"question": "What is the total weight of shipments to a given country from all continents?", "schema": "CREATE TABLE shipments (id INT, origin_continent VARCHAR(255), destination_country VARCHAR(255), weight FLOAT); INSERT INTO shipments (id, origin_continent, destination_country, weight) VALUES (1, 'Asia', 'Australia', 700.0), (2, 'Africa', 'Australia', 800.0);", "sql": "SELECT destination_country, SUM(weight) as total_weight FROM shipments GROUP BY destination_country;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 100, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the location of Blanca Peak?", "schema": "CREATE TABLE table_name_77 (location VARCHAR, mountain_peak VARCHAR)", "sql": "SELECT location FROM table_name_77 WHERE mountain_peak = 'blanca peak';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 71, "num_statements": 1} {"question": "What is the average horsepower of vehicles manufactured in Japan?", "schema": "CREATE TABLE Vehicles (id INT, make VARCHAR(50), model VARCHAR(50), horsepower INT, country VARCHAR(50)); INSERT INTO Vehicles (id, make, model, horsepower, country) VALUES (1, 'Toyota', 'Corolla', 139, 'Japan');", "sql": "SELECT AVG(horsepower) FROM Vehicles WHERE country = 'Japan';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 61, "num_statements": 1} {"question": "Insert new virtual tours that recently became available.", "schema": "CREATE TABLE virtual_tours (tour_id INT, tour_name TEXT, location TEXT, price DECIMAL(5,2)); INSERT INTO virtual_tours (tour_id, tour_name, location, price) VALUES (1, 'Louvre VR Experience', 'Paris', 24.99), (2, 'Gondola Tour in Venice', 'Venice', 19.99), (3, 'Great Wall of China Virtual Walk', 'China', 29.99);", "sql": "INSERT INTO virtual_tours (tour_id, tour_name, location, price) VALUES (4, 'Petra Virtual Tour', 'Jordan', 22.99), (5, 'Galapagos Islands Virtual Experience', 'Ecuador', 34.99), (6, 'Angkor Wat Virtual Reality Tour', 'Cambodia', 27.99);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 236, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What's the voltage of the model with part number TT80503300?", "schema": "CREATE TABLE table_24096813_15 (voltage VARCHAR, part_number_s_ VARCHAR)", "sql": "SELECT voltage FROM table_24096813_15 WHERE part_number_s_ = 'TT80503300';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 74, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: what was the score of the Championship match played in Estoril, Portugal?", "schema": "CREATE TABLE table_name_92 (score_in_the_final VARCHAR, championship VARCHAR)", "sql": "SELECT score_in_the_final FROM table_name_92 WHERE championship = 'estoril, portugal';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 86, "num_statements": 1} {"question": "What percentage of total donations went towards education initiatives in 2019?", "schema": "CREATE TABLE donors (id INT, name TEXT);CREATE TABLE donations (id INT, donor_id INT, amount DECIMAL, donation_date DATE, initiative_category TEXT);", "sql": "SELECT (SUM(CASE WHEN initiative_category = 'education' THEN donations.amount ELSE 0 END) / SUM(donations.amount)) * 100 as education_percentage FROM donations WHERE YEAR(donation_date) = 2019;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 193, "num_statements": 1} {"question": "What is the total horsepower of vehicles released in 2017?", "schema": "CREATE TABLE VehicleData (Id INT, Name VARCHAR(50), Year INT, Horsepower INT); INSERT INTO VehicleData (Id, Name, Year, Horsepower) VALUES (1, 'Corvette', 2017, 460), (2, '911 Turbo', 2017, 540), (3, 'M4 GTS', 2016, 493);", "sql": "SELECT SUM(Horsepower) FROM VehicleData WHERE Year = 2017;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 58, "num_statements": 1} {"question": "List all underwater volcanoes near the Mariana Trench.", "schema": "CREATE TABLE underwater_volcanoes (id INT, name TEXT, lat FLOAT, lon FLOAT, depth INT); INSERT INTO underwater_volcanoes (id, name, lat, lon, depth) VALUES (1, 'NW Rota-1', 14.05, 145.75, 5367), (2, 'Ferdinand De Lesseps', 11.87, 142.33, 4000); CREATE TABLE mariana_trench (id INT, point TEXT, lat FLOAT, lon FLOAT); INSERT INTO mariana_trench (id, point, lat, lon) VALUES (1, 'Challenger Deep', 11.23, 142.19), (2, 'Sirena Deep', 11.20, 142.15);", "sql": "SELECT uv.name FROM underwater_volcanoes uv INNER JOIN mariana_trench mt ON uv.lat BETWEEN mt.lat - 1 AND mt.lat + 1 AND uv.lon BETWEEN mt.lon - 1 AND mt.lon + 1;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 162, "num_statements": 1} {"question": "Which countries have the most female film directors?", "schema": "CREATE TABLE country (country_id INT, country_name VARCHAR(50)); INSERT INTO country (country_id, country_name) VALUES (1, 'United States'), (2, 'Canada'), (3, 'France'); CREATE TABLE film_director (director_id INT, director_name VARCHAR(50), country_id INT); INSERT INTO film_director (director_id, director_name, country_id) VALUES (1, 'Ava DuVernay', 1), (2, 'Xavier Dolan', 2), (3, 'Celine Sciamma', 3);", "sql": "SELECT country_name, COUNT(*) as num_female_directors FROM film_director fd JOIN country c ON fd.country_id = c.country_id WHERE director_name = 'Ava DuVernay' OR director_name = 'Celine Sciamma' GROUP BY country_name ORDER BY num_female_directors DESC;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 253, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which player has less than 201 games, is ranked 2, and played between 2007-2012?", "schema": "CREATE TABLE table_name_74 (player VARCHAR, years VARCHAR, games VARCHAR, rank VARCHAR)", "sql": "SELECT player FROM table_name_74 WHERE games < 201 AND rank = 2 AND years = '2007-2012';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 88, "num_statements": 1} {"question": "What is the total budget allocated for transportation services in Brazil in 2019?", "schema": "CREATE SCHEMA br_schema;CREATE TABLE br_schema.transportation_budget (year INT, service VARCHAR(20), amount INT);INSERT INTO br_schema.transportation_budget (year, service, amount) VALUES (2019, 'Transportation', 20000000);", "sql": "SELECT amount FROM br_schema.transportation_budget WHERE year = 2019 AND service = 'Transportation';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 100, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many total medals for the nation with 1 gold and 6 bronzes?", "schema": "CREATE TABLE table_name_52 (total VARCHAR, gold VARCHAR, bronze VARCHAR)", "sql": "SELECT COUNT(total) FROM table_name_52 WHERE gold = 1 AND bronze = 6;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 69, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which network has model nForce Professional 3400 MCP?", "schema": "CREATE TABLE table_name_29 (network VARCHAR, model VARCHAR)", "sql": "SELECT network FROM table_name_29 WHERE model = 'nforce professional 3400 mcp';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 79, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the Score of the Tournament with Opponent in the final of Martin Spottl?", "schema": "CREATE TABLE table_name_70 (score VARCHAR, opponent_in_the_final VARCHAR)", "sql": "SELECT score FROM table_name_70 WHERE opponent_in_the_final = 'martin spottl';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 78, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What name is in the number 2 spot when Ethan is in the number 6 spot and Mason is in the number 3 spot?", "schema": "CREATE TABLE table_name_7 (no_2 VARCHAR, no_6 VARCHAR, no_3 VARCHAR)", "sql": "SELECT no_2 FROM table_name_7 WHERE no_6 = 'ethan' AND no_3 = 'mason';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 70, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many new points were earned by rk 26?", "schema": "CREATE TABLE table_27615896_18 (new_points VARCHAR, rk VARCHAR)", "sql": "SELECT new_points FROM table_27615896_18 WHERE rk = 26;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 55, "num_statements": 1} {"question": "Show an example of PostgreSQL CREATE LANGUAGE (example 1).", "schema": null, "sql": "CREATE FUNCTION plsample_call_handler() RETURNS language_handler AS '$libdir/plsample' LANGUAGE C; CREATE LANGUAGE plsample HANDLER plsample_call_handler;", "explanation": "PostgreSQL CREATE LANGUAGE command.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 154, "num_statements": 2} {"question": "PostgreSQL regression test 'tsdicts': Write the SELECT query (example 63).", "schema": null, "sql": "SELECT ts_lexize('hunspell_num', 'unbooking');", "explanation": "Regression test for Tsdicts in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT ts_lexize('hunspell_num', 'unbooking')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 46, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which Catalog has a Date of july 15, 2011?", "schema": "CREATE TABLE table_name_73 (catalog INTEGER, date VARCHAR)", "sql": "SELECT AVG(catalog) FROM table_name_73 WHERE date = 'july 15, 2011';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 68, "num_statements": 1} {"question": "What is the average carbon sequestration for each forest type?", "schema": "CREATE TABLE forests (forest_type VARCHAR(255), year INT, carbon_sequestration INT); INSERT INTO forests (forest_type, year, carbon_sequestration) VALUES ('Temperate', 2018, 500), ('Temperate', 2019, 550), ('Temperate', 2020, 600), ('Temperate', 2021, 650), ('Boreal', 2018, 700), ('Boreal', 2019, 750), ('Boreal', 2020, 800), ('Boreal', 2021, 850), ('Tropical', 2018, 900), ('Tropical', 2019, 950), ('Tropical', 2020, 1000), ('Tropical', 2021, 1050);", "sql": "SELECT forest_type, AVG(carbon_sequestration) as avg_carbon_sequestration FROM forests GROUP BY forest_type;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 108, "num_statements": 1} {"question": "What is the minimum salinity in the Pacific Ocean?", "schema": "CREATE TABLE location (location_id INT, location_name TEXT); INSERT INTO location (location_id, location_name) VALUES (1, 'Pacific Ocean'); CREATE TABLE salinity (salinity_id INT, location_id INT, salinity FLOAT); INSERT INTO salinity (salinity_id, location_id, salinity) VALUES (1, 1, 34.5), (2, 1, 34.6), (3, 1, 34.7), (4, 1, 34.8), (5, 1, 34.9);", "sql": "SELECT MIN(salinity) FROM salinity WHERE location_id = (SELECT location_id FROM location WHERE location_name = 'Pacific Ocean');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 128, "num_statements": 1} {"question": "What is the total number of articles published by female authors in the 'articles' table?", "schema": "CREATE TABLE articles (title VARCHAR(255), author_name VARCHAR(255), author_gender VARCHAR(10), publication_date DATE);", "sql": "SELECT COUNT(*) FROM articles WHERE author_gender = 'female';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 61, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many points were scored against the club that drew 3 and scored 50 points?", "schema": "CREATE TABLE table_name_60 (points_against VARCHAR, drawn VARCHAR, points VARCHAR)", "sql": "SELECT points_against FROM table_name_60 WHERE drawn = '3' AND points = '50';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 77, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the Date of the match with Opponent in the final Kenneth Carlsen?", "schema": "CREATE TABLE table_name_41 (date VARCHAR, opponent_in_the_final VARCHAR)", "sql": "SELECT date FROM table_name_41 WHERE opponent_in_the_final = 'kenneth carlsen';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 79, "num_statements": 1} {"question": "Create a new table named 'tournaments' with columns 'id', 'name', 'location', 'start_date', 'end_date'", "schema": "CREATE SCHEMA if not exists gaming;", "sql": "CREATE TABLE gaming.tournaments (id INT, name VARCHAR(100), location VARCHAR(100), start_date DATE, end_date DATE);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "ddl_table", "is_postgresql_specific": false, "sql_length": 115, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the highest number of silver when there is 1 bronze and less than 4 golds?", "schema": "CREATE TABLE table_name_38 (silver INTEGER, bronze VARCHAR, gold VARCHAR)", "sql": "SELECT MAX(silver) FROM table_name_38 WHERE bronze = 1 AND gold < 4;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 68, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Please give me a list of cities whose regional population is over 10000000.", "schema": "CREATE TABLE city (city VARCHAR, regional_population INTEGER)", "sql": "SELECT city FROM city WHERE regional_population > 10000000;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 59, "num_statements": 1} {"question": "Update the response time to 45 minutes for all emergencies in the 'Eastside' district.", "schema": "CREATE TABLE districts (district_id INT, district_name TEXT);CREATE TABLE emergencies (emergency_id INT, district_id INT, response_time INT);", "sql": "UPDATE emergencies SET response_time = 45 WHERE district_id = (SELECT district_id FROM districts WHERE district_name = 'Eastside');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 131, "num_statements": 1} {"question": "Which departments have a budget allocation below the average budget?", "schema": "CREATE TABLE dept_budget (dept VARCHAR(50), budget INT); INSERT INTO dept_budget (dept, budget) VALUES ('Infrastructure', 800000), ('Education', 700000), ('Health', 900000);", "sql": "SELECT dept FROM dept_budget WHERE budget < (SELECT AVG(budget) FROM dept_budget);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 82, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: what was the result on 01x06", "schema": "CREATE TABLE table_29141354_1 (scores VARCHAR, episode VARCHAR)", "sql": "SELECT scores FROM table_29141354_1 WHERE episode = '01x06';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 60, "num_statements": 1} {"question": "What is the average yield of potato crops across different countries?", "schema": "CREATE TABLE Country (id INT, name VARCHAR(255)); INSERT INTO Country (id, name) VALUES (1, 'Canada'), (2, 'Mexico'), (3, 'Brazil'); CREATE TABLE Crop (id INT, name VARCHAR(255), yield INT); INSERT INTO Crop (id, name, yield) VALUES (1, 'Potato', 25), (2, 'Corn', 75), (3, 'Potato', 15);", "sql": "SELECT AVG(Crop.yield) FROM Crop INNER JOIN Country ON Crop.id = Country.id WHERE Crop.name = 'Potato';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 103, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many genes are in species burkholderia pseudomallei with 4,126,292 base pairs?", "schema": "CREATE TABLE table_name_13 (genes VARCHAR, species VARCHAR, base_pairs VARCHAR)", "sql": "SELECT genes FROM table_name_13 WHERE species = 'burkholderia pseudomallei' AND base_pairs = '4,126,292';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 105, "num_statements": 1} {"question": "PostgreSQL regression test 'xmlmap': Write the SELECT query (example 26).", "schema": null, "sql": "SELECT cursor_to_xml('xc'::refcursor, 5, false, true, '');", "explanation": "Regression test for Xmlmap in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT cursor_to_xml('xc'::refcursor, 5, false, true, '')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 58, "num_statements": 1} {"question": "What is the average response time for emergency calls in the state of Illinois, excluding calls that took more than 20 minutes to respond?", "schema": "CREATE TABLE emergency_calls (id INT, state VARCHAR(20), response_time INT);", "sql": "SELECT AVG(response_time) FROM emergency_calls WHERE state = 'Illinois' AND response_time < 20*60;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 98, "num_statements": 1} {"question": "What is the number of art pieces created by artists from different countries in the oil painting medium?", "schema": "CREATE SCHEMA art; CREATE TABLE art_pieces (art_id INT, art_name VARCHAR(255), artist_name VARCHAR(255), artist_country VARCHAR(50), medium VARCHAR(50), creation_date DATE); INSERT INTO art.art_pieces (art_id, art_name, artist_name, artist_country, medium, creation_date) VALUES (1, 'Painting', 'Sarah Johnson', 'USA', 'Oil', '2018-01-01'), (2, 'Sculpture', 'Mia Kim', 'South Korea', 'Bronze', '2019-05-15'), (3, 'Print', 'Jamie Lee', 'Canada', 'Woodcut', '2020-12-31'), (4, 'Installation', 'David Park', 'Mexico', 'Mixed Media', '2020-06-01'), (5, 'Painting', 'David Park', 'Brazil', 'Watercolor', '2019-12-31');", "sql": "SELECT artist_country, COUNT(*) as count FROM art.art_pieces WHERE medium = 'Oil' GROUP BY artist_country;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 106, "num_statements": 1} {"question": "What is the total number of accessible technology projects, and how many of those are in Africa?", "schema": "CREATE TABLE accessible_tech_projects (id INT, country VARCHAR(2), project_accessibility VARCHAR(10)); INSERT INTO accessible_tech_projects (id, country, project_accessibility) VALUES (1, 'US', 'yes'), (2, 'CA', 'no'), (3, 'MX', 'yes'), (4, 'BR', 'yes'), (5, 'AR', 'no'), (6, 'NG', 'yes'), (7, 'EG', 'no'), (8, 'ZA', 'yes'), (9, 'ET', 'no'), (10, 'GH', 'yes');", "sql": "SELECT COUNT(*) FROM accessible_tech_projects WHERE project_accessibility = 'yes' AND country IN ('NG', 'EG', 'ZA', 'ET', 'GH');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 128, "num_statements": 1} {"question": "What is the total quantity of sustainable ingredients used in each dish, categorized by restaurant location?", "schema": "CREATE TABLE Restaurants (RestaurantID INT, Location VARCHAR(50), OpenDate DATETIME); CREATE TABLE Menu (MenuID INT, RestaurantID INT, Name VARCHAR(50), Price DECIMAL(5,2), IsSustainable BOOLEAN); CREATE TABLE Ingredients (IngredientID INT, MenuID INT, Name VARCHAR(50), Quantity INT, IsSustainable BOOLEAN);", "sql": "SELECT Restaurants.Location, SUM(CASE WHEN Ingredients.IsSustainable THEN Ingredients.Quantity ELSE 0 END) as TotalSustainableQuantity FROM Restaurants JOIN Menu ON Restaurants.RestaurantID = Menu.RestaurantID JOIN Ingredients ON Menu.MenuID = Ingredients.MenuID GROUP BY Restaurants.Location;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 293, "num_statements": 1} {"question": "What is the average speed of shared electric scooters in Portland, OR?", "schema": "CREATE TABLE shared_scooters (scooter_id INT, ride_id INT, trip_start_time TIMESTAMP, trip_end_time TIMESTAMP, start_location TEXT, end_location TEXT, city TEXT, vehicle_type TEXT, avg_speed DECIMAL);", "sql": "SELECT AVG(avg_speed) FROM shared_scooters WHERE city = 'Portland' AND vehicle_type = 'electric scooter';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 105, "num_statements": 1} {"question": "List the unique neighborhoods where both 'Burglary' and 'Theft' occurred in the last month, excluding neighborhoods with fewer than 2 total crimes.", "schema": "CREATE TABLE Crimes (crime_id INT, crime_type VARCHAR(10), neighborhood VARCHAR(20), date DATE); INSERT INTO Crimes VALUES (1, 'Burglary', 'Parkside', '2022-01-01'), (2, 'Theft', 'Parkside', '2022-01-03'), (3, 'Burglary', 'Downtown', '2022-01-05'), (4, 'Theft', 'Downtown', '2022-01-07'), (5, 'Assault', 'Parkside', '2022-01-09'), (6, 'Assault', 'Downtown', '2022-01-11');", "sql": "SELECT neighborhood FROM Crimes WHERE crime_type IN ('Burglary', 'Theft') AND date >= DATEADD(month, -1, CURRENT_TIMESTAMP) GROUP BY neighborhood HAVING COUNT(DISTINCT crime_type) = 2 AND COUNT(*) >= 2;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 202, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the average against that has a drawn less than 7, points greater than 22, and 19 for the played?", "schema": "CREATE TABLE table_name_66 (against INTEGER, played VARCHAR, drawn VARCHAR, points VARCHAR)", "sql": "SELECT AVG(against) FROM table_name_66 WHERE drawn < 7 AND points > 22 AND played = 19;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 87, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who is the Shirt Back Sponsor if the Shorts Sponsor is Telestet?", "schema": "CREATE TABLE table_name_69 (shirt_back_sponsor VARCHAR, shorts_sponsor VARCHAR)", "sql": "SELECT shirt_back_sponsor FROM table_name_69 WHERE shorts_sponsor = 'telestet';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 79, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the series number for the episode directed by jay sandrich that aired October 23, 1986?", "schema": "CREATE TABLE table_2818164_4 (no_in_series INTEGER, directed_by VARCHAR, original_air_date VARCHAR)", "sql": "SELECT MIN(no_in_series) FROM table_2818164_4 WHERE directed_by = 'Jay Sandrich' AND original_air_date = 'October 23, 1986';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 124, "num_statements": 1} {"question": "Which users have posted on consecutive days?", "schema": "CREATE TABLE posts (id INT, user_id INT, post_date DATE); INSERT INTO posts (id, user_id, post_date) VALUES (1, 1, '2022-01-01'), (2, 2, '2022-01-02'), (3, 1, '2022-01-03'), (4, 2, '2022-01-04'), (5, 3, '2022-01-05'), (6, 3, '2022-01-06'), (7, 1, '2022-01-07'), (8, 2, '2022-01-08'), (9, 2, '2022-01-09');", "sql": "SELECT user_id FROM (SELECT user_id, post_date, DATEDIFF(day, LAG(post_date) OVER (PARTITION BY user_id ORDER BY post_date), post_date) AS gap FROM posts) AS t WHERE gap = 1 GROUP BY user_id;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "query_window_function", "is_postgresql_specific": true, "sql_length": 191, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: During the Tournament in which Jiří Novák was absent(A) in 1995, absent(A) in 1997, and made it to the 3rd round (3R) in 2003, how did he do in 2006?", "schema": "CREATE TABLE table_name_31 (Id VARCHAR)", "sql": "SELECT 2006 FROM table_name_31 WHERE 1995 = 'a' AND 1997 = 'a' AND 2003 = '3r';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 79, "num_statements": 1} {"question": "What is the average ticket price for concerts in the 'concert_tours' table?", "schema": "CREATE TABLE concert_tours (concert_id INT, concert_name TEXT, artist_id INT, location TEXT, date DATE, price DECIMAL(5,2));", "sql": "SELECT AVG(price) FROM concert_tours;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 37, "num_statements": 1} {"question": "Delete all satellites deployed before 2010", "schema": "CREATE TABLE satellites (id INT PRIMARY KEY, name VARCHAR(50), launch_date DATE); INSERT INTO satellites (id, name, launch_date) VALUES (1, 'USA-193', '2009-04-22'), (2, 'USA-200', '2010-02-11'), (3, 'USA-202', '2010-11-20'), (4, 'HawkSat-1', '2011-07-14'), (5, 'Dovesat-1', '2012-06-30');", "sql": "DELETE FROM satellites WHERE launch_date < '2010-01-01';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_delete", "is_postgresql_specific": false, "sql_length": 56, "num_statements": 1} {"question": "What is the number of projects and their total budget for each type of project?", "schema": "CREATE TABLE projects_4 (id INT, name VARCHAR, type VARCHAR, budget FLOAT); INSERT INTO projects_4 (id, name, type, budget) VALUES (1, 'AI for good', 'AI', 100000), (2, 'Accessible software development', 'Accessibility', 150000), (3, 'Digital divide reduction', 'Digital divide', 200000);", "sql": "SELECT projects_4.type, COUNT(*), SUM(projects_4.budget) FROM projects_4 GROUP BY projects_4.type;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 98, "num_statements": 1} {"question": "Find the average temperature (in Celsius) and humidity (in percentage) for the past 30 days from IoT sensors in the 'Field1'", "schema": "CREATE TABLE iot_sensors (id INT, field VARCHAR(50), temperature FLOAT, humidity FLOAT, timestamp DATETIME); INSERT INTO iot_sensors (id, field, temperature, humidity, timestamp) VALUES (1, 'Field1', 22.5, 60.0, '2022-01-01 10:00:00'), (2, 'Field1', 25.0, 55.0, '2022-01-02 10:00:00');", "sql": "SELECT AVG(temperature) as avg_temperature, AVG(humidity) as avg_humidity FROM iot_sensors WHERE timestamp BETWEEN DATE_SUB(CURRENT_TIMESTAMP, INTERVAL 30 DAY) AND CURRENT_TIMESTAMP AND field = 'Field1';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 203, "num_statements": 1} {"question": "How many unique cities hosted basketball games in 2020?", "schema": "CREATE TABLE cities (id INT, name VARCHAR(255), games_hosted INT); INSERT INTO cities (id, name, games_hosted) VALUES (1, 'City1', 5); INSERT INTO cities (id, name, games_hosted) VALUES (2, 'City2', 3);", "sql": "SELECT COUNT(DISTINCT name) FROM cities WHERE games_hosted > 0 AND EXTRACT(YEAR FROM games_hosted_date) = 2020 AND name IN (SELECT city FROM games WHERE sport = 'Basketball');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 175, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: In which Tournament was Mansour Bahrami Eric Winogradsky", "schema": "CREATE TABLE table_name_9 (tournament VARCHAR, opponent_in_final VARCHAR)", "sql": "SELECT tournament FROM table_name_9 WHERE opponent_in_final = 'mansour bahrami eric winogradsky';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 97, "num_statements": 1} {"question": "What is the minimum number of accommodations provided to a single student with a mobility impairment in the art department?", "schema": "CREATE TABLE students (id INT, mobility_impairment BOOLEAN, department VARCHAR(255)); INSERT INTO students (id, mobility_impairment, department) VALUES (1, true, 'art'), (2, false, 'engineering'), (3, true, 'art'), (4, true, 'mathematics'), (5, false, 'art'), (6, true, 'art'); CREATE TABLE accommodations (id INT, student_id INT, year INT); INSERT INTO accommodations (id, student_id, year) VALUES (1, 1, 2018), (2, 1, 2019), (3, 3, 2018), (4, 3, 2019), (5, 3, 2020), (6, 4, 2020), (7, 5, 2018), (8, 5, 2019), (9, 5, 2020), (10, 5, 2021), (11, 6, 2018), (12, 6, 2019), (13, 6, 2020), (14, 6, 2021);", "sql": "SELECT MIN(accommodations) FROM (SELECT student_id, COUNT(*) as accommodations FROM accommodations GROUP BY student_id) as subquery WHERE student_id IN (SELECT id FROM students WHERE mobility_impairment = true AND department = 'art');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 234, "num_statements": 1} {"question": "List all collaborations between artists of different genres in the last year.", "schema": "CREATE TABLE Collaborations (CollaborationId INT, Artist1 VARCHAR(255), Genre1 VARCHAR(255), Artist2 VARCHAR(255), Genre2 VARCHAR(255), CollaborationDate DATE); INSERT INTO Collaborations (CollaborationId, Artist1, Genre1, Artist2, Genre2, CollaborationDate) VALUES (1, 'Drake', 'Hip Hop', 'Ariana Grande', 'Pop', '2021-03-26'), (2, 'Post Malone', 'Rap Rock', 'Swae Lee', 'R&B', '2021-06-12'), (3, 'Justin Bieber', 'Pop', 'Chance the Rapper', 'Hip Hop', '2022-01-15'), (4, 'Lady Gaga', 'Pop', 'Tony Bennett', 'Jazz', '2021-10-01'), (5, 'Kendrick Lamar', 'Hip Hop', 'Sia', 'Pop', '2022-02-20');", "sql": "SELECT Artist1, Genre1, Artist2, Genre2 FROM Collaborations WHERE CollaborationDate >= DATE_SUB(CURDATE(), INTERVAL 1 YEAR) AND Genre1 <> Genre2;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 145, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the total round of the 129 pick?", "schema": "CREATE TABLE table_name_49 (round VARCHAR, pick__number VARCHAR)", "sql": "SELECT COUNT(round) FROM table_name_49 WHERE pick__number = 129;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 64, "num_statements": 1} {"question": "PL/pgSQL test: Plpython Composite (example 83).", "schema": null, "sql": "SELECT * FROM return_record_2('v4') AS (v1 int, v4 int, v2 int);", "explanation": "PL/pgSQL example from PostgreSQL source test for Plpython Composite.", "validation_query": null, "source": "plpgsql_source", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 64, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the average births that had a death rate of 0.4", "schema": "CREATE TABLE table_name_38 (births__000s_ INTEGER, deaths VARCHAR)", "sql": "SELECT AVG(births__000s_) FROM table_name_38 WHERE deaths = 0.4;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 64, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'hash_index' (example 21).", "schema": null, "sql": "create unique index hash_f8_index_3 on hash_f8_heap(random) where seqno > 1000;", "explanation": "DDL from PostgreSQL core regression test for Hash Index.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_index", "is_postgresql_specific": false, "sql_length": 79, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was their record on Wed. Dec. 5, when they played in Boston Garden?", "schema": "CREATE TABLE table_name_11 (record VARCHAR, location VARCHAR, date VARCHAR)", "sql": "SELECT record FROM table_name_11 WHERE location = 'boston garden' AND date = 'wed. dec. 5';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 91, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the career duration of the bowler who played 60 matches?", "schema": "CREATE TABLE table_18914438_1 (career VARCHAR, matches VARCHAR)", "sql": "SELECT career FROM table_18914438_1 WHERE matches = 60;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 55, "num_statements": 1} {"question": "Update the automation trend for May 5, 2022", "schema": "CREATE TABLE automation_trends (date DATETIME, trend_data VARCHAR(500));", "sql": "UPDATE automation_trends SET trend_data = '...' WHERE date = '2022-05-05';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 74, "num_statements": 1} {"question": "How many co-owned properties are there in Austin, TX?", "schema": "CREATE TABLE properties (id INT, city VARCHAR(50), co_ownership BOOLEAN); INSERT INTO properties (id, city, co_ownership) VALUES (1, 'Austin', true), (2, 'Dallas', false);", "sql": "SELECT COUNT(*) FROM properties WHERE city = 'Austin' AND co_ownership = true;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 78, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the transfer wind for giuly", "schema": "CREATE TABLE table_name_15 (transfer_window VARCHAR, name VARCHAR)", "sql": "SELECT transfer_window FROM table_name_15 WHERE name = 'giuly';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 63, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Tell me the Hindu with Jewish of source: uk 2001 census", "schema": "CREATE TABLE table_name_87 (hindu VARCHAR, jewish VARCHAR)", "sql": "SELECT hindu FROM table_name_87 WHERE jewish = 'source: uk 2001 census';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 72, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Where is the Westfield State University located?", "schema": "CREATE TABLE table_1974545_2 (location VARCHAR, institution VARCHAR)", "sql": "SELECT location FROM table_1974545_2 WHERE institution = 'Westfield State University';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 86, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the upstream for the internet 100 plans?", "schema": "CREATE TABLE table_name_42 (upstream VARCHAR, internet_plan VARCHAR)", "sql": "SELECT upstream FROM table_name_42 WHERE internet_plan = 'internet 100';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 72, "num_statements": 1} {"question": "Find the number of players who have played \"Fighting Game F\" and identify their gender.", "schema": "CREATE TABLE Fighting_Game_F (player_id INT, name VARCHAR(50), age INT, gender VARCHAR(10)); INSERT INTO Fighting_Game_F (player_id, name, age, gender) VALUES (2, 'Jane Smith', 24, 'Female'), (4, 'Bob Brown', 30, 'Male'), (11, 'Oliver Lee', 26, 'Male');", "sql": "SELECT COUNT(*), gender FROM Fighting_Game_F GROUP BY gender;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 61, "num_statements": 1} {"question": "What is the total catch weight for fish in the 'South Atlantic' region?", "schema": "CREATE TABLE fish_species (id INT, species TEXT, region TEXT); INSERT INTO fish_species (id, species, region) VALUES (1, 'Tuna', 'South Atlantic'), (2, 'Sardines', 'South Atlantic'), (3, 'Mackerel', 'North Atlantic'); CREATE TABLE catch_data (id INT, species TEXT, weight FLOAT); INSERT INTO catch_data (id, species, weight) VALUES (1, 'Tuna', 400), (2, 'Sardines', 600), (3, 'Mackerel', 350);", "sql": "SELECT SUM(catch_data.weight) FROM catch_data INNER JOIN fish_species ON catch_data.species = fish_species.species WHERE fish_species.region = 'South Atlantic';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 160, "num_statements": 1} {"question": "Show the total production volume for each mine and year in the \"mine_production\", \"mines\", and \"calendar\" tables", "schema": "CREATE TABLE mines (id INT, name VARCHAR(20)); INSERT INTO mines (id, name) VALUES (1, 'Golden Mine'), (2, 'Silver Mine'); CREATE TABLE calendar (year INT); INSERT INTO calendar (year) VALUES (2020), (2021), (2022); CREATE TABLE mine_production (mine_id INT, year INT, volume INT); INSERT INTO mine_production (mine_id, year, volume) VALUES (1, 2020, 1000), (1, 2021, 1200), (2, 2022, 1500);", "sql": "SELECT m.name, c.year, SUM(mp.volume) as total_volume FROM mines m JOIN mine_production mp ON m.id = mp.mine_id JOIN calendar c ON mp.year = c.year GROUP BY m.name, c.year;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 172, "num_statements": 1} {"question": "What is the percentage of people with access to clean water in Egypt?", "schema": "CREATE TABLE Water (ID INT, Country VARCHAR(100), Year INT, CleanWaterPercentage FLOAT); INSERT INTO Water (ID, Country, Year, CleanWaterPercentage) VALUES (1, 'Egypt', 2020, 98);", "sql": "SELECT CleanWaterPercentage FROM Water WHERE Country = 'Egypt' AND Year = 2020;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 79, "num_statements": 1} {"question": "Create a table for traditional art forms by country.", "schema": "CREATE TABLE Country_Arts (id INT, country TEXT, art_name TEXT); INSERT INTO Country_Arts (id, country, art_name) VALUES (1, 'Nigeria', 'Uli'); INSERT INTO Country_Arts (id, country, art_name) VALUES (2, 'India', 'Madhubani');", "sql": "CREATE TABLE Country_Arts (id INT, country TEXT, art_name TEXT);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_table", "is_postgresql_specific": false, "sql_length": 64, "num_statements": 1} {"question": "What is the total number of military bases in the 'US_Military_Bases' table?", "schema": "CREATE SCHEMA IF NOT EXISTS defense_security;CREATE TABLE IF NOT EXISTS defense_security.US_Military_Bases (id INT PRIMARY KEY, base_name VARCHAR(255), location VARCHAR(255), type VARCHAR(255));INSERT INTO defense_security.US_Military_Bases (id, base_name, location, type) VALUES (1, 'Fort Bragg', 'North Carolina', 'Army Base');", "sql": "SELECT COUNT(*) FROM defense_security.US_Military_Bases;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 56, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which Blagojevich (D) happened on october 16, 2006?", "schema": "CREATE TABLE table_name_77 (blagojevich__d_ VARCHAR, date VARCHAR)", "sql": "SELECT blagojevich__d_ FROM table_name_77 WHERE date = 'october 16, 2006';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 74, "num_statements": 1} {"question": "PostgreSQL regression test 'sqljson': Write the SELECT query (example 40).", "schema": null, "sql": "SELECT JSON_SERIALIZE(JSON('{ \"a\" : 1 } '));", "explanation": "Regression test for Sqljson in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT JSON_SERIALIZE(JSON('{ \"a\" : 1 } '))) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": true, "sql_length": 44, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What season shows podium 7?", "schema": "CREATE TABLE table_29471472_1 (season INTEGER, podiums VARCHAR)", "sql": "SELECT MIN(season) FROM table_29471472_1 WHERE podiums = 7;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 59, "num_statements": 1} {"question": "Find the projects that have been completed in both the 'rural_development' and 'urban_development' databases.", "schema": "CREATE TABLE RuralDevelopment_InfrastructureProjects (id INT PRIMARY KEY, name VARCHAR(100), status VARCHAR(20), evaluation_date DATE); INSERT INTO RuralDevelopment_InfrastructureProjects (id, name, status, evaluation_date) VALUES (1, 'Water Treatment Plant', 'completed', NULL), (2, 'Renewable Energy Center', 'in_progress', '2023-02-28'), (3, 'Rural Connectivity', 'completed', '2022-09-15'); CREATE TABLE UrbanDevelopment_InfrastructureProjects (id INT PRIMARY KEY, name VARCHAR(100), status VARCHAR(20), evaluation_date DATE); INSERT INTO UrbanDevelopment_InfrastructureProjects (id, name, status, evaluation_date) VALUES (1, 'Water Treatment Plant', 'completed', '2022-08-10'), (2, 'Urban Connectivity', 'completed', '2022-07-01'), (3, 'Transportation Center', 'in_progress', '2023-02-28');", "sql": "SELECT RuralDevelopment_InfrastructureProjects.name FROM RuralDevelopment_InfrastructureProjects INNER JOIN UrbanDevelopment_InfrastructureProjects ON RuralDevelopment_InfrastructureProjects.name = UrbanDevelopment_InfrastructureProjects.name WHERE RuralDevelopment_InfrastructureProjects.status = 'completed' AND UrbanDevelopment_InfrastructureProjects.status = 'completed';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 375, "num_statements": 1} {"question": "Write the SQL definition from PostgreSQL contrib extension 'contrib' (item 4).", "schema": null, "sql": "CREATE OPERATOR <% (\n LEFTARG = text,\n RIGHTARG = text,\n PROCEDURE = word_similarity_op,\n COMMUTATOR = '%>',\n RESTRICT = contsel,\n JOIN = contjoinsel\n);", "explanation": "SQL definition from the 'contrib' PostgreSQL contrib extension.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "ddl_advanced", "is_postgresql_specific": true, "sql_length": 194, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What round was on 11/11/1992?", "schema": "CREATE TABLE table_name_91 (round VARCHAR, date VARCHAR)", "sql": "SELECT round FROM table_name_91 WHERE date = '11/11/1992';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 58, "num_statements": 1} {"question": "PostgreSQL regression test 'partition_prune': Write the SELECT query (example 135).", "schema": null, "sql": "select * from boolpart where not a = false;", "explanation": "Regression test for Partition Prune in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select * from boolpart where not a = false) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 43, "num_statements": 1} {"question": "What is the distribution of student mental health scores by grade level, for students who identify as Indigenous or Native Hawaiian?", "schema": "CREATE TABLE student_mental_health_scores (score_id INT, student_id INT, grade_level INT, mental_health_score INT, student_ethnicity VARCHAR(50));", "sql": "SELECT grade_level, student_ethnicity, AVG(mental_health_score) FROM student_mental_health_scores WHERE student_ethnicity IN ('Indigenous', 'Native Hawaiian') GROUP BY grade_level, student_ethnicity;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 199, "num_statements": 1} {"question": "Find dishes with the highest sales per day and their average CO2 emissions.", "schema": "CREATE TABLE dishes (dish_name VARCHAR(255), daily_sales INT, co2_emissions INT);", "sql": "SELECT d.dish_name, AVG(d.co2_emissions) as avg_co2, MAX(d.daily_sales) as max_sales FROM dishes d GROUP BY d.dish_name ORDER BY max_sales DESC LIMIT 10;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 153, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the number of pinyin where simplified is 河西区", "schema": "CREATE TABLE table_1638437_2 (pinyin VARCHAR, simplified VARCHAR)", "sql": "SELECT COUNT(pinyin) FROM table_1638437_2 WHERE simplified = '河西区';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 67, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which country has Weerawila Airport?", "schema": "CREATE TABLE table_name_76 (country VARCHAR, airport VARCHAR)", "sql": "SELECT country FROM table_name_76 WHERE airport = 'weerawila airport';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 70, "num_statements": 1} {"question": "What is the total number of donors from Japan and Brazil combined?", "schema": "CREATE TABLE donors (donor_id INT, donor_name TEXT, donor_country TEXT); INSERT INTO donors (donor_id, donor_name, donor_country) VALUES (1, 'John Doe', 'Japan'), (2, 'Jane Smith', 'USA'), (3, 'Alice Johnson', 'Canada'), (4, 'Carlos Alvarez', 'Brazil'), (5, 'Elizabeth Brown', 'UK');", "sql": "SELECT COUNT(DISTINCT donor_country) as total_donors FROM donors WHERE donor_country IN ('Japan', 'Brazil');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 108, "num_statements": 1} {"question": "Which brands offer the most vegan-friendly products?", "schema": "CREATE TABLE brands (brand_id INT PRIMARY KEY, brand_name VARCHAR(50)); CREATE TABLE products (product_id INT, brand_id INT, PRIMARY KEY (product_id, brand_id), FOREIGN KEY (brand_id) REFERENCES brands(brand_id)); CREATE TABLE vegan_products (product_id INT, brand_id INT, PRIMARY KEY (product_id, brand_id), FOREIGN KEY (product_id) REFERENCES products(product_id), FOREIGN KEY (brand_id) REFERENCES brands(brand_id)); INSERT INTO brands (brand_id, brand_name) VALUES (1, 'Kat Von D'), (2, 'LUSH'), (3, 'The Body Shop'); INSERT INTO products (product_id, brand_id) VALUES (1, 1), (2, 1), (3, 2), (4, 2), (5, 3), (6, 3); INSERT INTO vegan_products (product_id, brand_id) VALUES (1, 1), (2, 1), (3, 2), (5, 3);", "sql": "SELECT brand_name, COUNT(*) as product_count FROM vegan_products JOIN brands ON vegan_products.brand_id = brands.brand_id GROUP BY brand_id ORDER BY product_count DESC;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 168, "num_statements": 1} {"question": "List all technology initiatives focused on social good in Pacific Asian countries.", "schema": "CREATE TABLE TechInitiatives (InitiativeID INT, InitiativeName TEXT, Country TEXT, Focus TEXT); INSERT INTO TechInitiatives (InitiativeID, InitiativeName, Country, Focus) VALUES (1, 'Initiative A', 'Japan', 'Social Good'); INSERT INTO TechInitiatives (InitiativeID, InitiativeName, Country, Focus) VALUES (2, 'Initiative B', 'South Korea', 'Healthcare'); INSERT INTO TechInitiatives (InitiativeID, InitiativeName, Country, Focus) VALUES (3, 'Initiative C', 'Singapore', 'Social Good');", "sql": "SELECT InitiativeName, Country FROM TechInitiatives WHERE Country IN ('Japan', 'South Korea', 'Singapore') AND Focus = 'Social Good';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 133, "num_statements": 1} {"question": "Update the collective bargaining agreement expiration date for a specific union affiliation.", "schema": "CREATE TABLE union_info (id INT, union_affiliation TEXT, collective_bargaining_agreement_expiration DATE); INSERT INTO union_info (id, union_affiliation, collective_bargaining_agreement_expiration) VALUES (1, 'Union A', '2022-12-31'), (2, 'Union B', '2023-06-30'), (3, 'Union C', '2024-01-31');", "sql": "UPDATE union_info SET collective_bargaining_agreement_expiration = '2025-01-31' WHERE union_affiliation = 'Union A';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 116, "num_statements": 1} {"question": "Local economic impact of sustainable tourism in each province?", "schema": "CREATE TABLE local_economy_extended_2 (province TEXT, impact FLOAT, year INT); INSERT INTO local_economy_extended_2 (province, impact, year) VALUES ('Ontario', 50000.0, 2021), ('British Columbia', 75000.0, 2021), ('Quebec', 60000.0, 2021);", "sql": "SELECT province, impact FROM local_economy_extended_2 WHERE year = 2021 GROUP BY province;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 90, "num_statements": 1} {"question": "Compute the percentage of renewable energy used for transportation in the European Union, for each country, in the last 5 years.", "schema": "CREATE TABLE EU_Transportation (country VARCHAR(255), year INT, renewable_energy INT); INSERT INTO EU_Transportation (country, year, renewable_energy) VALUES ('Germany', 2018, 10), ('France', 2018, 15), ('Italy', 2018, 12), ('Spain', 2018, 14), ('Germany', 2019, 12), ('France', 2019, 16), ('Italy', 2019, 13), ('Spain', 2019, 15);", "sql": "SELECT country, (SUM(renewable_energy) FILTER (WHERE year BETWEEN 2018 AND 2022) OVER (PARTITION BY country)::DECIMAL / SUM(renewable_energy) OVER (PARTITION BY country)) * 100 AS pct_renewable_transport FROM EU_Transportation;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "query_window_function", "is_postgresql_specific": true, "sql_length": 227, "num_statements": 1} {"question": "How many workers were involved in each construction project in the 'Projects' and 'Workforce' tables?", "schema": "CREATE TABLE Projects (projectID INT, projectName VARCHAR(50));CREATE TABLE Workforce (workerID INT, workerStartDate DATE, workerEndDate DATE, projectID INT);", "sql": "SELECT P.projectName, COUNT(W.workerID) AS Workers FROM Projects P INNER JOIN Workforce W ON P.projectID = W.projectID GROUP BY P.projectName;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 142, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many debates are there?", "schema": "CREATE TABLE debate (Id VARCHAR)", "sql": "SELECT COUNT(*) FROM debate;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 28, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the average total with a Rank larger than 11, a Nation of switzerland, and a Silver smaller than 0?", "schema": "CREATE TABLE table_name_85 (total INTEGER, silver VARCHAR, rank VARCHAR, nation VARCHAR)", "sql": "SELECT AVG(total) FROM table_name_85 WHERE rank > 11 AND nation = 'switzerland' AND silver < 0;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 95, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Available in 2-CD format, what is the title of story number 026?", "schema": "CREATE TABLE table_name_83 (title VARCHAR, format VARCHAR, story__number VARCHAR)", "sql": "SELECT title FROM table_name_83 WHERE format = '2-cd' AND story__number = '026';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 80, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'create_index' (example 560).", "schema": null, "sql": "CREATE UNIQUE INDEX concur_exprs_index_pred_2\n ON concur_exprs_tab ((1 / c1))\n WHERE ('-H') >= (c2::TEXT) COLLATE \"C\";", "explanation": "DDL from PostgreSQL core regression test for Create Index.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_index", "is_postgresql_specific": false, "sql_length": 120, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Where was the hometown for the player that attended Shanley High School?", "schema": "CREATE TABLE table_11677691_4 (hometown VARCHAR, school VARCHAR)", "sql": "SELECT hometown FROM table_11677691_4 WHERE school = 'Shanley High school';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 75, "num_statements": 1} {"question": "How many 'Sustainable Material' garments are produced in 'South America'?", "schema": "CREATE TABLE sustainable_garments(garment VARCHAR(20), material VARCHAR(20), region VARCHAR(20)); INSERT INTO sustainable_garments VALUES('Skirts', 'Sustainable Material', 'South America');", "sql": "SELECT COUNT(*) FROM sustainable_garments WHERE material = 'Sustainable Material' AND region = 'South America';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 111, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Show ids for the faculty members who don't advise any student.", "schema": "CREATE TABLE Faculty (FacID VARCHAR, advisor VARCHAR); CREATE TABLE Student (FacID VARCHAR, advisor VARCHAR)", "sql": "SELECT FacID FROM Faculty EXCEPT SELECT advisor FROM Student;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 61, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the IHSAA class for the school where the mascot is the Rebels?", "schema": "CREATE TABLE table_name_80 (ihsaa_class VARCHAR, mascot VARCHAR)", "sql": "SELECT ihsaa_class FROM table_name_80 WHERE mascot = 'rebels';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 62, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: A plural abbreviation of pp. is used for what singular word?", "schema": "CREATE TABLE table_name_82 (singular_word VARCHAR, plural_abbreviation VARCHAR)", "sql": "SELECT singular_word FROM table_name_82 WHERE plural_abbreviation = 'pp.';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 74, "num_statements": 1} {"question": "List the names and total points scored by football players in the 'football_matches' table, sorted by total points in descending order.", "schema": "CREATE TABLE football_teams (team_id INT, team_name VARCHAR(50));CREATE VIEW football_matches AS SELECT player_id, team_id, SUM(points) AS total_points FROM football_player_scores GROUP BY player_id, team_id;", "sql": "SELECT football_teams.team_name, football_matches.total_points, basketball_players.name FROM football_teams INNER JOIN football_matches ON football_teams.team_id = football_matches.team_id INNER JOIN basketball_players ON basketball_players.player_id = football_matches.player_id ORDER BY football_matches.total_points DESC;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 324, "num_statements": 1} {"question": "CO2 emissions of the transportation sector in 2018", "schema": "CREATE TABLE co2_emissions (id INT, sector VARCHAR(255), year INT, co2_emissions FLOAT);", "sql": "SELECT sector, SUM(co2_emissions) FROM co2_emissions WHERE sector = 'Transportation' AND year = 2018 GROUP BY sector;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 117, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What episode in the season was directed by Jeff Melman?", "schema": "CREATE TABLE table_11058032_1 (no_in_season INTEGER, directed_by VARCHAR)", "sql": "SELECT MIN(no_in_season) FROM table_11058032_1 WHERE directed_by = 'Jeff Melman';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 81, "num_statements": 1} {"question": "What was the average energy efficiency rating for commercial buildings in Texas in 2018?", "schema": "CREATE TABLE building_energy_efficiency (state VARCHAR(20), year INT, building_type VARCHAR(20), energy_efficiency_rating FLOAT);", "sql": "SELECT AVG(energy_efficiency_rating) FROM building_energy_efficiency WHERE state = 'Texas' AND year = 2018 AND building_type = 'Commercial';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 140, "num_statements": 1} {"question": "What is the maximum price of artworks created by Mexican artists?", "schema": "CREATE TABLE Artists (artist_id INT, artist_name VARCHAR(50), birth_date DATE, country VARCHAR(50)); INSERT INTO Artists (artist_id, artist_name, birth_date, country) VALUES (1, 'Clara Peeters', '1594-01-15', 'Netherlands'); ; CREATE TABLE Artworks (artwork_id INT, title VARCHAR(50), year_made INT, artist_id INT, price FLOAT); INSERT INTO Artworks (artwork_id, title, year_made, artist_id, price) VALUES (1, 'Still Life with Flowers', 1612, 1, 1000.0); ;", "sql": "SELECT MAX(Artworks.price) FROM Artworks INNER JOIN Artists ON Artworks.artist_id = Artists.artist_id WHERE Artists.country = 'Mexico';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 135, "num_statements": 1} {"question": "Write the SQL definition from PostgreSQL contrib extension 'contrib' (item 28).", "schema": null, "sql": "--\n-- pgp_pub_decrypt(data, key, psw)\n--\nCREATE FUNCTION pgp_pub_decrypt(bytea, bytea, text)\nRETURNS text\nAS 'MODULE_PATHNAME', 'pgp_pub_decrypt_text'\nLANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;", "explanation": "SQL definition from the 'contrib' PostgreSQL contrib extension.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 193, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: WHAT IS THE SUM PLAYED WITH POINTS 1 OF 53, AND POSITION LARGER THAN 3?", "schema": "CREATE TABLE table_name_81 (played INTEGER, points_1 VARCHAR, position VARCHAR)", "sql": "SELECT SUM(played) FROM table_name_81 WHERE points_1 = '53' AND position > 3;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 77, "num_statements": 1} {"question": "Insert new records of workplaces, including safety violations and union affiliations.", "schema": "CREATE TABLE workplaces (id INT, name TEXT, safety_violation BOOLEAN, union_affiliation TEXT);", "sql": "INSERT INTO workplaces (id, name, safety_violation, union_affiliation) VALUES (5, 'RST Industries', TRUE, 'Union C'), (6, 'UVW Corporation', FALSE, 'Union D');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 159, "num_statements": 1} {"question": "What is the total weight of each strain type sold in California dispensaries in March 2022?", "schema": "CREATE TABLE sales (id INT, strain_id INT, weight DECIMAL(5,2), date DATE); INSERT INTO sales (id, strain_id, weight, date) VALUES (1, 1, 5.5, '2022-03-01'), (2, 2, 7.3, '2022-03-02');", "sql": "SELECT strain_id, type, SUM(weight) as total_weight FROM sales s JOIN strains st ON s.strain_id = st.id WHERE st.state = 'California' AND date BETWEEN '2022-03-01' AND '2022-03-31' GROUP BY strain_id, type;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 206, "num_statements": 1} {"question": "Show a SQL definition from the zombodb project (test-es-only-fields, item 1).", "schema": null, "sql": "CREATE TABLE es_only (\n id serial8 not null primary key,\n first_name varchar(25),\n last_name varchar(64)\n);", "explanation": "SQL definition from the open-source zombodb PostgreSQL project.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_table", "is_postgresql_specific": false, "sql_length": 116, "num_statements": 1} {"question": "Calculate total revenue for all exhibitions", "schema": "CREATE TABLE Exhibitions (id INT, name VARCHAR(255), entry_fee DECIMAL(5,2)); CREATE TABLE Tickets (id INT, visitor_id INT, exhibition_id INT, price DECIMAL(5,2));", "sql": "SELECT SUM(Tickets.price) FROM Tickets JOIN Exhibitions ON Tickets.exhibition_id = Exhibitions.id;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 98, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is raymond floyd's lowest $?", "schema": "CREATE TABLE table_name_77 (money___ INTEGER, player VARCHAR)", "sql": "SELECT MIN(money___) AS $__ FROM table_name_77 WHERE player = 'raymond floyd';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 78, "num_statements": 1} {"question": "What is the total cost of all the astrobiology missions that have been launched by private companies?", "schema": "CREATE TABLE missions (mission_id INT, mission_name VARCHAR(50), agency_type VARCHAR(50), cost INT); INSERT INTO missions (mission_id, mission_name, agency_type, cost) VALUES (1, 'Mission1', 'Private', 1000000), (2, 'Mission2', 'Government', 2000000), (3, 'Mission3', 'Private', 1500000);", "sql": "SELECT SUM(cost) FROM missions WHERE agency_type = 'Private' AND mission_name LIKE '%astrobiology%';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 100, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the district for matthew clay", "schema": "CREATE TABLE table_2668387_18 (district VARCHAR, incumbent VARCHAR)", "sql": "SELECT district FROM table_2668387_18 WHERE incumbent = 'Matthew Clay';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 71, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the lowest draw for Beathoven when the place was smaller than 16?", "schema": "CREATE TABLE table_name_55 (draw INTEGER, artist VARCHAR, place VARCHAR)", "sql": "SELECT MIN(draw) FROM table_name_55 WHERE artist = 'beathoven' AND place < 16;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 78, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the total for the person with 73.28 bodyweight and fewer snatches than 75?", "schema": "CREATE TABLE table_name_22 (total__kg_ INTEGER, bodyweight VARCHAR, snatch VARCHAR)", "sql": "SELECT AVG(total__kg_) FROM table_name_22 WHERE bodyweight = 73.28 AND snatch < 75;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 83, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: what is the grid when the driver is louis rosier and the laps is more than 78?", "schema": "CREATE TABLE table_name_39 (grid INTEGER, driver VARCHAR, laps VARCHAR)", "sql": "SELECT MIN(grid) FROM table_name_39 WHERE driver = 'louis rosier' AND laps > 78;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 80, "num_statements": 1} {"question": "Find the vessel with the highest average speed that arrived in the Netherlands ports?", "schema": "CREATE TABLE VesselArrivals (ID INT, VesselName VARCHAR(50), ArrivalPort VARCHAR(50), ArrivalDate DATE, AverageSpeed DECIMAL(5,2)); INSERT INTO VesselArrivals (ID, VesselName, ArrivalPort, ArrivalDate, AverageSpeed) VALUES (1, 'Test Vessel 1', 'Amsterdam', '2022-01-01', 15.5), (2, 'Test Vessel 2', 'Rotterdam', '2022-01-02', 20.3), (3, 'Test Vessel 3', 'Utrecht', '2022-01-03', 18.5);", "sql": "SELECT VesselName, MAX(AverageSpeed) FROM VesselArrivals WHERE ArrivalPort LIKE 'Netherlands%' GROUP BY VesselName;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 115, "num_statements": 1} {"question": "Which forests have an average tree height over 45 meters?", "schema": "CREATE TABLE forests (id INT, name VARCHAR(255), hectares FLOAT, country VARCHAR(255)); INSERT INTO forests (id, name, hectares, country) VALUES (1, 'Boreal Forest', 1200000.0, 'Canada'), (2, 'Amazon Rainforest', 5500000.0, 'Brazil'), (3, 'Daintree Rainforest', 120000.0, 'Australia'); CREATE TABLE trees (id INT, species VARCHAR(255), height FLOAT, forest_id INT); INSERT INTO trees (id, species, height, forest_id) VALUES (1, 'White Spruce', 42.0, 1), (2, 'Rainforest Gum', 30.0, 2), (3, 'Southern Silky Oak', 70.0, 3); CREATE VIEW avg_tree_height AS SELECT forest_id, AVG(height) as avg_height FROM trees GROUP BY forest_id;", "sql": "SELECT forests.name FROM forests INNER JOIN avg_tree_height ON forests.id = avg_tree_height.forest_id WHERE avg_tree_height.avg_height > 45;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 140, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the time for a lane less than 6, and a heat less than 4 for Joanne Malar?", "schema": "CREATE TABLE table_name_19 (time VARCHAR, name VARCHAR, lane VARCHAR, heat VARCHAR)", "sql": "SELECT time FROM table_name_19 WHERE lane < 6 AND heat < 4 AND name = 'joanne malar';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 85, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: how many categories fall under the category of britons?", "schema": "CREATE TABLE table_261895_1 (type VARCHAR, nickname VARCHAR)", "sql": "SELECT COUNT(type) FROM table_261895_1 WHERE nickname = 'Britons';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the 2009 for wta premier mandatory tournaments", "schema": "CREATE TABLE table_name_99 (tournament VARCHAR)", "sql": "SELECT 2009 FROM table_name_99 WHERE tournament = 'wta premier mandatory tournaments';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 86, "num_statements": 1} {"question": "Which food products have been recalled due to contamination in the last 6 months?", "schema": "CREATE TABLE recalls (id INT, date TEXT, product TEXT, reason TEXT); INSERT INTO recalls (id, date, product, reason) VALUES (1, '2022-01-01', 'Spinach', 'Contamination');", "sql": "SELECT product, reason FROM recalls WHERE date > DATE_SUB(CURRENT_DATE, INTERVAL 6 MONTH) AND reason = 'Contamination';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 119, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: what is the goals when the country is wal, the name is smith and apps is more than 0?", "schema": "CREATE TABLE table_name_83 (goals INTEGER, apps VARCHAR, country VARCHAR, name VARCHAR)", "sql": "SELECT SUM(goals) FROM table_name_83 WHERE country = 'wal' AND name = 'smith' AND apps > 0;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 91, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the total number on roll for Shelly Park school?", "schema": "CREATE TABLE table_name_7 (roll VARCHAR, name VARCHAR)", "sql": "SELECT COUNT(roll) FROM table_name_7 WHERE name = 'shelly park school';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 71, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the music video that was from the album High Society, with a length of 3:50?", "schema": "CREATE TABLE table_name_91 (music_video VARCHAR, album VARCHAR, length VARCHAR)", "sql": "SELECT music_video FROM table_name_91 WHERE album = 'high society' AND length = '3:50';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 87, "num_statements": 1} {"question": "Increase the value of military equipment sales by 15% for 'South Asia' in 2023", "schema": "CREATE TABLE military_sales_11 (id INT, region VARCHAR, year INT, value FLOAT);", "sql": "UPDATE military_sales_11 SET value = value * 1.15 WHERE region = 'South Asia' AND year = 2023;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 94, "num_statements": 1} {"question": "Retrieve the total production and reserves for oil fields in the USA.", "schema": "CREATE TABLE OilFields (FieldID INT, FieldName VARCHAR(50), Country VARCHAR(50), Production INT, Reserves INT); INSERT INTO OilFields (FieldID, FieldName, Country, Production, Reserves) VALUES (1, 'Galaxy', 'USA', 20000, 500000); INSERT INTO OilFields (FieldID, FieldName, Country, Production, Reserves) VALUES (2, 'Apollo', 'Canada', 15000, 400000);", "sql": "SELECT Country, SUM(Production) AS Total_Production, SUM(Reserves) AS Total_Reserves FROM OilFields WHERE Country = 'USA' GROUP BY Country;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 139, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many steals did jasmine wynne have?", "schema": "CREATE TABLE table_23346303_4 (steals VARCHAR, player VARCHAR)", "sql": "SELECT steals FROM table_23346303_4 WHERE player = 'Jasmine Wynne';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 67, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the torque of the engine is d5244 t5", "schema": "CREATE TABLE table_1147701_5 (torque__nm VARCHAR, engine_code VARCHAR)", "sql": "SELECT torque__nm AS @rpm_ FROM table_1147701_5 WHERE engine_code = 'D5244 T5';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 79, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the date for result loss for duke", "schema": "CREATE TABLE table_21092444_1 (date VARCHAR, result VARCHAR, opponent VARCHAR)", "sql": "SELECT date FROM table_21092444_1 WHERE result = 'Loss' AND opponent = 'Duke';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 78, "num_statements": 1} {"question": "What are the product codes and names for products that share the same suppliers as product 'A101'?", "schema": "CREATE TABLE Suppliers (product_code TEXT, supplier_id INTEGER); INSERT INTO Suppliers (product_code, supplier_id) VALUES ('A101', 123), ('B203', 123), ('C405', 456), ('A101', 789);", "sql": "SELECT s2.product_code, p.product_name FROM Suppliers s1 JOIN Suppliers s2 ON s1.supplier_id = s2.supplier_id JOIN Products p ON s2.product_code = p.product_code WHERE s1.product_code = 'A101' AND s1.product_code != s2.product_code;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 232, "num_statements": 1} {"question": "How many users from the United States have played the virtual reality game \"Cybernetic Realms\" in the last month?", "schema": "CREATE TABLE users (id INT, country VARCHAR(50), game VARCHAR(50), last_played DATETIME); INSERT INTO users VALUES (1, 'United States', 'Cybernetic Realms', '2022-02-03 16:20:00'); INSERT INTO users VALUES (2, 'Canada', 'Cybernetic Realms', '2022-02-10 09:35:00');", "sql": "SELECT COUNT(*) FROM users WHERE country = 'United States' AND game = 'Cybernetic Realms' AND last_played >= DATE_SUB(NOW(), INTERVAL 1 MONTH);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 143, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many judges points did the couple ranked at number 5 have?", "schema": "CREATE TABLE table_19744915_22 (judges INTEGER, rank VARCHAR)", "sql": "SELECT MAX(judges) FROM table_19744915_22 WHERE rank = 5;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 57, "num_statements": 1} {"question": "What is the number of disability support programs offered, partitioned by location and ordered from highest to lowest?", "schema": "CREATE TABLE Disability_Support_Programs (Location VARCHAR(20), Program VARCHAR(50), Type VARCHAR(20)); INSERT INTO Disability_Support_Programs VALUES ('Campus A', 'Assistive Technology', 'Resource Center'), ('Campus A', 'ASL Interpretation', 'Service'), ('Campus A', 'Mobility Support', 'Service'), ('Campus B', 'Assistive Technology', 'Resource Center'), ('Campus B', 'ASL Interpretation', 'Service'), ('Campus B', 'Mobility Support', 'Service'), ('Campus C', 'Assistive Technology', 'Resource Center'), ('Campus C', 'ASL Interpretation', 'Service');", "sql": "SELECT Location, COUNT(*) as Number_of_Programs, RANK() OVER (ORDER BY COUNT(*) DESC) as Rank FROM Disability_Support_Programs GROUP BY Location ORDER BY Rank;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_window_function", "is_postgresql_specific": true, "sql_length": 159, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who was Footscray's opponent on June 15th of 1968?", "schema": "CREATE TABLE table_name_54 (home_team VARCHAR, away_team VARCHAR)", "sql": "SELECT home_team FROM table_name_54 WHERE away_team = 'footscray';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which 1st leg has a Team #1 of ummc ekaterinburg?", "schema": "CREATE TABLE table_name_33 (team__number1 VARCHAR)", "sql": "SELECT 1 AS st_leg FROM table_name_33 WHERE team__number1 = 'ummc ekaterinburg';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 80, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'pgstattuple' (example 62).", "schema": null, "sql": "select pgstattuple_approx((select reltoastrelid from pg_class where relname = 'test'));", "explanation": "Example query from the 'pgstattuple' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": true, "sql_length": 87, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which Home team had attendance 160?", "schema": "CREATE TABLE table_name_91 (home_team VARCHAR, attendance VARCHAR)", "sql": "SELECT home_team FROM table_name_91 WHERE attendance = '160';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 61, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who directed the episode with production code 177605?", "schema": "CREATE TABLE table_26561506_1 (directed_by VARCHAR, production_code VARCHAR)", "sql": "SELECT directed_by FROM table_26561506_1 WHERE production_code = 177605;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 72, "num_statements": 1} {"question": "What is the minimum total revenue for the telecom provider in all regions?", "schema": "CREATE TABLE revenues (revenue FLOAT, region VARCHAR(20)); INSERT INTO revenues (revenue, region) VALUES (120000, 'Southern'), (150000, 'Northern'), (180000, 'Western'), (200000, 'Northern'), (250000, 'Eastern'); CREATE TABLE minimum_revenue (min_revenue FLOAT); INSERT INTO minimum_revenue (min_revenue) VALUES (100000);", "sql": "SELECT MIN(revenue) FROM revenues;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 34, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'create_view' (example 36).", "schema": null, "sql": "CREATE VIEW temp_view_test.v2 AS SELECT * FROM base_table;", "explanation": "DDL from PostgreSQL core regression test for Create View.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "ddl_view", "is_postgresql_specific": false, "sql_length": 58, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'create_am' (example 46).", "schema": null, "sql": "CREATE TABLE tableam_parted_a_heap2 PARTITION OF tableam_parted_heap2 FOR VALUES IN ('a');", "explanation": "DDL from PostgreSQL core regression test for Create Am.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_table", "is_postgresql_specific": false, "sql_length": 90, "num_statements": 1} {"question": "Add a new record to the \"routes\" table with the following data: route_id = 5, origin = \"Seattle\", destination = \"New York\", distance = 2500, and eta = '2022-07-01'", "schema": "CREATE TABLE routes (route_id INT, origin VARCHAR(50), destination VARCHAR(50), distance INT, eta DATE);", "sql": "INSERT INTO routes (route_id, origin, destination, distance, eta) VALUES (5, 'Seattle', 'New York', 2500, '2022-07-01');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 120, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the outcome when the oppenent was serena williams venus williams and had a hard surface?", "schema": "CREATE TABLE table_name_60 (outcome VARCHAR, opponents_in_the_final VARCHAR, surface VARCHAR)", "sql": "SELECT outcome FROM table_name_60 WHERE opponents_in_the_final = 'serena williams venus williams' AND surface = 'hard';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 119, "num_statements": 1} {"question": "What is the most common treatment type for patients in Germany?", "schema": "CREATE SCHEMA mental_health; USE mental_health; CREATE TABLE patients (patient_id INT, diagnosis VARCHAR(50), age INT, country VARCHAR(50)); CREATE TABLE treatments (treatment_id INT, patient_id INT, treatment_type VARCHAR(50), treatment_date DATE, country VARCHAR(50)); INSERT INTO treatments VALUES (4, 5, 'psychotherapy', '2021-01-01', 'Germany');", "sql": "SELECT treatment_type, COUNT(*) FROM treatments JOIN patients ON treatments.patient_id = patients.patient_id WHERE patients.country = 'Germany' GROUP BY treatment_type ORDER BY COUNT(*) DESC LIMIT 1;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 199, "num_statements": 1} {"question": "What is the minimum budget for all resilience projects in the infrastructure development database?", "schema": "CREATE TABLE if not exists Projects (id INT, name VARCHAR(50), type VARCHAR(50), budget DECIMAL(10,2)); INSERT INTO Projects (id, name, type, budget) VALUES (1, 'Seawall', 'Resilience', 5000000.00), (2, 'Floodgate', 'Resilience', 3000000.00), (3, 'Bridge', 'Transportation', 8000000.00), (4, 'Highway', 'Transportation', 12000000.00), (5, 'Levee', 'Resilience', 2000000.00), (6, 'Pump Station', 'Resilience', 1500000.00);", "sql": "SELECT MIN(budget) FROM Projects WHERE type = 'Resilience';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 59, "num_statements": 1} {"question": "PL/pgSQL test: Plpgsql Domain (example 71).", "schema": null, "sql": "SELECT build_ordered_named_pairs(1,2);", "explanation": "PL/pgSQL example from PostgreSQL source test for Plpgsql Domain.", "validation_query": null, "source": "plpgsql_source", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 38, "num_statements": 1} {"question": "What is the minimum number of visitors to eco-friendly accommodations in North America?", "schema": "CREATE TABLE accommodations (id INT, name TEXT, continent TEXT, type TEXT, visitors INT); INSERT INTO accommodations (id, name, continent, type, visitors) VALUES (1, 'Eco Lodge', 'North America', 'Eco-friendly', 1500), (2, 'Green Hotel', 'North America', 'Eco-friendly', 1000);", "sql": "SELECT MIN(visitors) FROM accommodations WHERE continent = 'North America' AND type = 'Eco-friendly';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 101, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the attendance at the Denver game?", "schema": "CREATE TABLE table_name_59 (location_attendance VARCHAR, team VARCHAR)", "sql": "SELECT location_attendance FROM table_name_59 WHERE team = 'denver';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 68, "num_statements": 1} {"question": "Insert new records into the \"humanitarian_aid\" table for the following disaster_id, country_name, aid_amount, and aid_date values: (301, 'Bangladesh', 50000, '2021-12-25'), (302, 'Pakistan', 75000, '2021-12-27'), and (303, 'Nepal', 60000, '2021-12-30')", "schema": "CREATE TABLE humanitarian_aid (disaster_id INT, country_name VARCHAR(50), aid_amount INT, aid_date DATE);", "sql": "INSERT INTO humanitarian_aid (disaster_id, country_name, aid_amount, aid_date) VALUES (301, 'Bangladesh', 50000, '2021-12-25'), (302, 'Pakistan', 75000, '2021-12-27'), (303, 'Nepal', 60000, '2021-12-30');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 204, "num_statements": 1} {"question": "How many graduate students in the Business department have not published any papers in the year 2019?", "schema": "CREATE TABLE GraduateStudents (StudentID INT, Name VARCHAR(50), Department VARCHAR(50), Publications INT, PublicationYear INT);", "sql": "SELECT COUNT(StudentID) FROM GraduateStudents WHERE Department = 'Business' AND Publications = 0 AND PublicationYear = 2019;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 124, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What day was the score 1-0?", "schema": "CREATE TABLE table_name_39 (date VARCHAR, score VARCHAR)", "sql": "SELECT date FROM table_name_39 WHERE score = '1-0';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 51, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: what is the date when the away team is newport county?", "schema": "CREATE TABLE table_name_98 (date VARCHAR, away_team VARCHAR)", "sql": "SELECT date FROM table_name_98 WHERE away_team = 'newport county';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "List the total waste generation figures for each region, excluding regions with a population under 500,000.", "schema": "CREATE TABLE WasteGeneration (id INT, area VARCHAR(10), amount INT, region VARCHAR(10)); INSERT INTO WasteGeneration (id, area, amount, region) VALUES (1, 'Area1', 35000, 'RegionA'), (2, 'Area2', 20000, 'RegionA'), (3, 'Area3', 15000, 'RegionB'), (4, 'Area4', 10000, 'RegionB'); CREATE TABLE Population (id INT, region VARCHAR(10), population INT); INSERT INTO Population (id, region, population) VALUES (1, 'RegionA', 700000), (2, 'RegionB', 600000);", "sql": "SELECT W.region, SUM(W.amount) FROM WasteGeneration W INNER JOIN Population P ON W.region = P.region WHERE P.population >= 500000 GROUP BY W.region;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 148, "num_statements": 1} {"question": "PostgreSQL regression test 'jsonb_jsonpath': Write the SELECT query (example 314).", "schema": null, "sql": "select jsonb_path_query('\"10-03-2017\"', '$.datetime(\"dd-mm-yyyy\")');", "explanation": "Regression test for Jsonb Jsonpath in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select jsonb_path_query('\"10-03-2017\"', '$.datetime(\"dd-mm-yyyy\")')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "advanced", "category": "query_select", "is_postgresql_specific": false, "sql_length": 68, "num_statements": 1} {"question": "What is the minimum revenue generated from any sustainable tour in the USA?", "schema": "CREATE TABLE usa_tours (id INT, type VARCHAR(255), revenue FLOAT); INSERT INTO usa_tours (id, type, revenue) VALUES (1, 'Sustainable', 700.00), (2, 'Cultural', 800.00);", "sql": "SELECT MIN(revenue) FROM usa_tours WHERE type = 'Sustainable';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 62, "num_statements": 1} {"question": "What is the maximum range for electric vehicles in the US?", "schema": "CREATE TABLE EVs (Id INT PRIMARY KEY, Make VARCHAR(50), Model VARCHAR(50), Range INT, Country VARCHAR(50));", "sql": "SELECT MAX(Range) FROM EVs WHERE Country = 'United States';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 59, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: With a Tally of 0-14, what is the Rank in Kilkenny County?", "schema": "CREATE TABLE table_name_3 (rank VARCHAR, county VARCHAR, tally VARCHAR)", "sql": "SELECT COUNT(rank) FROM table_name_3 WHERE county = 'kilkenny' AND tally = '0-14';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 82, "num_statements": 1} {"question": "Determine the number of unique garment types present in the 'GarmentProduction' table but not present in the 'GarmentSales' table.", "schema": "CREATE TABLE GarmentProduction (garment_type VARCHAR(50)); INSERT INTO GarmentProduction (garment_type) VALUES ('T-Shirt'), ('Jeans'), ('Hoodie'); CREATE TABLE GarmentSales (garment_type VARCHAR(50)); INSERT INTO GarmentSales (garment_type) VALUES ('T-Shirt'), ('Jackets');", "sql": "SELECT COUNT(DISTINCT garment_type) FROM GarmentProduction WHERE garment_type NOT IN (SELECT garment_type FROM GarmentSales);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 125, "num_statements": 1} {"question": "PostgreSQL regression test 'rangetypes': Write the SELECT query (example 165).", "schema": null, "sql": "select daterange('2000-01-10'::date, '2000-01-11'::date, '(]');", "explanation": "Regression test for Rangetypes in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select daterange('2000-01-10'::date, '2000-01-11'::date, '(]')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 63, "num_statements": 1} {"question": "Show a SQL definition from the postgrest project (big_schema, item 28).", "schema": null, "sql": "CREATE FUNCTION apflora.ap_insert_add_apart() RETURNS trigger\n LANGUAGE plpgsql\n AS $$\nBEGIN\n INSERT INTO\n apflora.apart (ap_id, art_id)\n VALUES (NEW.id, NEW.art_id);\n RETURN NEW;\nEND;\n$$;", "explanation": "SQL definition from the open-source postgrest PostgreSQL project.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "plpgsql_trigger", "is_postgresql_specific": true, "sql_length": 200, "num_statements": 4} {"question": "Which teachers have participated in more than 5 professional development courses in the last year?", "schema": "CREATE TABLE teachers (teacher_id INT, teacher_name VARCHAR(100)); CREATE TABLE professional_development_courses (course_id INT, teacher_id INT, course_name VARCHAR(100), date DATE);", "sql": "SELECT t.teacher_id, t.teacher_name, COUNT(pdc.course_id) as num_courses FROM teachers t JOIN professional_development_courses pdc ON t.teacher_id = pdc.teacher_id WHERE pdc.date >= DATE_SUB(CURRENT_DATE(), INTERVAL 1 YEAR) GROUP BY t.teacher_id HAVING num_courses > 5;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 269, "num_statements": 1} {"question": "How many accessible taxi trips were there in the 'east' region in January 2022?", "schema": "CREATE TABLE taxi_trips (trip_id INT, region_id INT, trip_date DATE, is_accessible BOOLEAN); INSERT INTO taxi_trips (trip_id, region_id, trip_date, is_accessible) VALUES (1, 1, '2022-01-01', true), (2, 2, '2022-01-02', false), (3, 3, '2022-01-03', true), (4, 2, '2022-01-04', false);", "sql": "SELECT COUNT(*) FROM taxi_trips t WHERE t.region_id = (SELECT region_id FROM regions WHERE region_name = 'east') AND t.trip_date BETWEEN '2022-01-01' AND '2022-01-31' AND t.is_accessible = true;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 194, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which Mountain Peak has a Region of baja california, and a Location of 28.1301°n 115.2206°w?", "schema": "CREATE TABLE table_name_87 (mountain_peak VARCHAR, region VARCHAR, location VARCHAR)", "sql": "SELECT mountain_peak FROM table_name_87 WHERE region = 'baja california' AND location = '28.1301°n 115.2206°w';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 111, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How much Population density (per km²) has a Name of total northern villages, and a Population (2006) larger than 11414?", "schema": "CREATE TABLE table_name_98 (population_density__per_km²_ VARCHAR, name VARCHAR, population__2006_ VARCHAR)", "sql": "SELECT COUNT(population_density__per_km²_) FROM table_name_98 WHERE name = 'total northern villages' AND population__2006_ > 11414;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 131, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: who was the incoming manager for the 12th position in the table", "schema": "CREATE TABLE table_26914759_3 (incoming_manager VARCHAR, position_in_table VARCHAR)", "sql": "SELECT incoming_manager FROM table_26914759_3 WHERE position_in_table = '12th';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 79, "num_statements": 1} {"question": "List the fair trade certified factories in the factories table.", "schema": "CREATE TABLE factories (factory_id INT, factory_name TEXT, is_fair_trade_certified BOOLEAN); INSERT INTO factories VALUES (1, 'Green Factory', TRUE); INSERT INTO factories VALUES (2, 'Eco-friendly Solutions', FALSE);", "sql": "SELECT factory_name FROM factories WHERE is_fair_trade_certified = TRUE;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 72, "num_statements": 1} {"question": "What is the total number of public works projects completed in 2019 and 2020 for each district?", "schema": "CREATE TABLE PublicWorks (id INT, district VARCHAR(20), year INT, completed INT); INSERT INTO PublicWorks (id, district, year, completed) VALUES (1, 'Downtown', 2019, 1), (2, 'Uptown', 2020, 1), (3, 'Downtown', 2020, 1);", "sql": "SELECT district, COUNT(*) as num_projects FROM PublicWorks WHERE year IN (2019, 2020) GROUP BY district;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 104, "num_statements": 1} {"question": "What is the percentage of total streams for each artist?", "schema": "CREATE TABLE AlbumStreams (AlbumID int, SongID int, StreamCount int, ArtistID int); INSERT INTO AlbumStreams (AlbumID, SongID, StreamCount, ArtistID) VALUES (1, 1, 1000, 1), (2, 2, 2000, 2), (3, 3, 1500, 3), (4, 4, 2500, 4), (5, 5, 1800, 5);", "sql": "SELECT Artists.ArtistName, (SUM(AlbumStreams.StreamCount) / (SELECT SUM(StreamCount) FROM AlbumStreams) * 100) as Percentage FROM Artists INNER JOIN AlbumStreams ON Artists.ArtistID = AlbumStreams.ArtistID GROUP BY Artists.ArtistName;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 234, "num_statements": 1} {"question": "Update the labor cost of factories in Vietnam that use organic cotton to be $1.00 higher than the current value.", "schema": "CREATE TABLE factory_vietnam (factory VARCHAR(255), country VARCHAR(255), material VARCHAR(255), labor_cost DECIMAL(5,2)); INSERT INTO factory_vietnam (factory, country, material, labor_cost) VALUES ('Factory1', 'Vietnam', 'organic cotton', 5.00), ('Factory2', 'Vietnam', 'conventional cotton', 4.75), ('Factory3', 'Vietnam', 'organic cotton', 5.25);", "sql": "UPDATE factory_vietnam SET labor_cost = labor_cost + 1.00 WHERE country = 'Vietnam' AND material = 'organic cotton';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 116, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the final score of the game with 7523 in attendance?", "schema": "CREATE TABLE table_25331766_3 (final_score VARCHAR, attendance VARCHAR)", "sql": "SELECT final_score FROM table_25331766_3 WHERE attendance = 7523;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 65, "num_statements": 1} {"question": "List the countries in Europe with travel advisories and their respective regions, along with the total expenditure by international visitors for the last available year.", "schema": "CREATE TABLE travel_advisories_europe (id INT, country VARCHAR(50), region VARCHAR(50), travel_warning BOOLEAN, advisory_text TEXT, year INT, continent VARCHAR(10)); INSERT INTO travel_advisories_europe (id, country, region, travel_warning, advisory_text, year, continent) VALUES (1, 'France', 'Europe', true, 'Avoid non-essential travel due to civil unrest.', 2022, 'Europe');", "sql": "SELECT ta.country, ta.region, t.total_expenditure FROM travel_advisories_europe ta JOIN tourism_spending t ON ta.country = t.country AND t.year = (SELECT MAX(year) FROM tourism_spending WHERE country = ta.country AND continent = ta.continent) WHERE ta.travel_warning = true AND ta.continent = 'Europe';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 302, "num_statements": 1} {"question": "Find the number of research grants awarded to female faculty members in the Computer Science department", "schema": "CREATE TABLE faculty(faculty_id INT, name VARCHAR(50), gender VARCHAR(10), department VARCHAR(20)); INSERT INTO faculty VALUES (1, 'Alice', 'Female', 'Computer Science'); INSERT INTO faculty VALUES (2, 'Bob', 'Male', 'Computer Science'); CREATE TABLE research_grants(grant_id INT, faculty_id INT, amount DECIMAL(10, 2)); INSERT INTO research_grants VALUES (1, 1, 50000); INSERT INTO research_grants VALUES (2, 2, 75000);", "sql": "SELECT COUNT(*) FROM faculty f INNER JOIN research_grants g ON f.faculty_id = g.faculty_id WHERE f.gender = 'Female' AND f.department = 'Computer Science';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 155, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which Record has Points larger than 0, and a Score of 7–3?", "schema": "CREATE TABLE table_name_96 (record VARCHAR, points VARCHAR, score VARCHAR)", "sql": "SELECT record FROM table_name_96 WHERE points > 0 AND score = '7–3';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 68, "num_statements": 1} {"question": "How many public consultations in the environment domain have taken place in the city of Toronto in the last year?", "schema": "CREATE TABLE consultation (id INT, name VARCHAR(255), domain VARCHAR(255), city VARCHAR(255), start_date DATE); INSERT INTO consultation (id, name, domain, city, start_date) VALUES (1, 'Waste Management', 'Environment', 'Toronto', '2022-02-01'); INSERT INTO consultation (id, name, domain, city, start_date) VALUES (2, 'Park Clean-up', 'Environment', 'Toronto', '2021-12-15');", "sql": "SELECT COUNT(*) FROM consultation WHERE domain = 'Environment' AND city = 'Toronto' AND start_date >= DATEADD(year, -1, GETDATE());", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 131, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What music is in the film before 1963?", "schema": "CREATE TABLE table_name_53 (music VARCHAR, year INTEGER)", "sql": "SELECT music FROM table_name_53 WHERE year < 1963;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 50, "num_statements": 1} {"question": "What was the landfill capacity in cubic meters for each state in the USA in 2020?", "schema": "CREATE TABLE landfill_capacity (state VARCHAR(255), year INT, capacity INT); INSERT INTO landfill_capacity (state, year, capacity) VALUES ('California', 2020, 1500000), ('Texas', 2020, 1600000), ('New York', 2020, 1200000), ('Florida', 2020, 1400000);", "sql": "SELECT state, capacity FROM landfill_capacity WHERE year = 2020;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 64, "num_statements": 1} {"question": "PostgreSQL regression test 'numeric': Write the SELECT query (example 723).", "schema": null, "sql": "SELECT to_number('5 4 4 4 4 8 . 7 8', '9 9 9 9 9 9 . 9 9');", "explanation": "Regression test for Numeric in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT to_number('5 4 4 4 4 8 . 7 8', '9 9 9 9 9 9 . 9 9')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 59, "num_statements": 1} {"question": "Delete all food justice projects in 'California' from the 'food_justice_projects' table.", "schema": "CREATE TABLE food_justice_projects (id INT, name TEXT, location TEXT); INSERT INTO food_justice_projects (id, name, location) VALUES (1, 'Project A', 'California'), (2, 'Project B', 'New York'), (3, 'Project C', 'California');", "sql": "DELETE FROM food_justice_projects WHERE location = 'California';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_delete", "is_postgresql_specific": false, "sql_length": 64, "num_statements": 1} {"question": "Identify the policy trends in clean energy for countries in the European Union.", "schema": "CREATE TABLE eu_clean_energy_policy (country VARCHAR(30), policy_date DATE, policy_description TEXT); INSERT INTO eu_clean_energy_policy (country, policy_date, policy_description) VALUES ('Germany', '2022-01-01', 'Implemented a new feed-in tariff for solar energy.'), ('France', '2021-06-15', 'Expanded offshore wind energy capacity.'), ('Spain', '2022-04-20', 'Increased funding for renewable energy research and development.'), ('Italy', '2021-12-10', 'Phased out coal-fired power plants.'), ('Poland', '2022-02-15', 'Introduced a carbon tax on heavy industry.');", "sql": "SELECT country, policy_description FROM eu_clean_energy_policy WHERE country IN ('Germany', 'France', 'Spain', 'Italy', 'Poland') ORDER BY policy_date DESC;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 156, "num_statements": 1} {"question": "PostgreSQL regression test 'collate.utf8': Write the SELECT query (example 39).", "schema": null, "sql": "SELECT 'Δ' ~* '[γ-λ]' COLLATE PG_UNICODE_FAST;", "explanation": "Regression test for Collate.Utf8 in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT 'Δ' ~* '[γ-λ]' COLLATE PG_UNICODE_FAST) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 46, "num_statements": 1} {"question": "Insert new records for a game designer from Japan who has adopted virtual reality technology, and the games they have created with a rating of 9 or higher.", "schema": "CREATE TABLE GameDesigners (DesignerID INT, Name VARCHAR(30), Country VARCHAR(20), VRAdoption BOOLEAN);CREATE TABLE Games (GameID INT, GameName VARCHAR(30), Rating INT, DesignerID INT);", "sql": "INSERT INTO GameDesigners (DesignerID, Name, Country, VRAdoption) VALUES (1, 'Hiroshi Tanaka', 'Japan', TRUE);INSERT INTO Games (GameID, GameName, Rating, DesignerID) VALUES (1, 'Samurai Sword', 9, 1), (2, 'Ninja Scroll', 10, 1);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 229, "num_statements": 2} {"question": "Show the total revenue from football ticket sales in 2021", "schema": "CREATE TABLE ticket_sales (ticket_id INT, sale_date DATE, event_type VARCHAR(10), revenue DECIMAL(10,2)); INSERT INTO ticket_sales (ticket_id, sale_date, event_type, revenue) VALUES (1, '2021-08-01', 'Football', 50.00), (2, '2022-04-10', 'Football', 75.00), (3, '2021-12-31', 'Football', 60.00);", "sql": "SELECT SUM(revenue) as total_revenue FROM ticket_sales WHERE event_type = 'Football' AND YEAR(sale_date) = 2021;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 112, "num_statements": 1} {"question": "List all unique sectors and their corresponding minimum ESG ratings?", "schema": "CREATE TABLE companies (id INT, sector VARCHAR(20), ESG_rating FLOAT); INSERT INTO companies (id, sector, ESG_rating) VALUES (1, 'technology', 7.5), (2, 'finance', 6.8), (3, 'technology', 8.2), (4, 'renewable_energy', 9.0);", "sql": "SELECT sector, MIN(ESG_rating) FROM companies GROUP BY sector;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 62, "num_statements": 1} {"question": "What is the difference in budget between the 'research' and 'development' departments for the year 2019?", "schema": "CREATE TABLE department_budgets (department TEXT, budget INT, year INT); INSERT INTO department_budgets (department, budget, year) VALUES ('research', 1000000, 2019), ('development', 1200000, 2019), ('research', 1100000, 2020), ('development', 1300000, 2020);", "sql": "SELECT LAG(budget, 1, 0) OVER (ORDER BY year) as prev_budget, budget, budget - LAG(budget, 1, 0) OVER (ORDER BY year) as budget_diff FROM department_budgets WHERE department IN ('research', 'development') AND year = 2019 AND budget IS NOT NULL;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_window_function", "is_postgresql_specific": true, "sql_length": 244, "num_statements": 1} {"question": "Show an example of PostgreSQL ALTER CONVERSION (example 2).", "schema": null, "sql": "ALTER CONVERSION iso_8859_1_to_utf8 OWNER TO joe;", "explanation": "PostgreSQL ALTER CONVERSION command.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 49, "num_statements": 1} {"question": "PL/pgSQL test: Plpgsql Call (example 18).", "schema": null, "sql": "-- output arguments\n\nCREATE PROCEDURE test_proc5(INOUT a text)\nLANGUAGE plpgsql\nAS $$\nBEGIN\n a := a || '+' || a;\nEND;\n$$;", "explanation": "PL/pgSQL example from PostgreSQL source test for Plpgsql Call.", "validation_query": null, "source": "plpgsql_source", "difficulty": "basic", "category": "plpgsql_function", "is_postgresql_specific": true, "sql_length": 124, "num_statements": 3} {"question": "pgTAP test for Pgtap--Unpackaged--0.91.0 (assertion 219).", "schema": null, "sql": "ALTER EXTENSION pgtap ADD FUNCTION fk_ok ( NAME, NAME[], NAME, NAME[], TEXT );", "explanation": "SQL assertion from pgTAP test suite for Pgtap--Unpackaged--0.91.0.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 78, "num_statements": 1} {"question": "Calculate the total budget allocated to each department in the \"Budget\" table, where the department name contains 'Education'.", "schema": "CREATE TABLE Budget (id INT, department VARCHAR(50), allocated_budget FLOAT); INSERT INTO Budget (id, department, allocated_budget) VALUES (1, 'Education - Primary', 1000000.0), (2, 'Education - Secondary', 1500000.0), (3, 'Healthcare', 2000000.0), (4, 'Transportation', 1200000.0), (5, 'Education - Higher', 2500000.0);", "sql": "SELECT department, SUM(allocated_budget) as total_budget FROM Budget WHERE department LIKE '%Education%' GROUP BY department;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 125, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Where is the Eastern Nazarene College located?", "schema": "CREATE TABLE table_1973729_1 (location VARCHAR, institution VARCHAR)", "sql": "SELECT location FROM table_1973729_1 WHERE institution = 'Eastern Nazarene College';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 84, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which Athlete has a Place of 1, and a Year smaller than 1988, and a Country of gre, and a Time of 21:57:00?", "schema": "CREATE TABLE table_name_97 (athlete VARCHAR, time VARCHAR, country VARCHAR, place VARCHAR, year VARCHAR)", "sql": "SELECT athlete FROM table_name_97 WHERE place = 1 AND year < 1988 AND country = 'gre' AND time = '21:57:00';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 108, "num_statements": 1} {"question": "Update the end date of the 'Habitat Restoration' effort in the 'conservation_efforts' table", "schema": "CREATE TABLE conservation_efforts (id INT PRIMARY KEY, location VARCHAR(50), start_date DATE, end_date DATE, effort_description VARCHAR(255));", "sql": "UPDATE conservation_efforts SET end_date = '2023-12-31' WHERE effort_description = 'Habitat Restoration';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 105, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the mascot with the colors green and navy?", "schema": "CREATE TABLE table_15873547_1 (mascot VARCHAR, colors VARCHAR)", "sql": "SELECT mascot FROM table_15873547_1 WHERE colors = 'Green and Navy';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 68, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many losses does Abercarn RFC have?", "schema": "CREATE TABLE table_name_90 (lost VARCHAR, club VARCHAR)", "sql": "SELECT lost FROM table_name_90 WHERE club = 'abercarn rfc';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 59, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What season was Barcelona ranked higher than 12, had more than 96 goals and had more than 26 apps?", "schema": "CREATE TABLE table_name_59 (season VARCHAR, rank VARCHAR, goals VARCHAR, apps VARCHAR, club VARCHAR)", "sql": "SELECT season FROM table_name_59 WHERE apps > 26 AND club = 'barcelona' AND goals > 96 AND rank > 12;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 101, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who had the highest assists on March 18?", "schema": "CREATE TABLE table_17121262_9 (high_assists VARCHAR, date VARCHAR)", "sql": "SELECT high_assists FROM table_17121262_9 WHERE date = 'March 18';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How man seasons have the air date of October 2, 2001?", "schema": "CREATE TABLE table_2219961_2 (season__number VARCHAR, nbc_airdate VARCHAR)", "sql": "SELECT COUNT(season__number) FROM table_2219961_2 WHERE nbc_airdate = 'October 2, 2001';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 88, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: what's the party for the first elected in 1980", "schema": "CREATE TABLE table_1341472_15 (party VARCHAR, first_elected VARCHAR)", "sql": "SELECT party FROM table_1341472_15 WHERE first_elected = '1980';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 64, "num_statements": 1} {"question": "Where are the renewable energy projects located with a capacity greater than 700 MW?", "schema": "CREATE TABLE renewable_energy_projects (id INT, project_name VARCHAR(50), location VARCHAR(50), capacity_mw INT); INSERT INTO renewable_energy_projects (id, project_name, location, capacity_mw) VALUES (1, 'Wind Farm XYZ', 'Texas', 500); INSERT INTO renewable_energy_projects (id, project_name, location, capacity_mw) VALUES (2, 'Solar Park ABC', 'California', 800); INSERT INTO renewable_energy_projects (id, project_name, location, capacity_mw) VALUES (4, 'Hydro Plant DEF', 'Norway', 900);", "sql": "SELECT location FROM renewable_energy_projects WHERE capacity_mw > 700;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 71, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the maximum memory for the model discontinued in November 2001?", "schema": "CREATE TABLE table_10528691_4 (maximum_memory VARCHAR, discontinued VARCHAR)", "sql": "SELECT maximum_memory FROM table_10528691_4 WHERE discontinued = 'November 2001';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 81, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the source of the move from Port Vale?", "schema": "CREATE TABLE table_name_97 (source VARCHAR, moving_from VARCHAR)", "sql": "SELECT source FROM table_name_97 WHERE moving_from = 'port vale';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 65, "num_statements": 1} {"question": "What is the total oil production for each country in 2020?", "schema": "CREATE TABLE production_figures (year INT, country VARCHAR(50), oil_production_mbbl INT);", "sql": "SELECT country, SUM(oil_production_mbbl) FROM production_figures WHERE year = 2020 GROUP BY country;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 100, "num_statements": 1} {"question": "What is the average age of players?", "schema": "Player_Demographics", "sql": "SELECT AVG(Age) FROM Player_Demographics;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 41, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: When the result was l 13-7 ot, who was the opponent?", "schema": "CREATE TABLE table_name_27 (opponent VARCHAR, result VARCHAR)", "sql": "SELECT opponent FROM table_name_27 WHERE result = 'l 13-7 ot';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 62, "num_statements": 1} {"question": "Count the number of publications by graduate students in the Mathematics department in the last 5 years.", "schema": "CREATE TABLE students (id INT, name VARCHAR(50), department VARCHAR(50), start_year INT); INSERT INTO students (id, name, department, start_year) VALUES (1, 'Charlie', 'Mathematics', 2018); INSERT INTO students (id, name, department, start_year) VALUES (2, 'Dana', 'Computer Science', 2019); CREATE TABLE publications (id INT, student_id INT, year INT, title VARCHAR(100)); INSERT INTO publications (id, student_id, year, title) VALUES (1, 1, 2020, 'Theory of Algebra'); INSERT INTO publications (id, student_id, year, title) VALUES (2, 2, 2021, 'Machine Learning Algorithms');", "sql": "SELECT COUNT(p.id) FROM publications p JOIN students s ON p.student_id = s.id WHERE s.department = 'Mathematics' AND p.year BETWEEN YEAR(CURRENT_DATE) - 5 AND YEAR(CURRENT_DATE);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 178, "num_statements": 1} {"question": "What was the minimum ticket price for an exhibition in London before 2020?", "schema": "CREATE TABLE Exhibitions (id INT, city VARCHAR(50), year INT, ticket_price DECIMAL(5,2));INSERT INTO Exhibitions (id, city, year, ticket_price) VALUES (1, 'London', 2019, 15.00), (2, 'London', 2018, 10.00), (3, 'Paris', 2017, 20.00);", "sql": "SELECT MIN(ticket_price) FROM Exhibitions WHERE city = 'London' AND year < 2020;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 80, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is Season, when Prize is \"$100,000\"?", "schema": "CREATE TABLE table_name_37 (season VARCHAR, prize VARCHAR)", "sql": "SELECT season FROM table_name_37 WHERE prize = '$100,000';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 58, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the directed by season # 2", "schema": "CREATE TABLE table_23286722_1 (directed_by VARCHAR, season__number VARCHAR)", "sql": "SELECT directed_by FROM table_23286722_1 WHERE season__number = 2;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "What is the average disinformation score for content creators in the Middle East, grouped by gender?", "schema": "CREATE TABLE content_creators (creator_id INT, gender VARCHAR(50), country VARCHAR(50), disinformation_score INT); INSERT INTO content_creators (creator_id, gender, country, disinformation_score) VALUES (1, 'Female', 'Saudi Arabia', 50), (2, 'Male', 'Iran', 45), (3, 'Female', 'Turkey', 55);", "sql": "SELECT gender, AVG(disinformation_score) as avg_score FROM content_creators WHERE country IN ('Saudi Arabia', 'Iran', 'Turkey', 'United Arab Emirates', 'Israel') GROUP BY gender;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 178, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which English winning song had the winner aris runtuwene?", "schema": "CREATE TABLE table_name_7 (English VARCHAR, winner VARCHAR)", "sql": "SELECT WINNING_SONG(English AS Title) FROM table_name_7 WHERE winner = 'aris runtuwene';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 88, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What country is player Raymond Floyd from?", "schema": "CREATE TABLE table_name_35 (country VARCHAR, player VARCHAR)", "sql": "SELECT country FROM table_name_35 WHERE player = 'raymond floyd';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 65, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the average day in December of the game with a 8-2-3 record?", "schema": "CREATE TABLE table_name_25 (december INTEGER, record VARCHAR)", "sql": "SELECT AVG(december) FROM table_name_25 WHERE record = '8-2-3';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 63, "num_statements": 1} {"question": "What is the maximum claim amount for policies sold in California?", "schema": "CREATE TABLE Claims (PolicyID int, ClaimAmount int, SaleState varchar(20)); INSERT INTO Claims (PolicyID, ClaimAmount, SaleState) VALUES (1, 500, 'California'), (2, 2000, 'New York'), (3, 800, 'California');", "sql": "SELECT MAX(ClaimAmount) OVER (PARTITION BY SaleState) as MaxClaimAmount FROM Claims WHERE SaleState = 'California';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "query_window_function", "is_postgresql_specific": true, "sql_length": 115, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the lowest grid for Roberto Rolfo with more than 26 laps?", "schema": "CREATE TABLE table_name_87 (grid INTEGER, rider VARCHAR, laps VARCHAR)", "sql": "SELECT MIN(grid) FROM table_name_87 WHERE rider = 'roberto rolfo' AND laps > 26;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 80, "num_statements": 1} {"question": "Which space agencies have launched missions to Mars?", "schema": "CREATE TABLE space_mars_missions (agency VARCHAR(50), mission VARCHAR(50), year INTEGER); INSERT INTO space_mars_missions (agency, mission, year) VALUES ('NASA', 'Mars Pathfinder', 1997), ('NASA', 'Mars Global Surveyor', 1997), ('ISAS', 'Nozomi', 1998), ('NASA', 'Mars Climate Orbiter', 1999), ('NASA', 'Mars Polar Lander', 1999), ('NASA', 'Mars Odyssey', 2001), ('ESA', 'Mars Express', 2003), ('Beagle Consortium', 'Beagle 2', 2003), ('NASA', 'Mars Reconnaissance Orbiter', 2006), ('NASA', 'Phoenix', 2008), ('NASA', 'Mars Science Laboratory', 2012), ('NASA', 'Mars Atmosphere and Volatile Evolution', 2013), ('NASA', 'MAVEN', 2014), ('ESA', 'ExoMars Trace Gas Orbiter', 2016), ('NASA', 'InSight', 2018), ('UAE Space Agency', 'Hope Mars Mission', 2021), ('CNSA', 'Tianwen-1', 2021);", "sql": "SELECT agency FROM space_mars_missions GROUP BY agency;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 55, "num_statements": 1} {"question": "PostgreSQL regression test 'with': Write the SELECT query (example 100).", "schema": null, "sql": "select pg_get_viewdef('v_cycle1');", "explanation": "Regression test for With in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select pg_get_viewdef('v_cycle1')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 34, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the lowest First Title, when All-Time is greater than 1, when Country is \"United States (USA)\", and when Amateur Era is greater than 17?", "schema": "CREATE TABLE table_name_13 (first_title INTEGER, amateur_era VARCHAR, all_time VARCHAR, country VARCHAR)", "sql": "SELECT MIN(first_title) FROM table_name_13 WHERE all_time > 1 AND country = 'united states (usa)' AND amateur_era > 17;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 119, "num_statements": 1} {"question": "List all authors who have published articles in the 'business' section.", "schema": "CREATE TABLE articles (id INT, author VARCHAR(255), title VARCHAR(255), section VARCHAR(255), date DATE);", "sql": "SELECT DISTINCT author FROM articles WHERE section='business';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 62, "num_statements": 1} {"question": "What are the names of all vessels that are either cargo ships or tankers?", "schema": "CREATE TABLE Vessels (ID INT, Name VARCHAR(50), Type VARCHAR(50)); INSERT INTO Vessels (ID, Name, Type) VALUES (1, 'MV Pacific', 'Cargo Ship'), (2, 'MV Persian Gulf', 'Tanker');", "sql": "SELECT Name FROM Vessels WHERE Type IN ('Cargo Ship', 'Tanker');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 64, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the time for Brazil?", "schema": "CREATE TABLE table_name_5 (time VARCHAR, country VARCHAR)", "sql": "SELECT time FROM table_name_5 WHERE country = 'brazil';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 55, "num_statements": 1} {"question": "Write the SQL definition from PostgreSQL contrib extension 'hstore_plpython' (item 22).", "schema": null, "sql": "CREATE FUNCTION test4() RETURNS trigger\nLANGUAGE plpython3u\nTRANSFORM FOR TYPE hstore\nAS $$\nassert(TD[\"new\"] == {'a': 1, 'b': {'aa': 'bb', 'cc': None}})\nif TD[\"new\"][\"a\"] == 1:\n TD[\"new\"][\"b\"] = {'a': 1, 'b': 'boo', 'c': None}\n\nreturn \"MODIFY\"\n$$;", "explanation": "SQL definition from the 'hstore_plpython' PostgreSQL contrib extension.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "plpgsql_trigger", "is_postgresql_specific": false, "sql_length": 250, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: When £337,000 is £ saved what is the percentage of electricity reduction?", "schema": "CREATE TABLE table_29538735_1 (_percentage_electricity_reduction VARCHAR, £_saved VARCHAR)", "sql": "SELECT _percentage_electricity_reduction FROM table_29538735_1 WHERE £_saved = '£337,000';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 90, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What are the titles of segment c for series episode is 21-08?", "schema": "CREATE TABLE table_15187735_21 (segment_c VARCHAR, series_ep VARCHAR)", "sql": "SELECT segment_c FROM table_15187735_21 WHERE series_ep = '21-08';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the round number for the date 19/06/2009?", "schema": "CREATE TABLE table_21311525_1 (round VARCHAR, date VARCHAR)", "sql": "SELECT round FROM table_21311525_1 WHERE date = '19/06/2009';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 61, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'create_index' (example 549).", "schema": null, "sql": "CREATE UNIQUE INDEX CONCURRENTLY concur_reindex_ind5 ON concur_reindex_tab4 (c1);", "explanation": "DDL from PostgreSQL core regression test for Create Index.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_index", "is_postgresql_specific": true, "sql_length": 81, "num_statements": 1} {"question": "How many fish are there in fish farms located in the Mediterranean Sea with a density greater than 300 fish/m3?", "schema": "CREATE TABLE fish_farms (id INT, name TEXT, location TEXT, fish_density INT); INSERT INTO fish_farms (id, name, location, fish_density) VALUES (1, 'Farm A', 'Mediterranean Sea', 400), (2, 'Farm B', 'Atlantic Ocean', 200);", "sql": "SELECT COUNT(*) FROM fish_farms WHERE location = 'Mediterranean Sea' AND fish_density > 300;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 92, "num_statements": 1} {"question": "Delete records with a CO2 emission value lower than 100 in the 'emissions' table for the year 2005.", "schema": "CREATE TABLE emissions (country VARCHAR(255), year INT, co2_emission FLOAT); INSERT INTO emissions (country, year, co2_emission) VALUES ('Canada', 2005, 550.0), ('US', 2005, 520.0), ('Russia', 2005, 190.0), ('Finland', 2005, 60.0), ('Canada', 2005, 80.0);", "sql": "DELETE FROM emissions WHERE year = 2005 AND co2_emission < 100;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_delete", "is_postgresql_specific": false, "sql_length": 63, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: When was the oakachoy covered bridge listed?", "schema": "CREATE TABLE table_name_34 (listed VARCHAR, name VARCHAR)", "sql": "SELECT listed FROM table_name_34 WHERE name = 'oakachoy covered bridge';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 72, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who lost the game on May 28?", "schema": "CREATE TABLE table_name_38 (loss VARCHAR, date VARCHAR)", "sql": "SELECT loss FROM table_name_38 WHERE date = 'may 28';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 53, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Original title of возвращение / vozvrashcheniye had what English title?", "schema": "CREATE TABLE table_name_79 (english_title VARCHAR, original_title VARCHAR)", "sql": "SELECT english_title FROM table_name_79 WHERE original_title = 'возвращение / vozvrashcheniye';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 95, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which nation ran a time of 9.69 seconds?", "schema": "CREATE TABLE table_name_84 (nation VARCHAR, performance VARCHAR)", "sql": "SELECT nation FROM table_name_84 WHERE performance = '9.69';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "advanced", "category": "plpgsql", "is_postgresql_specific": false, "sql_length": 60, "num_statements": 1} {"question": "What is the total number of salmon farms in Norway and Scotland?", "schema": "CREATE TABLE salmon_farms (id INT, name TEXT, country TEXT); INSERT INTO salmon_farms (id, name, country) VALUES (1, 'Farm A', 'Norway'); INSERT INTO salmon_farms (id, name, country) VALUES (2, 'Farm B', 'Norway'); INSERT INTO salmon_farms (id, name, country) VALUES (3, 'Farm C', 'Scotland'); INSERT INTO salmon_farms (id, name, country) VALUES (4, 'Farm D', 'Scotland');", "sql": "SELECT COUNT(*) FROM salmon_farms WHERE country IN ('Norway', 'Scotland');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 74, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which driver for Greenfield Mowers Racing has fewer than 36 points?", "schema": "CREATE TABLE table_name_82 (driver VARCHAR, points VARCHAR, team VARCHAR)", "sql": "SELECT driver FROM table_name_82 WHERE points < 36 AND team = 'greenfield mowers racing';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 89, "num_statements": 1} {"question": "What is the total number of properties in urban areas with co-ownership agreements, and their average price?", "schema": "CREATE TABLE property (id INT, price INT, area VARCHAR(255), co_ownership BOOLEAN); INSERT INTO property (id, price, area, co_ownership) VALUES (1, 200000, 'urban', true), (2, 300000, 'rural', false);", "sql": "SELECT SUM(price), AVG(price) FROM property WHERE area = 'urban' AND co_ownership = true;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 89, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Tell me the Laps for time/retired of +1:08.491", "schema": "CREATE TABLE table_name_39 (laps VARCHAR, time_retired VARCHAR)", "sql": "SELECT laps FROM table_name_39 WHERE time_retired = '+1:08.491';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 64, "num_statements": 1} {"question": "PostgreSQL regression test 'tidrangescan': Write the SELECT query (example 25).", "schema": null, "sql": "SELECT ctid FROM tidrangescan WHERE ctid > '(1,4)' AND '(1,7)' >= ctid;", "explanation": "Regression test for Tidrangescan in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT ctid FROM tidrangescan WHERE ctid > '(1,4)' AND '(1,7)' >= ctid) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 71, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What are the producers of 磊磊牌嬰幼兒配方乳粉 ?", "schema": "CREATE TABLE table_18943444_1 (producer VARCHAR, product VARCHAR)", "sql": "SELECT producer FROM table_18943444_1 WHERE product = '磊磊牌嬰幼兒配方乳粉';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 67, "num_statements": 1} {"question": "pgTAP test for Resultset (assertion 144).", "schema": null, "sql": "INSERT INTO names (name) VALUES ('Carlos');", "explanation": "SQL assertion from pgTAP test suite for Resultset.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 43, "num_statements": 1} {"question": "What is the total budget for all education programs in the 'Asia' region?", "schema": "CREATE TABLE Budget (id INT, program_id INT, amount DECIMAL(10,2)); INSERT INTO Budget (id, program_id, amount) VALUES (1, 1, 10000.00), (2, 2, 20000.00), (3, 3, 30000.00);", "sql": "SELECT SUM(b.amount) FROM Budget b INNER JOIN EducationPrograms e ON b.program_id = e.id INNER JOIN Coordinators c ON e.coordinator_id = c.id WHERE c.region = 'Asia';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 166, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'join' (example 283).", "schema": null, "sql": "create temp table a (i integer);", "explanation": "DDL from PostgreSQL core regression test for Join.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 32, "num_statements": 1} {"question": "What is the average rating of movies produced in Canada and the USA?", "schema": "CREATE TABLE movie (id INT, title VARCHAR(255), rating DECIMAL(3,2), country VARCHAR(255)); INSERT INTO movie (id, title, rating, country) VALUES (1, 'Movie1', 7.5, 'Canada'), (2, 'Movie2', 8.2, 'USA'), (3, 'Movie3', 6.9, 'Mexico');", "sql": "SELECT AVG(rating) FROM movie WHERE country IN ('Canada', 'USA');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 65, "num_statements": 1} {"question": "How many charging stations are there in South Korea?", "schema": "CREATE TABLE Charging_Stations_South_Korea (Id INT, Type VARCHAR(50), Location VARCHAR(50)); INSERT INTO Charging_Stations_South_Korea (Id, Type, Location) VALUES (1, 'Public', 'South Korea'), (2, 'Private', 'South Korea'), (3, 'Public', 'Japan');", "sql": "SELECT COUNT(*) FROM Charging_Stations_South_Korea WHERE Type = 'Public' OR Type = 'Private';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 93, "num_statements": 1} {"question": "List the names of states that have both healthcare facilities and mental health facilities.", "schema": "CREATE TABLE healthcare_facilities (id INT, name VARCHAR(50), state VARCHAR(10)); INSERT INTO healthcare_facilities (id, name, state) VALUES (1, 'Facility A', 'State 1'), (2, 'Facility B', 'State 2'), (3, 'Facility C', 'State 3');", "sql": "SELECT h.state FROM healthcare_facilities h INNER JOIN mental_health_facilities m ON h.state = m.state GROUP BY h.state;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 120, "num_statements": 1} {"question": "What is the minimum price of non-vegan menu items?", "schema": "CREATE TABLE MenuItems (menu_item_id INT, name VARCHAR(255), price DECIMAL(5,2), is_vegan BOOLEAN); INSERT INTO MenuItems (menu_item_id, name, price, is_vegan) VALUES (1, 'Burger', 12.99, false), (2, 'Steak', 25.99, false), (3, 'Fries', 3.99, true);", "sql": "SELECT MIN(price) FROM MenuItems WHERE is_vegan = false;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 56, "num_statements": 1} {"question": "What is the maximum emission quantity for each emission type in pollution sources that have not been inspected in the last 6 months?", "schema": "CREATE TABLE PollutionSources (id INT, source_name VARCHAR(255), emission_type VARCHAR(255), emission_quantity INT, last_inspection DATE);", "sql": "SELECT emission_type, MAX(emission_quantity) FROM PollutionSources WHERE last_inspection <= DATE_SUB(CURRENT_DATE, INTERVAL 6 MONTH) GROUP BY emission_type;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 156, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the grid average with a +1 lap time and more than 27 laps?", "schema": "CREATE TABLE table_name_52 (grid INTEGER, time_retired VARCHAR, laps VARCHAR)", "sql": "SELECT AVG(grid) FROM table_name_52 WHERE time_retired = '+1 lap' AND laps > 27;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 80, "num_statements": 1} {"question": "How many donations were made in the year 2020?", "schema": "CREATE TABLE donations (id INT, donor_id INT, amount DECIMAL(10,2), donation_date DATE);", "sql": "SELECT COUNT(*) FROM donations WHERE YEAR(donation_date) = 2020;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 64, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the candidates for result of lost renomination democratic loss", "schema": "CREATE TABLE table_1342149_24 (candidates VARCHAR, result VARCHAR)", "sql": "SELECT candidates FROM table_1342149_24 WHERE result = 'Lost renomination Democratic loss';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 91, "num_statements": 1} {"question": "Show the railway stations in Canada", "schema": "CREATE TABLE Infrastructure (id INT, name VARCHAR(100), type VARCHAR(50), country VARCHAR(50)); INSERT INTO Infrastructure (id, name, type, country) VALUES (9, 'Toronto Union Station', 'Railway Station', 'Canada'), (10, 'Vancouver Pacific Central Station', 'Railway Station', 'Canada');", "sql": "SELECT name FROM Infrastructure WHERE type = 'Railway Station' AND country = 'Canada';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 86, "num_statements": 1} {"question": "What is the average assets value for clients in the 'South' region?", "schema": "CREATE TABLE clients (id INT, name TEXT, region TEXT, assets FLOAT); INSERT INTO clients (id, name, region, assets) VALUES (1, 'María Rodríguez', 'South', 60000.00), (2, 'David Kim', 'North', 90000.00), (3, 'Sophia Chen', 'East', 110000.00), (4, 'Taro Yamada', 'West', 130000.00);", "sql": "SELECT AVG(assets) FROM clients WHERE region = 'South';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 55, "num_statements": 1} {"question": "Find the total number of repeat attendees for workshops and lectures, and the number of unique attendees.", "schema": "CREATE TABLE attendees (id INT, event_id INT, repeat INT); INSERT INTO attendees (id, event_id, repeat) VALUES (1, 1, 0); INSERT INTO attendees (id, event_id, repeat) VALUES (2, 1, 1); INSERT INTO attendees (id, event_id, repeat) VALUES (3, 2, 0); INSERT INTO attendees (id, event_id, repeat) VALUES (4, 3, 0); INSERT INTO attendees (id, event_id, repeat) VALUES (5, 3, 1); INSERT INTO attendees (id, event_id, repeat) VALUES (6, 4, 0); INSERT INTO attendees (id, event_id, repeat) VALUES (7, 5, 1); INSERT INTO attendees (id, event_id, repeat) VALUES (8, 5, 1);", "sql": "SELECT SUM(repeat), COUNT(DISTINCT id) FROM attendees WHERE event_id IN (SELECT event_id FROM events WHERE type IN ('Workshop', 'Lecture'));", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 140, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the smallest height (ft) of a building in Frankfurt with a height (m) of 257 and less than 55 floors?", "schema": "CREATE TABLE table_name_24 (height__ft_ INTEGER, floors VARCHAR, city VARCHAR, height__m_ VARCHAR)", "sql": "SELECT MIN(height__ft_) FROM table_name_24 WHERE city = 'frankfurt' AND height__m_ = 257 AND floors < 55;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 105, "num_statements": 1} {"question": "What is the count of building permits issued per month in Colorado in 2020?", "schema": "CREATE TABLE building_permits (id INT, permit_number VARCHAR(20), issue_date DATE, state VARCHAR(10)); INSERT INTO building_permits (id, permit_number, issue_date, state) VALUES (1, '12345', '2020-01-01', 'Colorado'); INSERT INTO building_permits (id, permit_number, issue_date, state) VALUES (2, '67890', '2020-02-15', 'Colorado');", "sql": "SELECT MONTH(issue_date), COUNT(*) FROM building_permits WHERE state = 'Colorado' AND YEAR(issue_date) = 2020 GROUP BY MONTH(issue_date);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 137, "num_statements": 1} {"question": "What is the average delivery time for packages shipped via air from each warehouse in Q3 2021?", "schema": "CREATE TABLE deliveries (id INT, delivery_time FLOAT, warehouse VARCHAR(20), quarter INT, shipment_type VARCHAR(20)); INSERT INTO deliveries (id, delivery_time, warehouse, quarter, shipment_type) VALUES (1, 1.5, 'New York', 3, 'Air'), (2, 5.0, 'Seattle', 1, 'Ground'), (3, 1.2, 'New York', 3, 'Air'); CREATE TABLE warehouses (id INT, name VARCHAR(20)); INSERT INTO warehouses (id, name) VALUES (1, 'New York'), (2, 'Seattle'); CREATE TABLE shipment_types (id INT, type VARCHAR(20)); INSERT INTO shipment_types (id, type) VALUES (1, 'Air'), (2, 'Ground');", "sql": "SELECT p.warehouse, AVG(d.delivery_time) FROM deliveries d JOIN warehouses w ON d.warehouse = w.name JOIN shipment_types st ON d.shipment_type = st.type WHERE st.type = 'Air' AND d.quarter = 3 GROUP BY p.warehouse;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 214, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many first downs did the bills have when their opponent had 6", "schema": "CREATE TABLE table_16028459_2 (bills_first_downs INTEGER, opponents VARCHAR)", "sql": "SELECT MIN(bills_first_downs) FROM table_16028459_2 WHERE opponents = 6;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 72, "num_statements": 1} {"question": "How many posts were made by each influencer in the last month?", "schema": "CREATE TABLE influencers (influencer_id INT, influencer_name TEXT);CREATE TABLE posts (post_id INT, post_text TEXT, influencer_id INT, post_date DATE);", "sql": "SELECT i.influencer_id, i.influencer_name, COUNT(p.post_id) as posts_last_month FROM influencers i JOIN posts p ON i.influencer_id = p.influencer_id WHERE p.post_date >= DATE_SUB(CURRENT_DATE, INTERVAL 1 MONTH) GROUP BY i.influencer_id;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 236, "num_statements": 1} {"question": "What is the average engagement time for virtual tours in Mexico and Brazil?", "schema": "CREATE TABLE virtual_tours_mx_br (id INT, country VARCHAR(50), engagement_time INT); INSERT INTO virtual_tours_mx_br (id, country, engagement_time) VALUES (1, 'Mexico', 400), (2, 'Mexico', 500), (3, 'Brazil', 600), (4, 'Brazil', 700);", "sql": "SELECT country, AVG(engagement_time) FROM virtual_tours_mx_br WHERE country IN ('Mexico', 'Brazil') GROUP BY country;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 117, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the game for june 7", "schema": "CREATE TABLE table_18894744_5 (game VARCHAR, date VARCHAR)", "sql": "SELECT game FROM table_18894744_5 WHERE date = 'June 7';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 56, "num_statements": 1} {"question": "Which mobile subscribers have not updated their billing address in the last 6 months?", "schema": "CREATE TABLE mobile_subscribers (subscriber_id INT, first_name VARCHAR(50), last_name VARCHAR(50), billing_address VARCHAR(100), last_updated_date DATE);", "sql": "SELECT subscriber_id, first_name, last_name, billing_address FROM mobile_subscribers WHERE last_updated_date < DATE_SUB(CURRENT_DATE, INTERVAL 6 MONTH);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 152, "num_statements": 1} {"question": "pgTAP test for Resultset (assertion 348).", "schema": null, "sql": "SELECT * FROM check_test(\n set_hasnt( 'anames', 'others' ),\n true,\n 'set_hasnt( prepared, prepared, description )',\n '',\n ''\n);", "explanation": "SQL assertion from pgTAP test suite for Resultset.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 142, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What grid did the team racing professionals race on?", "schema": "CREATE TABLE table_name_84 (grid VARCHAR, team VARCHAR)", "sql": "SELECT grid FROM table_name_84 WHERE team = 'racing professionals';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 67, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the result of the game on august 30, 1968?", "schema": "CREATE TABLE table_name_69 (result VARCHAR, date VARCHAR)", "sql": "SELECT result FROM table_name_69 WHERE date = 'august 30, 1968';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 64, "num_statements": 1} {"question": "Which ethical manufacturing certifications have been awarded to factories in the Asia-Pacific region?", "schema": "CREATE TABLE factories (factory_id INT, name TEXT, location TEXT); CREATE TABLE certifications (certification_id INT, name TEXT, factory_id INT, region TEXT); INSERT INTO factories (factory_id, name, location) VALUES (1, 'Flex Factory', 'Japan'), (2, 'GreenTech Plant', 'Germany'), (3, 'Smart Manufacturing Co.', 'China'); INSERT INTO certifications (certification_id, name, factory_id, region) VALUES (1, 'Fair Trade', 1, 'Asia-Pacific'), (2, 'SA8000', 3, 'Asia-Pacific'), (3, 'BSCI', 2, 'Europe');", "sql": "SELECT factories.name, certifications.name FROM factories INNER JOIN certifications ON factories.factory_id = certifications.factory_id WHERE certifications.region = 'Asia-Pacific' AND certifications.name IN ('Fair Trade', 'SA8000', 'BSCI');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 241, "num_statements": 1} {"question": "PostgreSQL regression test 'tsrf': Write the SELECT query (example 40).", "schema": null, "sql": "SELECT dataa, datab b, generate_series(1,2) g, count(*) FROM few GROUP BY CUBE(dataa, datab, g);", "explanation": "Regression test for Tsrf in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT dataa, datab b, generate_series(1,2) g, count(*) FROM few GROUP BY CUBE(dataa, datab, g)) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "advanced", "category": "query_aggregation", "is_postgresql_specific": true, "sql_length": 96, "num_statements": 1} {"question": "Which mobile subscribers have not updated their billing information in the last 6 months?", "schema": "CREATE TABLE mobile_subscribers (subscriber_id INT, name VARCHAR(50), billing_updated_date DATE); INSERT INTO mobile_subscribers (subscriber_id, name, billing_updated_date) VALUES (1, 'John Doe', '2021-01-15'); INSERT INTO mobile_subscribers (subscriber_id, name, billing_updated_date) VALUES (2, 'Jane Smith', '2021-07-22');", "sql": "SELECT * FROM mobile_subscribers WHERE billing_updated_date <= DATE_SUB(CURRENT_DATE, INTERVAL 6 MONTH);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 104, "num_statements": 1} {"question": "pgTAP test for Pgtap--Unpackaged--0.91.0 (assertion 161).", "schema": null, "sql": "ALTER EXTENSION pgtap ADD VIEW pg_all_foreign_keys;", "explanation": "SQL assertion from pgTAP test suite for Pgtap--Unpackaged--0.91.0.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 51, "num_statements": 1} {"question": "What is the total value of assets under management for each risk level as of the last day of the previous quarter?", "schema": "CREATE TABLE portfolios (portfolio_id INT, customer_id INT, risk_level VARCHAR(10), assets_value DECIMAL(10, 2), portfolio_date DATE); INSERT INTO portfolios (portfolio_id, customer_id, risk_level, assets_value, portfolio_date) VALUES (1, 1, 'low', 10000, '2021-01-01'), (2, 2, 'high', 50000, '2021-02-01'), (3, 3, 'medium', 25000, '2021-03-01'), (4, 4, 'low', 15000, '2021-04-01');", "sql": "SELECT risk_level, SUM(assets_value) FROM portfolios WHERE portfolio_date = LAST_DAY(DATE_SUB(CURDATE(), INTERVAL 3 MONTH)) GROUP BY risk_level;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 144, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: what is the nation when the world rank is 6 and the birth date is 1971-07-31?", "schema": "CREATE TABLE table_name_88 (nation VARCHAR, world_rank VARCHAR, birth_date VARCHAR)", "sql": "SELECT nation FROM table_name_88 WHERE world_rank = '6' AND birth_date = '1971-07-31';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 86, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which text has a date of c. 1180?", "schema": "CREATE TABLE table_name_13 (text VARCHAR, date__ce_ VARCHAR)", "sql": "SELECT text FROM table_name_13 WHERE date__ce_ = 'c. 1180';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 59, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the competition for australia 13, new zealand 40, drawn 3", "schema": "CREATE TABLE table_20251343_4 (competition VARCHAR, head_to_head VARCHAR)", "sql": "SELECT competition FROM table_20251343_4 WHERE head_to_head = 'Australia 13, New Zealand 40, Drawn 3';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 102, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'brin_bloom' (example 6).", "schema": null, "sql": "CREATE INDEX brinidx_bloom ON brintest_bloom USING brin (\n\tbyteacol bytea_bloom_ops(false_positive_rate = 0.26)\n);", "explanation": "DDL from PostgreSQL core regression test for Brin Bloom.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "ddl_index", "is_postgresql_specific": false, "sql_length": 114, "num_statements": 1} {"question": "Display data from 'top_sustainable_suppliers' view", "schema": "CREATE TABLE suppliers( supplier_id INT PRIMARY KEY, name VARCHAR(255), location VARCHAR(255), sustainability_score INT); INSERT INTO suppliers (supplier_id, name, location, sustainability_score) VALUES (1, 'Supplier A', 'City A', 80), (2, 'Supplier B', 'City B', 85), (3, 'Supplier C', 'City C', 90), (4, 'Supplier D', 'City D', 95), (5, 'Supplier E', 'City E', 100); CREATE VIEW top_sustainable_suppliers AS SELECT * FROM suppliers WHERE sustainability_score >= 90; SELECT * FROM top_sustainable_suppliers;", "sql": "SELECT * FROM top_sustainable_suppliers;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 40, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the sum of league goals where the toal was 8 and league cup goals were larger than 0?", "schema": "CREATE TABLE table_name_80 (league_goals INTEGER, total_goals VARCHAR, league_cup_goals VARCHAR)", "sql": "SELECT SUM(league_goals) FROM table_name_80 WHERE total_goals = 8 AND league_cup_goals > 0;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 91, "num_statements": 1} {"question": "Find the total sales of cruelty-free cosmetics in the first half of 2021, grouped by region.", "schema": "CREATE TABLE products (product_id INT, product_name VARCHAR(100), is_cruelty_free BOOLEAN, region VARCHAR(50), sales INT, launch_year INT, launch_quarter INT); INSERT INTO products (product_id, product_name, is_cruelty_free, region, sales, launch_year, launch_quarter) VALUES (1, 'Lipstick', true, 'USA', 500, 2021, 1), (2, 'Mascara', false, 'Canada', 700, 2020, 4), (3, 'Foundation', true, 'USA', 800, 2021, 2), (4, 'Eyeshadow', true, 'USA', 600, 2020, 3), (5, 'Blush', false, 'Canada', 400, 2021, 1);", "sql": "SELECT region, SUM(sales) FROM products WHERE is_cruelty_free = true AND launch_year = 2021 AND launch_quarter BETWEEN 1 AND 2 GROUP BY region;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 143, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the score for Jock Hutchison?", "schema": "CREATE TABLE table_name_89 (score VARCHAR, player VARCHAR)", "sql": "SELECT score FROM table_name_89 WHERE player = 'jock hutchison';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 64, "num_statements": 1} {"question": "Insert new contract records for contracts awarded in Q4 2021", "schema": "CREATE TABLE Contracts (ID INT, Vendor TEXT, Amount DECIMAL(10,2), Quarter INT); INSERT INTO Contracts (ID, Vendor, Amount, Quarter) VALUES (1, 'Vendor A', 150000.00, 4), (2, 'Vendor B', 250000.00, 4);", "sql": "INSERT INTO Contracts (ID, Vendor, Amount, Quarter) VALUES (3, 'Vendor C', 300000.00, 4), (4, 'Vendor D', 100000.00, 4);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 120, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which Network has 16.0 million Viewers?", "schema": "CREATE TABLE table_name_67 (network VARCHAR, viewers VARCHAR)", "sql": "SELECT network FROM table_name_67 WHERE viewers = '16.0 million';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 65, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which release had 6 DVDs?", "schema": "CREATE TABLE table_25721_4 (release VARCHAR, _number_of_discs VARCHAR)", "sql": "SELECT release FROM table_25721_4 WHERE _number_of_discs = 6;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 61, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which mm × mm has an in × in of 11 × 17?", "schema": "CREATE TABLE table_name_7 (mm_×_mm VARCHAR, in_×_in VARCHAR)", "sql": "SELECT mm_×_mm FROM table_name_7 WHERE in_×_in = '11 × 17';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 59, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which Long has redshirt for its Avg/G?", "schema": "CREATE TABLE table_name_97 (long VARCHAR, avg_g VARCHAR)", "sql": "SELECT long FROM table_name_97 WHERE avg_g = 'redshirt';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 56, "num_statements": 1} {"question": "Delete all sales records with a quantity of 200 from the 'sales_data' table.", "schema": "CREATE TABLE sales_data (sales_id INT, drug_name VARCHAR(255), quantity_sold INT, sales_date DATE, region VARCHAR(255));", "sql": "DELETE FROM sales_data WHERE quantity_sold = 200;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_delete", "is_postgresql_specific": false, "sql_length": 49, "num_statements": 1} {"question": "What is the average water usage per person in the Northeast region, grouped by state?", "schema": "CREATE TABLE northeast_water_usage(state VARCHAR(20), usage NUMERIC(10,2), population NUMERIC(10,2)); INSERT INTO northeast_water_usage VALUES ('Maine', 50.67, 1345678), ('New Hampshire', 60.78, 1456789), ('Vermont', 70.89, 1567890), ('Massachusetts', 80.90, 1678901), ('Rhode Island', 90.10, 1789012), ('Connecticut', 100.20, 1890123);", "sql": "SELECT state, AVG(usage) FROM northeast_water_usage GROUP BY state;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 67, "num_statements": 1} {"question": "Get creative AI applications using the 'Deep Learning' technology", "schema": "CREATE TABLE creative_apps_2 (id INT, name VARCHAR(255), type VARCHAR(255), technology VARCHAR(255)); INSERT INTO creative_apps_2 (id, name, type, technology) VALUES (1, 'DeepArt', 'Art Generation', 'Deep Learning'), (2, 'DeepSpeech', 'Speech Recognition', 'Deep Learning');", "sql": "SELECT * FROM creative_apps_2 WHERE technology = 'Deep Learning';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 65, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: how many new managers replaced manager(s) who resigned?", "schema": "CREATE TABLE table_28164986_4 (incoming_manager VARCHAR, manner_of_departure VARCHAR)", "sql": "SELECT COUNT(incoming_manager) FROM table_28164986_4 WHERE manner_of_departure = 'Resigned';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 92, "num_statements": 1} {"question": "How many unique strains are available in TX dispensaries with organic in their name?", "schema": "CREATE TABLE strains (id INT, name TEXT, dispensary_id INT); INSERT INTO strains (id, name, dispensary_id) VALUES (1, 'Strain A', 1), (2, 'Strain B', 2), (3, 'Strain C', 3); CREATE TABLE dispensaries (id INT, name TEXT, state TEXT); INSERT INTO dispensaries (id, name, state) VALUES (1, 'Organic Dispensary', 'Texas'), (2, 'Dispensary X', 'Texas'), (3, 'Dispensary Y', 'Texas');", "sql": "SELECT COUNT(DISTINCT s.name) FROM strains s JOIN dispensaries d ON s.dispensary_id = d.id WHERE d.state = 'Texas' AND d.name LIKE '%organic%';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 146, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'cube' (example 132).", "schema": null, "sql": "SELECT '(-1,-1),(1,1)'::cube @> '(-1),(1)'::cube AS bool;", "explanation": "Example query from the 'cube' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "query_select", "is_postgresql_specific": true, "sql_length": 77, "num_statements": 1} {"question": "What is the total number of water wells dug in \"Latin America\" since 2018?", "schema": "CREATE TABLE water_wells (id INT, project_id INT, location VARCHAR(255), construction_date DATE); INSERT INTO water_wells (id, project_id, location, construction_date) VALUES (1, 4001, 'Colombia', '2019-05-01'); INSERT INTO water_wells (id, project_id, location, construction_date) VALUES (2, 4002, 'Peru', '2018-02-01');", "sql": "SELECT COUNT(*) FROM water_wells WHERE location = 'Latin America' AND YEAR(construction_date) >= 2018;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 102, "num_statements": 1} {"question": "How many players registered in 2022 play multiplayer online battle arena (MOBA) games on PC?", "schema": "CREATE TABLE players (player_id INT, name VARCHAR(30), age INT, gender VARCHAR(10), country VARCHAR(30), registration_date DATE, platform VARCHAR(20));", "sql": "SELECT COUNT(*) FROM players WHERE YEAR(registration_date) = 2022 AND genre = 'MOBA' AND platform = 'PC';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 105, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'fast_default' (example 44).", "schema": null, "sql": "INSERT INTO T VALUES (19), (20);", "explanation": "DML from PostgreSQL core regression test for Fast Default.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 32, "num_statements": 1} {"question": "What is the total number of exhibitions in each city?", "schema": "CREATE TABLE Exhibitions (id INT, city VARCHAR(20), visitors INT); INSERT INTO Exhibitions (id, city, visitors) VALUES (1, 'Paris', 3000), (2, 'London', 4000), (3, 'New York', 5000), (4, 'Paris', 2000), (5, 'London', 1000), (6, 'New York', 3000), (7, 'Tokyo', 4000), (8, 'Berlin', 5000), (9, 'Rome', 2000), (10, 'Tokyo', 3000);", "sql": "SELECT city, COUNT(id) FROM Exhibitions GROUP BY city;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 54, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which Office has a Party of dem, and a First Elected of 1991†?", "schema": "CREATE TABLE table_name_2 (office VARCHAR, party VARCHAR, first_elected VARCHAR)", "sql": "SELECT office FROM table_name_2 WHERE party = 'dem' AND first_elected = '1991†';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 80, "num_statements": 1} {"question": "Which region has the highest number of donors?", "schema": "CREATE TABLE donors (donor_id INT, donor_name VARCHAR(255), region VARCHAR(255)); INSERT INTO donors (donor_id, donor_name, region) VALUES (1001, 'John Smith', 'North America'), (1002, 'Marie Johnson', 'Europe'), (1003, 'Mario Rodriguez', 'South America'), (1004, 'Nguyen Tran', 'Asia');", "sql": "SELECT region, COUNT(*) as num_donors FROM donors GROUP BY region ORDER BY num_donors DESC LIMIT 1;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 99, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the highest league goals that have barry endean as the name, wirh FA cup apps greater than 0?", "schema": "CREATE TABLE table_name_46 (league_goals INTEGER, name VARCHAR, fa_cup_apps VARCHAR)", "sql": "SELECT MAX(league_goals) FROM table_name_46 WHERE name = 'barry endean' AND fa_cup_apps > 0;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 92, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is Incumbent, when District is \"Louisiana 4\"?", "schema": "CREATE TABLE table_name_7 (incumbent VARCHAR, district VARCHAR)", "sql": "SELECT incumbent FROM table_name_7 WHERE district = 'louisiana 4';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "List all unique attorney last names who have billed for cases in the 'Immigration' case type, in descending order based on the number of cases they handled.", "schema": "CREATE TABLE ImmigrationCases (CaseID INT, CaseType VARCHAR(20), AttorneyLastName VARCHAR(20), BillingAmount DECIMAL(10,2)); INSERT INTO ImmigrationCases (CaseID, CaseType, AttorneyLastName, BillingAmount) VALUES (1, 'Immigration', 'Garcia', 5000.00), (2, 'Immigration', 'Rodriguez', 3000.00), (3, 'Immigration', 'Garcia', 2000.00);", "sql": "SELECT DISTINCT AttorneyLastName, COUNT(*) AS CaseCount FROM ImmigrationCases WHERE CaseType = 'Immigration' GROUP BY AttorneyLastName ORDER BY CaseCount DESC;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 159, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the score for set 2 when the time is 18:00 and the score of set 1 is 18–25?", "schema": "CREATE TABLE table_name_48 (set_2 VARCHAR, time VARCHAR, set_1 VARCHAR)", "sql": "SELECT set_2 FROM table_name_48 WHERE time = '18:00' AND set_1 = '18–25';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 73, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What season was played at Safeco Field, against the Boston Red Sox, with a decision of L?", "schema": "CREATE TABLE table_name_60 (season VARCHAR, opponent VARCHAR, location VARCHAR, decision VARCHAR)", "sql": "SELECT season FROM table_name_60 WHERE location = 'safeco field' AND decision = 'l' AND opponent = 'boston red sox';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 116, "num_statements": 1} {"question": "What is the average water temperature in fish farms located in the Mediterranean and Aegean seas?", "schema": "CREATE TABLE Farm(id INT, location VARCHAR(50), temperature FLOAT); INSERT INTO Farm(id, location, temperature) VALUES (1, 'Mediterranean Sea', 22.5), (2, 'Aegean Sea', 21.3);", "sql": "SELECT AVG(temperature) FROM Farm WHERE location LIKE '%Mediterranean Sea%' OR location LIKE '%Aegean Sea%';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 108, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many dominant religions were in the settlement that had a population of 17105?", "schema": "CREATE TABLE table_2562572_53 (dominant_religion__2002_ VARCHAR, population__2011_ VARCHAR)", "sql": "SELECT COUNT(dominant_religion__2002_) FROM table_2562572_53 WHERE population__2011_ = 17105;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 93, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'btree_gist' (example 14).", "schema": null, "sql": "SELECT count(*) FROM inettmp WHERE a > '89.225.196.191'::inet;", "explanation": "Example query from the 'btree_gist' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 63, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who was the manufacturer for Matt Kenseth on March 7?", "schema": "CREATE TABLE table_2196127_1 (manufacturer VARCHAR, driver VARCHAR, date VARCHAR)", "sql": "SELECT manufacturer FROM table_2196127_1 WHERE driver = 'Matt Kenseth' AND date = 'March 7';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 92, "num_statements": 1} {"question": "PostgreSQL regression test 'aggregates': Write the SELECT query (example 80).", "schema": null, "sql": "SELECT covar_pop(b, a), covar_samp(b, a) FROM aggtest;", "explanation": "Regression test for Aggregates in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT covar_pop(b, a), covar_samp(b, a) FROM aggtest) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 54, "num_statements": 1} {"question": "What is the maximum number of infectious disease cases reported in a month in rural areas?", "schema": "CREATE TABLE infectious_disease_tracking (id INT, location TEXT, cases_per_month INT, month TEXT); INSERT INTO infectious_disease_tracking (id, location, cases_per_month, month) VALUES (1, 'Rural A', 10, 'January'), (2, 'Rural B', 15, 'February'), (3, 'Rural A', 20, 'March');", "sql": "SELECT MAX(cases_per_month) FROM infectious_disease_tracking WHERE location = 'rural';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 86, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How often does a train leave sealdah?", "schema": "CREATE TABLE table_12095519_1 (frequency VARCHAR, origin VARCHAR)", "sql": "SELECT frequency FROM table_12095519_1 WHERE origin = 'Sealdah';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 64, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the mintage when the theme was Santa Claus?", "schema": "CREATE TABLE table_name_15 (mintage VARCHAR, theme VARCHAR)", "sql": "SELECT mintage FROM table_name_15 WHERE theme = 'santa claus';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 62, "num_statements": 1} {"question": "PostgreSQL regression test 'matview': Write the SELECT query (example 108).", "schema": null, "sql": "SELECT * FROM mvtest_mv_v;", "explanation": "Regression test for Matview in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT * FROM mvtest_mv_v) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 26, "num_statements": 1} {"question": "List all peacekeeping operations in Africa with more than 5,000 personnel.", "schema": "CREATE TABLE peacekeeping_operations (operation_id INT, name TEXT, location TEXT, personnel INT); INSERT INTO peacekeeping_operations (operation_id, name, location, personnel) VALUES (1, 'MINUSCA', 'Central African Republic', 12000), (2, 'MONUSCO', 'Democratic Republic of the Congo', 16000);", "sql": "SELECT name FROM peacekeeping_operations WHERE location LIKE 'Africa%' AND personnel > 5000;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 92, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the venue where Chuck Klein played?", "schema": "CREATE TABLE table_name_85 (venue VARCHAR, player VARCHAR)", "sql": "SELECT venue FROM table_name_85 WHERE player = 'chuck klein';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 61, "num_statements": 1} {"question": "List diversity metrics for companies with a patent count greater than 2.", "schema": "CREATE TABLE company_demographics (company_id INT, patent_count INT, female_percent FLOAT, minority_percent FLOAT); INSERT INTO company_demographics (company_id, patent_count, female_percent, minority_percent) VALUES (1, 3, 0.4, 0.3), (2, 1, 0.5, 0.2), (3, 2, 0.35, 0.45), (4, 4, 0.6, 0.1), (5, 0, 0.7, 0.2);", "sql": "SELECT patent_count, female_percent, minority_percent FROM company_demographics WHERE patent_count > 2;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 103, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which College/junior/club team has a Pick # of 1?", "schema": "CREATE TABLE table_name_27 (college_junior_club_team VARCHAR, pick__number VARCHAR)", "sql": "SELECT college_junior_club_team FROM table_name_27 WHERE pick__number = '1';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 76, "num_statements": 1} {"question": "What is the average income in the city of 'San Francisco'?", "schema": "CREATE TABLE city (name VARCHAR(255), income FLOAT); INSERT INTO city (name, income) VALUES ('San Francisco', 96000), ('Oakland', 72000);", "sql": "SELECT AVG(income) FROM city WHERE name = 'San Francisco';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 58, "num_statements": 1} {"question": "Update the type of disaster for disaster with ID 1 to 'Super Typhoon'", "schema": "CREATE TABLE disasters (id INT PRIMARY KEY, name VARCHAR(50), location VARCHAR(50), type VARCHAR(50), start_date DATE, end_date DATE);", "sql": "UPDATE disasters SET type = 'Super Typhoon' WHERE id = 1;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 57, "num_statements": 1} {"question": "Minimum risk rating of investments in LATAM for a given fund?", "schema": "CREATE TABLE fund_investment_risks(fund_id INT, investment_id INT, risk_rating INT, region VARCHAR(10));", "sql": "SELECT MIN(risk_rating) FROM fund_investment_risks WHERE fund_id = 1 AND region = 'LATAM';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 90, "num_statements": 1} {"question": "What is the maximum amount of shelter support provided to a single household in Iraq?", "schema": "CREATE TABLE shelter_support (id INT, country VARCHAR(255), household_id INT, amount FLOAT); INSERT INTO shelter_support (id, country, household_id, amount) VALUES (1, 'Iraq', 1001, 500), (2, 'Iraq', 1002, 800), (3, 'Syria', 1003, 900);", "sql": "SELECT MAX(amount) FROM shelter_support WHERE country = 'Iraq';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 63, "num_statements": 1} {"question": "Show all tables related to algorithmic fairness.", "schema": "CREATE TABLE algorithmic_fairness (table_name VARCHAR(255)); INSERT INTO algorithmic_fairness (table_name) VALUES ('disparate_impact'), ('equal_opportunity'), ('demographic_parity');", "sql": "SELECT table_name FROM algorithmic_fairness;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 44, "num_statements": 1} {"question": "PostgreSQL regression test 'jsonb_jsonpath': Write the SELECT query (example 607).", "schema": null, "sql": "select jsonb_path_query('[]', '$.timestamp_tz()');", "explanation": "Regression test for Jsonb Jsonpath in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select jsonb_path_query('[]', '$.timestamp_tz()')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "advanced", "category": "query_select", "is_postgresql_specific": false, "sql_length": 50, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which Loses has Points smaller than 6?", "schema": "CREATE TABLE table_name_38 (loses INTEGER, points INTEGER)", "sql": "SELECT AVG(loses) FROM table_name_38 WHERE points < 6;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 54, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What the number of matches when the BBI is 3/27?", "schema": "CREATE TABLE table_28846752_9 (matches INTEGER, bbi VARCHAR)", "sql": "SELECT MIN(matches) FROM table_28846752_9 WHERE bbi = '3/27';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 61, "num_statements": 1} {"question": "What is the total sales revenue for each pharmaceutical company in Brazil for the year 2019?", "schema": "CREATE TABLE SalesRevenue (Company VARCHAR(50), Country VARCHAR(50), Revenue INT, Year INT); INSERT INTO SalesRevenue (Company, Country, Revenue, Year) VALUES ('Eurofarma', 'Brazil', 1000000, 2019), ('Aché', 'Brazil', 1200000, 2019), ('Hypera', 'Brazil', 1500000, 2019), ('Grupo Sanofi', 'Brazil', 1800000, 2019);", "sql": "SELECT Company, SUM(Revenue) FROM SalesRevenue WHERE Country = 'Brazil' AND Year = 2019 GROUP BY Company;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 105, "num_statements": 1} {"question": "How many users from each country are there in the users table?", "schema": "CREATE TABLE users (id INT, name VARCHAR(20), country VARCHAR(20), last_login TIMESTAMP); INSERT INTO users (id, name, country, last_login) VALUES (3, 'Bob', 'UK', '2021-01-04 10:00:00');", "sql": "SELECT country, COUNT(*) as users FROM users GROUP BY country;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 62, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many points did Corbari Italia earn in 1985?", "schema": "CREATE TABLE table_name_90 (points VARCHAR, entrant VARCHAR, year VARCHAR)", "sql": "SELECT points FROM table_name_90 WHERE entrant = 'corbari italia' AND year = 1985;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 82, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'create_role' (example 36).", "schema": null, "sql": "CREATE ROLE regress_encrypted_password ENCRYPTED PASSWORD 'foo';", "explanation": "DDL from PostgreSQL core regression test for Create Role.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 64, "num_statements": 1} {"question": "What is the number of open pedagogy projects completed by students in each department in the last 6 months?", "schema": "CREATE TABLE op_projects (project_id INT, department_id INT, completed BOOLEAN, completion_date DATE); INSERT INTO op_projects (project_id, department_id, completed, completion_date) VALUES (1, 501, true, '2022-01-01'), (2, 501, false, '2021-01-01'), (3, 502, true, '2022-03-01'), (4, 502, false, '2021-04-01');", "sql": "SELECT department_id, COUNT(project_id) as num_projects_completed FROM op_projects WHERE completion_date >= DATEADD(month, -6, CURRENT_TIMESTAMP) AND completed = true GROUP BY department_id;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 190, "num_statements": 1} {"question": "What is the total number of artworks in the 'Artworks' table for each year?", "schema": "CREATE TABLE Artworks (id INT, art_category VARCHAR(255), artist_name VARCHAR(255), year INT, art_medium VARCHAR(255), price DECIMAL(10,2));", "sql": "SELECT year, COUNT(*) as total FROM Artworks GROUP BY year;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 59, "num_statements": 1} {"question": "Show countries involved in more than 5 military technologies", "schema": "CREATE TABLE CountryMilitary (Country VARCHAR(50) PRIMARY KEY, Technology VARCHAR(50));", "sql": "SELECT Country FROM CountryMilitary GROUP BY Country HAVING COUNT(*) > 5;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 73, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the average Silver with a Total that is smaller than 1?", "schema": "CREATE TABLE table_name_97 (silver INTEGER, total INTEGER)", "sql": "SELECT AVG(silver) FROM table_name_97 WHERE total < 1;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 54, "num_statements": 1} {"question": "Show all reservoirs with capacity > 1000", "schema": "CREATE TABLE oil_reservoirs (reservoir_id INT, reservoir_name VARCHAR(100), location VARCHAR(100), oil_capacity FLOAT); INSERT INTO oil_reservoirs (reservoir_id, reservoir_name, location, oil_capacity) VALUES (1, 'Girassol', 'Angola', 800), (2, 'Jazireh-e-Jafar', 'Iran', 1500), (3, 'Thunder Horse', 'Gulf of Mexico', 1200), (4, 'Kashagan', 'Caspian Sea', 1100);", "sql": "SELECT * FROM oil_reservoirs WHERE oil_capacity > 1000;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 55, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'rules' (example 54).", "schema": null, "sql": "insert into rtest_t2 values (3, 23);", "explanation": "DML from PostgreSQL core regression test for Rules.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 36, "num_statements": 1} {"question": "Delete all TV shows with a runtime less than 30 minutes.", "schema": "CREATE TABLE shows (id INT, title TEXT, runtime INT); INSERT INTO shows (id, title, runtime) VALUES (1, 'Show 1', 60); CREATE TABLE tv_shows (id INT, show_id INT, episodes INT); INSERT INTO tv_shows (id, show_id, episodes) VALUES (1, 1, 10);", "sql": "DELETE FROM shows WHERE id IN (SELECT s.id FROM shows s INNER JOIN tv_shows t ON s.id = t.show_id WHERE s.runtime < 30);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "dml_delete", "is_postgresql_specific": false, "sql_length": 120, "num_statements": 1} {"question": "What is the total number of orders shipped to the United States that contain eco-friendly materials?", "schema": "CREATE TABLE orders (id INT, order_value DECIMAL(10,2), eco_friendly BOOLEAN, country VARCHAR(50)); INSERT INTO orders (id, order_value, eco_friendly, country) VALUES (1, 150.50, TRUE, 'USA'), (2, 75.20, FALSE, 'Canada'), (3, 225.00, TRUE, 'USA');", "sql": "SELECT COUNT(*) FROM orders WHERE eco_friendly = TRUE AND country = 'USA';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 74, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which team was the away team when home team was essendon?", "schema": "CREATE TABLE table_29090919_1 (away_team VARCHAR, home_team VARCHAR)", "sql": "SELECT away_team FROM table_29090919_1 WHERE home_team = 'Essendon';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 68, "num_statements": 1} {"question": "Find the average funding amount for startups founded by individuals from underrepresented countries", "schema": "CREATE TABLE underrepresented_countries (company_name VARCHAR(100), founder_country VARCHAR(50), founding_year INT, funding_amount INT);", "sql": "SELECT AVG(funding_amount) FROM underrepresented_countries;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 59, "num_statements": 1} {"question": "List the names of games that are not played by any player and their respective game IDs.", "schema": "CREATE TABLE GameLibrary (GameID INT, PlayerID INT); INSERT INTO GameLibrary (GameID, PlayerID) VALUES (1, 1), (1, 2), (1, 3), (2, 1), (2, 2), (3, 2), (3, 3), (4, 1), (4, 5), (5, 1), (5, 2), (5, 3), (5, 4), (5, 5), (5, 6), (5, 7), (5, 8), (5, 9);", "sql": "SELECT GameTitle, GameID FROM GameDesign WHERE GameID NOT IN (SELECT GameID FROM GameLibrary);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 94, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: what is the to par for retief goosen?", "schema": "CREATE TABLE table_name_93 (to_par VARCHAR, player VARCHAR)", "sql": "SELECT to_par FROM table_name_93 WHERE player = 'retief goosen';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 64, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the Traditional Chinese of 国殇 which is over 9?", "schema": "CREATE TABLE table_name_14 (traditional_chinese VARCHAR, standard_order VARCHAR, simplified_chinese VARCHAR)", "sql": "SELECT traditional_chinese FROM table_name_14 WHERE standard_order > 9 AND simplified_chinese = '国殇';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 101, "num_statements": 1} {"question": "Delete all records of expeditions that reached a depth greater than 6000 meters?", "schema": "CREATE TABLE Expeditions(ExpeditionID INT, LeaderName VARCHAR(20), MaxDepth INT); INSERT INTO Expeditions(ExpeditionID, LeaderName, MaxDepth) VALUES (1, 'Alice', 6500), (2, 'Bob', 4200), (3, 'Charlie', 2100), (4, 'Dana', 5100), (5, 'Eve', 7000);", "sql": "DELETE FROM Expeditions WHERE MaxDepth > 6000;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "dml_delete", "is_postgresql_specific": false, "sql_length": 46, "num_statements": 1} {"question": "PostgreSQL regression test 'money': Write the SELECT query (example 108).", "schema": null, "sql": "SELECT '42'::money * '-inf'::float8;", "explanation": "Regression test for Money in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT '42'::money * '-inf'::float8) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 36, "num_statements": 1} {"question": "What is the average weight of chemicals produced in the Asia Pacific region, grouped by chemical category?", "schema": "CREATE TABLE chemicals (id INT, name VARCHAR(255), weight FLOAT, region VARCHAR(255));", "sql": "SELECT category, AVG(weight) as avg_weight FROM (SELECT chemicals.name as category, AVG(weight) as weight FROM chemicals WHERE region = 'Asia Pacific' GROUP BY chemicals.name) as subquery GROUP BY category;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 206, "num_statements": 1} {"question": "Show an example of PostgreSQL RELEASE SAVEPOINT (example 1).", "schema": null, "sql": "BEGIN; INSERT INTO table1 VALUES (3); SAVEPOINT my_savepoint; INSERT INTO table1 VALUES (4); RELEASE SAVEPOINT my_savepoint; COMMIT;", "explanation": "PostgreSQL RELEASE SAVEPOINT command.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "plpgsql", "is_postgresql_specific": false, "sql_length": 132, "num_statements": 6} {"question": "Generate PostgreSQL SQL for: What is the car # of the Chevrolet that complete 363 laps?", "schema": "CREATE TABLE table_name_68 (car__number VARCHAR, make VARCHAR, laps VARCHAR)", "sql": "SELECT COUNT(car__number) FROM table_name_68 WHERE make = 'chevrolet' AND laps = 363;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 85, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: what is the Frequency that has a fairview City of license", "schema": "CREATE TABLE table_name_32 (frequency VARCHAR, city_of_license VARCHAR)", "sql": "SELECT frequency FROM table_name_32 WHERE city_of_license = 'fairview';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 71, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What episode number in the series was directed by John Behring?", "schema": "CREATE TABLE table_21164557_1 (no_in_series INTEGER, directed_by VARCHAR)", "sql": "SELECT MAX(no_in_series) FROM table_21164557_1 WHERE directed_by = 'John Behring';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 82, "num_statements": 1} {"question": "PostgreSQL regression test 'stats_rewrite': Write the SELECT query (example 88).", "schema": null, "sql": "SELECT last_analyze AS last_vacuum_analyze\n FROM pg_stat_all_tables WHERE relname = 'test_timestamp' \\gset\nALTER TABLE test_timestamp ALTER COLUMN a TYPE bigint;", "explanation": "Regression test for Stats Rewrite in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT last_analyze AS last_vacuum_analyze\n FROM pg_stat_all_tables WHERE relname = 'test_timestamp' \\gset\nALTER TABLE test_timestamp ALTER COLUMN a TYPE bigint) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "intermediate", "category": "ddl_alter", "is_postgresql_specific": false, "sql_length": 162, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'citext' (example 75).", "schema": null, "sql": "SELECT name FROM srt WHERE name !~ 'A$' ORDER BY name;", "explanation": "Example query from the 'citext' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 54, "num_statements": 1} {"question": "Update the name of the founder of the company with id 3 to 'Alex'", "schema": "CREATE TABLE founder (id INT, name TEXT);", "sql": "UPDATE company SET founder_gender = 'Other' WHERE id = 3; UPDATE founder SET name = 'Alex' WHERE id = 3;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 104, "num_statements": 2} {"question": "Find the total number of games played in NHL where the home team won, for the 2021-2022 season.", "schema": "CREATE TABLE NHL_Matches (Season VARCHAR(50), HomeTeam VARCHAR(50), AwayTeam VARCHAR(50), HomeTeamScore INT, AwayTeamScore INT); INSERT INTO NHL_Matches (Season, HomeTeam, AwayTeam, HomeTeamScore, AwayTeamScore) VALUES ('2021-2022', 'Toronto Maple Leafs', 'Montreal Canadiens', 4, 2);", "sql": "SELECT SUM(HomeTeamScore > AwayTeamScore) FROM NHL_Matches WHERE Season = '2021-2022';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 86, "num_statements": 1} {"question": "What is the second-deepest marine trench?", "schema": "CREATE TABLE marine_trenches (name TEXT, depth FLOAT); INSERT INTO marine_trenches (name, depth) VALUES ('Mariana Trench', 36000); INSERT INTO marine_trenches (name, depth) VALUES ('Tonga Trench', 35000); INSERT INTO marine_trenches (name, depth) VALUES ('Kermadec Trench', 32000); INSERT INTO marine_trenches (name, depth) VALUES ('Sunda Trench', 31000);", "sql": "SELECT name, depth FROM (SELECT name, depth, ROW_NUMBER() OVER (ORDER BY depth DESC) AS rn FROM marine_trenches) AS sub WHERE rn = 2;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_window_function", "is_postgresql_specific": true, "sql_length": 133, "num_statements": 1} {"question": "What is the total number of songs produced by artists from the United States?", "schema": "CREATE TABLE artists (id INT, name TEXT, country TEXT); INSERT INTO artists (id, name, country) VALUES (1, 'Taylor Swift', 'United States'), (2, 'Eminem', 'United States'); CREATE TABLE songs (id INT, title TEXT, artist_id INT); INSERT INTO songs (id, title, artist_id) VALUES (1, 'Shake it Off', 1), (2, 'Lose Yourself', 2);", "sql": "SELECT COUNT(*) FROM songs JOIN artists ON songs.artist_id = artists.id WHERE artists.country = 'United States';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 112, "num_statements": 1} {"question": "Write the SQL definition from PostgreSQL contrib extension 'contrib' (item 7).", "schema": null, "sql": "CREATE FUNCTION latitude(earth)\nRETURNS float8\nLANGUAGE SQL\nIMMUTABLE STRICT\nPARALLEL SAFE\nAS 'SELECT CASE WHEN cube_ll_coord($1, 3)/earth() < -1 THEN -90::float8 WHEN cube_ll_coord($1, 3)/earth() > 1 THEN 90::float8 ELSE degrees(asin(cube_ll_coord($1, 3)/earth())) END';", "explanation": "SQL definition from the 'contrib' PostgreSQL contrib extension.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "query_select", "is_postgresql_specific": false, "sql_length": 271, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'case' (example 47).", "schema": null, "sql": "CREATE FUNCTION inline_eq(foodomain, foodomain) returns boolean as\n 'SELECT CASE $2::text WHEN $1::text THEN true ELSE false END' language sql;", "explanation": "DDL from PostgreSQL core regression test for Case.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 144, "num_statements": 1} {"question": "Display the number of underwater volcanoes in the Atlantic Ocean.", "schema": "CREATE TABLE underwater_volcanoes (name TEXT, location TEXT, depth INT); INSERT INTO underwater_volcanoes (name, location, depth) VALUES ('Volcano 1', 'Atlantic Ocean', '3000'), ('Volcano 2', 'Pacific Ocean', '4000');", "sql": "SELECT COUNT(*) FROM underwater_volcanoes WHERE location = 'Atlantic Ocean';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 76, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the to par for the score 72-69-68=209?", "schema": "CREATE TABLE table_name_11 (to_par VARCHAR, score VARCHAR)", "sql": "SELECT to_par FROM table_name_11 WHERE score = 72 - 69 - 68 = 209;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the original air date of the episode directed by frederick e. o. toye?", "schema": "CREATE TABLE table_25923164_1 (original_air_date VARCHAR, directed_by VARCHAR)", "sql": "SELECT original_air_date FROM table_25923164_1 WHERE directed_by = 'Frederick E. O. Toye';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 90, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'publication' (example 65).", "schema": null, "sql": "CREATE SEQUENCE pub_test.regress_pub_seq1;", "explanation": "DDL from PostgreSQL core regression test for Publication.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 42, "num_statements": 1} {"question": "What are the total sales for vintage garments at each retail store?", "schema": "CREATE TABLE sales_vintage (id INT, garment VARCHAR(255), retail_store VARCHAR(255), sale_date DATE, quantity INT, sales_price DECIMAL(5,2)); INSERT INTO sales_vintage (id, garment, retail_store, sale_date, quantity, sales_price) VALUES (1, 'vintage_t-shirt', 'London Fashion', '2021-03-01', 20, 25.99); INSERT INTO sales_vintage (id, garment, retail_store, sale_date, quantity, sales_price) VALUES (2, 'vintage_jeans', 'Los Angeles Boutique', '2021-04-01', 12, 49.99);", "sql": "SELECT retail_store, SUM(quantity * sales_price) as total_sales FROM sales_vintage WHERE garment LIKE '%vintage%' GROUP BY retail_store;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 136, "num_statements": 1} {"question": "What is the total number of electric vehicle charging stations by country and state?", "schema": "CREATE TABLE ElectricVehicleChargingStationsByRegion(Country VARCHAR(50), State VARCHAR(50), Stations INT);", "sql": "SELECT Country, State, SUM(Stations) FROM ElectricVehicleChargingStationsByRegion GROUP BY Country, State;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 106, "num_statements": 1} {"question": "Get the names of publishers who have never published an article on 'Sunday'.", "schema": "CREATE TABLE articles (id INT, title TEXT, publication_day TEXT, publisher TEXT);", "sql": "SELECT DISTINCT publisher FROM articles WHERE publication_day != 'Sunday';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 74, "num_statements": 1} {"question": "What is the average water usage per capita in drought-impacted areas since 2015?", "schema": "CREATE TABLE drought_impact (id INT, area VARCHAR(50), year INT, PRIMARY KEY(id, year)); INSERT INTO drought_impact (id, area, year) VALUES (1, 'California', 2015), (1, 'California', 2016), (1, 'California', 2017), (2, 'Texas', 2015), (2, 'Texas', 2016), (2, 'Texas', 2017), (3, 'Australia', 2015), (3, 'Australia', 2016), (3, 'Australia', 2017); CREATE TABLE water_usage (id INT, area VARCHAR(50), year INT, usage FLOAT, PRIMARY KEY(id, year), FOREIGN KEY (id) REFERENCES drought_impact(id)); INSERT INTO water_usage (id, area, year, usage) VALUES (1, 'California', 2015, 150), (1, 'California', 2016, 140), (1, 'California', 2017, 130), (2, 'Texas', 2015, 200), (2, 'Texas', 2016, 210), (2, 'Texas', 2017, 220), (3, 'Australia', 2015, 300), (3, 'Australia', 2016, 310), (3, 'Australia', 2017, 320);", "sql": "SELECT AVG(wu.usage) as avg_usage FROM drought_impact di JOIN water_usage wu ON di.id = wu.id WHERE di.year >= 2015;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 116, "num_statements": 1} {"question": "Calculate the average fare for each public transportation mode", "schema": "CREATE TABLE fare (fare_id INT, mode VARCHAR(10), fare DECIMAL(10, 2));", "sql": "SELECT mode, AVG(fare) AS avg_fare FROM fare GROUP BY mode;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 59, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the lowest year for stage 12, category 1?", "schema": "CREATE TABLE table_name_33 (year INTEGER, category VARCHAR, stage VARCHAR)", "sql": "SELECT MIN(year) FROM table_name_33 WHERE category = '1' AND stage = 12;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 72, "num_statements": 1} {"question": "Find the average depth of all marine life research sites", "schema": "CREATE TABLE marine_sites (site_id INT, site_name VARCHAR(255), longitude DECIMAL(9,6), latitude DECIMAL(9,6), depth DECIMAL(5,2));", "sql": "SELECT AVG(depth) FROM marine_sites;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 36, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the lowest against when the draws are more than 0 and the losses are less than 3?", "schema": "CREATE TABLE table_name_74 (against INTEGER, losses VARCHAR, draws VARCHAR)", "sql": "SELECT MIN(against) FROM table_name_74 WHERE losses < 3 AND draws > 0;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 70, "num_statements": 1} {"question": "Which animal has the lowest population?", "schema": "CREATE TABLE animal_population (id INT, animal_name VARCHAR(50), population INT); INSERT INTO animal_population (id, animal_name, population) VALUES (1, 'Tiger', 2500), (2, 'Elephant', 5000), (3, 'Lion', 3000);", "sql": "SELECT animal_name, MIN(population) FROM animal_population;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 59, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Loss of postponed (rain) rescheduled for may 10 had what record?", "schema": "CREATE TABLE table_name_60 (record VARCHAR, loss VARCHAR)", "sql": "SELECT record FROM table_name_60 WHERE loss = 'postponed (rain) rescheduled for may 10';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 88, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'pgcrypto' (example 18).", "schema": null, "sql": "select pgp_sym_decrypt(\n\tpgp_sym_encrypt('Secret.', 'key', 'sess-key=1, cipher-algo=aes192'),\n\t'key', 'expect-sess-key=1, expect-cipher-algo=aes192');", "explanation": "Example query from the 'pgcrypto' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 150, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the name of the soccer stadium in NL providence?", "schema": "CREATE TABLE table_27369069_1 (soccer_stadium VARCHAR, province VARCHAR)", "sql": "SELECT soccer_stadium FROM table_27369069_1 WHERE province = 'NL';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "What is the average emergency response time in Houston?", "schema": "CREATE TABLE houston_emergency_responses (id INT, response_time INT, location VARCHAR(20)); INSERT INTO houston_emergency_responses (id, response_time, location) VALUES (1, 120, 'Houston'), (2, 90, 'Houston');", "sql": "SELECT AVG(response_time) FROM houston_emergency_responses WHERE location = 'Houston';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 86, "num_statements": 1} {"question": "Calculate the median age of members who have participated in yoga workouts.", "schema": "CREATE TABLE MemberWorkout (member_id INT, workout_id INT); INSERT INTO MemberWorkout (member_id, workout_id) VALUES (1001, 3001);", "sql": "SELECT AVG(DATEDIFF('day', dob, CURDATE()))/365 as median_age FROM Member m JOIN MemberWorkout mw ON m.id = mw.member_id JOIN WorkoutType wt ON mw.workout_id = wt.id WHERE wt.workout_name = 'Yoga' GROUP BY m.id ORDER BY median_age;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 231, "num_statements": 1} {"question": "What was the total budget allocated for each department in 2021?", "schema": "CREATE TABLE Budget (id INT, department VARCHAR(50), budget_amount DECIMAL(10,2), allocation_date DATE); INSERT INTO Budget (id, department, budget_amount, allocation_date) VALUES (1, 'Education', 50000, '2021-01-01'), (2, 'Health', 75000, '2021-04-15'), (3, 'Education', 60000, '2021-07-03'), (4, 'Health', 80000, '2021-10-17');", "sql": "SELECT department, SUM(budget_amount) as total_budget FROM Budget WHERE YEAR(allocation_date) = 2021 GROUP BY department;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 121, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the greatest draw that has 4th for place?", "schema": "CREATE TABLE table_name_62 (draw INTEGER, place VARCHAR)", "sql": "SELECT MAX(draw) FROM table_name_62 WHERE place = '4th';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 56, "num_statements": 1} {"question": "What is the average budget for renewable energy projects in each country, excluding projects with a budget greater than 8000000?", "schema": "CREATE TABLE Renewable_Energy_Projects (id INT, project_name VARCHAR(50), budget FLOAT, country VARCHAR(50)); INSERT INTO Renewable_Energy_Projects (id, project_name, budget, country) VALUES (1, 'Solar Farm', 5000000, 'USA'), (2, 'Wind Farm', 7000000, 'Canada'), (3, 'Hydroelectric Plant', 6000000, 'Mexico'), (4, 'Geothermal Plant', 4000000, 'USA'), (5, 'Tidal Energy', 3000000, 'Canada');", "sql": "SELECT country, AVG(budget) FROM Renewable_Energy_Projects WHERE budget <= 8000000 GROUP BY country;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 100, "num_statements": 1} {"question": "PL/pgSQL test: Plperl (example 116).", "schema": null, "sql": "CREATE OR REPLACE FUNCTION perl_spi_prepared_row(footype) RETURNS footype AS $$\n my $footype = shift;\n my $x = spi_prepare('select $1 AS a', 'footype');\n my $q = spi_exec_prepared( $x, {}, $footype );\n spi_freeplan($x);\nreturn $q->{rows}->[0]->{a};\n$$ LANGUAGE plperl;", "explanation": "PL/pgSQL example from PostgreSQL source test for Plperl.", "validation_query": null, "source": "plpgsql_source", "difficulty": "advanced", "category": "plpgsql_function", "is_postgresql_specific": true, "sql_length": 276, "num_statements": 6} {"question": "Which vendors have a recycling program in place and are reducing waste?", "schema": "CREATE TABLE sustainability (id INT, vendor VARCHAR(50), recycling_program BOOLEAN, waste_reduction BOOLEAN); INSERT INTO sustainability (id, vendor, recycling_program, waste_reduction) VALUES (4, 'Local Produce', true, true), (5, 'Farm Fresh', false, true), (6, 'Organic Harvest', true, false);", "sql": "SELECT vendor, CASE WHEN recycling_program THEN 'Yes' ELSE 'No' END as recycling_program, CASE WHEN waste_reduction THEN 'Yes' ELSE 'No' END as waste_reduction FROM sustainability WHERE recycling_program = 'True' AND waste_reduction = 'True';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 242, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Find the name of customer who has the lowest credit score.", "schema": "CREATE TABLE customer (cust_name VARCHAR, credit_score VARCHAR)", "sql": "SELECT cust_name FROM customer ORDER BY credit_score LIMIT 1;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 61, "num_statements": 1} {"question": "What is the total budget for successful community development initiatives in the 'community_development' table?", "schema": "CREATE TABLE community_development (id INT, initiative VARCHAR(50), budget FLOAT, status VARCHAR(20));", "sql": "SELECT SUM(budget) FROM community_development WHERE status = 'successful';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 74, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What horse has a 65.863 result?", "schema": "CREATE TABLE table_name_18 (horse VARCHAR, result VARCHAR)", "sql": "SELECT horse FROM table_name_18 WHERE result = '65.863';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 56, "num_statements": 1} {"question": "Identify the top two community health workers with the most unique patients served who identify as Hispanic or Latino, along with the number of patients they have served.", "schema": "CREATE TABLE CommunityHealthWorker (ID INT, Name TEXT); INSERT INTO CommunityHealthWorker (ID, Name) VALUES (1, 'Maria Rodriguez'); INSERT INTO CommunityHealthWorker (ID, Name) VALUES (2, 'Jose Hernandez'); INSERT INTO CommunityHealthWorker (ID, Name) VALUES (3, 'Fatima Khan'); CREATE TABLE PatientCommunityHealthWorker (PatientID INT, CommunityHealthWorkerID INT, Ethnicity TEXT);", "sql": "SELECT CommunityHealthWorkerID, COUNT(DISTINCT PatientID) as PatientsServed FROM PatientCommunityHealthWorker WHERE Ethnicity = 'Hispanic or Latino' GROUP BY CommunityHealthWorkerID ORDER BY PatientsServed DESC LIMIT 2;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 219, "num_statements": 1} {"question": "How many broadband subscribers does the company have in 'Suburban' areas?", "schema": "CREATE TABLE subscribers (id INT, subscriber_type VARCHAR(20), location VARCHAR(20)); INSERT INTO subscribers (id, subscriber_type, location) VALUES (1, 'Broadband', 'Suburban');", "sql": "SELECT COUNT(*) FROM subscribers WHERE subscriber_type = 'Broadband' AND location = 'Suburban';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 95, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which Transfer window has a Name of tofas?", "schema": "CREATE TABLE table_name_27 (transfer_window VARCHAR, name VARCHAR)", "sql": "SELECT transfer_window FROM table_name_27 WHERE name = 'tofas';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 63, "num_statements": 1} {"question": "What is the minimum revenue generated by a restaurant in the \"asian\" cuisine type?", "schema": "CREATE TABLE restaurants (id INT, name TEXT, cuisine TEXT, revenue FLOAT); INSERT INTO restaurants (id, name, cuisine, revenue) VALUES (1, 'Restaurant A', 'asian', 40000.00), (2, 'Restaurant B', 'italian', 50000.00), (3, 'Restaurant C', 'asian', 35000.00);", "sql": "SELECT MIN(revenue) FROM restaurants WHERE cuisine = 'asian';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 61, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which ZX Spectrum has a Year larger than 1984, and a Genre of arcade/strategy?", "schema": "CREATE TABLE table_name_92 (zx_spectrum VARCHAR, year VARCHAR, genre VARCHAR)", "sql": "SELECT zx_spectrum FROM table_name_92 WHERE year > 1984 AND genre = 'arcade/strategy';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 86, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the score and game outcome when High Points was andre miller (17)?", "schema": "CREATE TABLE table_17323042_11 (score VARCHAR, high_points VARCHAR)", "sql": "SELECT score FROM table_17323042_11 WHERE high_points = 'Andre Miller (17)';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 76, "num_statements": 1} {"question": "List defense projects with their start dates and contractors from the 'projects' and 'contractors' tables", "schema": "CREATE TABLE projects (id INT, project_name VARCHAR(255), start_date DATE, end_date DATE); CREATE TABLE contractors (id INT, contractor VARCHAR(255)); INSERT INTO projects (id, project_name, start_date, end_date) VALUES (1, 'Stealth Fighter Development', '2017-04-01', '2022-12-31'); INSERT INTO projects (id, project_name, start_date, end_date) VALUES (2, 'Missile Shield Upgrade', '2018-09-15', '2023-06-30'); INSERT INTO contractors (id, contractor) VALUES (1, 'Lockheed Martin'); INSERT INTO contractors (id, contractor) VALUES (2, 'Raytheon');", "sql": "SELECT projects.project_name, projects.start_date, contractors.contractor FROM projects INNER JOIN contractors ON projects.id = contractors.id;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 143, "num_statements": 1} {"question": "How many troops were deployed in peacekeeping missions in the first half of the years 2018 to 2020?", "schema": "CREATE TABLE peacekeeping_missions (id INT, year INT, quarter INT, troops INT); INSERT INTO peacekeeping_missions (id, year, quarter, troops) VALUES (1, 2018, 1, 4000), (2, 2018, 2, 5000), (3, 2019, 1, 5500), (4, 2019, 2, 6000), (5, 2020, 1, 6500), (6, 2020, 2, 7000);", "sql": "SELECT SUM(troops) FROM peacekeeping_missions WHERE quarter <= 2 AND year BETWEEN 2018 AND 2020;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 96, "num_statements": 1} {"question": "Select the names and games of players who have a score higher than the average score in game A, and update their score to be 100 points higher.", "schema": "CREATE TABLE Players (PlayerID INT, Name VARCHAR(50), Game VARCHAR(50), Score INT); INSERT INTO Players (PlayerID, Name, Game, Score) VALUES (1, 'John Doe', 'GameA', 1000); INSERT INTO Players (PlayerID, Name, Game, Score) VALUES (2, 'Jane Doe', 'GameB', 2000); INSERT INTO Players (PlayerID, Name, Game, Score) VALUES (3, 'Alice', 'GameA', 1500); INSERT INTO Players (PlayerID, Name, Game, Score) VALUES (4, 'Bob', 'GameA', 1200);", "sql": "UPDATE Players SET Score = Score + 100 WHERE Game = 'GameA' AND Score > (SELECT AVG(Score) FROM Players WHERE Game = 'GameA'); SELECT Name, Game, Score FROM Players WHERE Game = 'GameA' AND Score > (SELECT AVG(Score) FROM Players WHERE Game = 'GameA');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 252, "num_statements": 2} {"question": "Generate PostgreSQL SQL for: What's the lowest bits precision when the total bits are less than 16?", "schema": "CREATE TABLE table_name_32 (bits_precision INTEGER, total_bits INTEGER)", "sql": "SELECT MIN(bits_precision) FROM table_name_32 WHERE total_bits < 16;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 68, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who does pregame analysts for TSN2 network when Taylor Twellman is the color commentator?", "schema": "CREATE TABLE table_name_74 (pregame_analysts VARCHAR, color_commentator VARCHAR, network VARCHAR)", "sql": "SELECT pregame_analysts FROM table_name_74 WHERE color_commentator = 'taylor twellman' AND network = 'tsn2';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 108, "num_statements": 1} {"question": "How many ships were inspected in the 'Mediterranean' sea in the year 2019, and what was the total tonnage of those ships?", "schema": "CREATE TABLE ship_inspections (id INT, inspection_date DATE, region TEXT, tonnage INT); INSERT INTO ship_inspections (id, inspection_date, region, tonnage) VALUES (1, '2019-01-01', 'Mediterranean', 10000), (2, '2019-02-01', 'Mediterranean', 15000), (3, '2018-01-01', 'Mediterranean', 12000);", "sql": "SELECT COUNT(*), SUM(tonnage) FROM ship_inspections WHERE region = 'Mediterranean' AND inspection_date BETWEEN '2019-01-01' AND '2019-12-31';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 141, "num_statements": 1} {"question": "What is the minimum cost of electrical substation projects in the Europe region?", "schema": "CREATE TABLE InfrastructureProjects (id INT, name VARCHAR(100), region VARCHAR(50), project_type VARCHAR(50), cost FLOAT); INSERT INTO InfrastructureProjects (id, name, region, project_type, cost) VALUES (1, 'Paris Electrical Substation', 'Europe', 'electrical substation', 15000000);", "sql": "SELECT MIN(cost) FROM InfrastructureProjects WHERE region = 'Europe' AND project_type = 'electrical substation';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 112, "num_statements": 1} {"question": "Display the total number of marine species in the Pacific Ocean.", "schema": "CREATE TABLE marine_species_2 (name TEXT, location TEXT, num_individuals INT); INSERT INTO marine_species_2 (name, location, num_individuals) VALUES ('Clownfish', 'Indian Ocean', '10000'), ('Dolphin', 'Pacific Ocean', '20000');", "sql": "SELECT SUM(num_individuals) FROM marine_species_2 WHERE location = 'Pacific Ocean';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 83, "num_statements": 1} {"question": "What is the minimum maintenance cost for military equipment of type XYZ?", "schema": "CREATE TABLE military_equipment_cost (id INT, equipment_type VARCHAR(50), maintenance_cost FLOAT); INSERT INTO military_equipment_cost (id, equipment_type, maintenance_cost) VALUES (1, 'XYZ', 100000); INSERT INTO military_equipment_cost (id, equipment_type, maintenance_cost) VALUES (2, 'ABC', 200000);", "sql": "SELECT MIN(maintenance_cost) FROM military_equipment_cost WHERE equipment_type = 'XYZ';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 87, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What team belongs to Delmar?", "schema": "CREATE TABLE table_name_36 (team VARCHAR, school VARCHAR)", "sql": "SELECT team FROM table_name_36 WHERE school = 'delmar';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 55, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the total number of goals which have played more than 44", "schema": "CREATE TABLE table_name_13 (goals_for VARCHAR, played INTEGER)", "sql": "SELECT COUNT(goals_for) FROM table_name_13 WHERE played > 44;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 61, "num_statements": 1} {"question": "What is the total number of assistive technology items provided to students with hearing impairments?", "schema": "CREATE TABLE Assistive_Technology (Student_ID INT, Student_Name TEXT, Disability_Type TEXT, Assistive_Tech_Item TEXT); INSERT INTO Assistive_Technology (Student_ID, Student_Name, Disability_Type, Assistive_Tech_Item) VALUES (1, 'John Doe', 'Visual Impairment', 'Screen Reader'), (2, 'Jane Smith', 'Hearing Impairment', 'Hearing Aid'), (3, 'Michael Brown', 'ADHD', 'None');", "sql": "SELECT SUM(CASE WHEN Disability_Type = 'Hearing Impairment' THEN 1 ELSE 0 END) FROM Assistive_Technology WHERE Assistive_Tech_Item IS NOT NULL;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 143, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many times did the team lose who had a goal difference of +2, 52 goals for, and less than 9 draws?", "schema": "CREATE TABLE table_name_97 (lost INTEGER, drawn VARCHAR, goal_difference VARCHAR, goals_for VARCHAR)", "sql": "SELECT MIN(lost) FROM table_name_97 WHERE goal_difference = '+2' AND goals_for = 52 AND drawn < 9;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 98, "num_statements": 1} {"question": "List all records from the 'PlayerData' table where 'Age' is greater than 25", "schema": "CREATE TABLE PlayerData (PlayerID INT, Name VARCHAR(50), Age INT, Country VARCHAR(50)); INSERT INTO PlayerData (PlayerID, Name, Age, Country) VALUES ('1', 'John Doe', '25', 'USA'), ('2', 'Jane Smith', '30', 'Canada'), ('3', 'Mike Johnson', '22', 'USA'), ('4', 'Sarah Lee', '28', 'Canada'), ('5', 'Lucas Martinez', '35', 'Mexico');", "sql": "SELECT * FROM PlayerData WHERE Age > 25;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 40, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What yacht did Andrew Saies sail on?", "schema": "CREATE TABLE table_25561560_3 (yacht VARCHAR, skipper VARCHAR)", "sql": "SELECT yacht FROM table_25561560_3 WHERE skipper = 'Andrew Saies';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "What is the total number of cases in the 'restorative_justice' table?", "schema": "CREATE TABLE restorative_justice (id INT, case_id INT, case_type VARCHAR(50), participant_count INT); INSERT INTO restorative_justice (id, case_id, case_type, participant_count) VALUES (1, 1001, 'Mediation', 3), (2, 1002, 'Restorative Circle', 5), (3, 1003, 'Victim Offender Conference', 4);", "sql": "SELECT COUNT(*) FROM restorative_justice;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 41, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the Uyghur Latin with a population of 69,361?", "schema": "CREATE TABLE table_name_14 (uyghur_latin___uly__ VARCHAR, population__2010_census_ VARCHAR)", "sql": "SELECT uyghur_latin___uly__ FROM table_name_14 WHERE population__2010_census_ = '69,361';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 89, "num_statements": 1} {"question": "Insert a new policy record for policy type 'Travel'.", "schema": "CREATE TABLE Policy (PolicyID INT, PolicyType VARCHAR(50), PolicyHolderName VARCHAR(50), PolicyHolderAddress VARCHAR(50));", "sql": "INSERT INTO Policy (PolicyID, PolicyType, PolicyHolderName, PolicyHolderAddress) VALUES (4, 'Travel', 'Mark Brown', '321 Pine St');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 131, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What's the lowest number of cuts made while the win was less than 0?", "schema": "CREATE TABLE table_name_65 (cuts_made INTEGER, wins INTEGER)", "sql": "SELECT MIN(cuts_made) FROM table_name_65 WHERE wins < 0;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 56, "num_statements": 1} {"question": "How many healthcare workers are there in 'rural_clinics' table per location?", "schema": "CREATE TABLE rural_clinics (id INT, name TEXT, location TEXT, num_workers INT, avg_age FLOAT); INSERT INTO rural_clinics (id, name, location, num_workers, avg_age) VALUES (1, 'Rural Clinic A', 'Rural Area 1', 10, 45.3), (2, 'Rural Clinic B', 'Rural Area 2', 15, 42.8);", "sql": "SELECT location, SUM(num_workers) FROM rural_clinics GROUP BY location;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 71, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: which club is listed when bonus points is bonus points", "schema": "CREATE TABLE table_20396710_1 (club VARCHAR)", "sql": "SELECT club FROM table_20396710_1 WHERE \"bonus_points\" = 'bonus_points';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 72, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the year when West Manila has a tariff increase of 6.5?", "schema": "CREATE TABLE table_17302440_1 (year VARCHAR, west_manila VARCHAR)", "sql": "SELECT year FROM table_17302440_1 WHERE west_manila = '6.5';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 60, "num_statements": 1} {"question": "List all the national security events related to 'Energy' in the 'NationalSecurity' schema.", "schema": "CREATE SCHEMA IF NOT EXISTS NationalSecurity; CREATE TABLE IF NOT EXISTS NationalSecurity.National_Security_Events (event_id INT, event_name VARCHAR(255), start_date DATE, end_date DATE, category VARCHAR(255)); INSERT INTO NationalSecurity.National_Security_Events (event_id, event_name, start_date, end_date, category) VALUES (1, 'Energy Crisis', '1973-10-01', '1974-03-18', 'Energy'), (2, 'Fukushima Daiichi Nuclear Disaster', '2011-03-11', '2011-03-15', 'Energy');", "sql": "SELECT * FROM NationalSecurity.National_Security_Events WHERE category = 'Energy';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 82, "num_statements": 1} {"question": "Update the carbon sequestration of 'Redwood' species in North America to 15,000,000.", "schema": "CREATE TABLE regions (id INT PRIMARY KEY, name VARCHAR(255)); INSERT INTO regions (id, name) VALUES (1, 'Europe'); INSERT INTO regions (id, name) VALUES (2, 'North America'); CREATE TABLE species (id INT PRIMARY KEY, name VARCHAR(255)); INSERT INTO species (id, name) VALUES (1, 'Spruce'); INSERT INTO species (id, name) VALUES (2, 'Pine'); INSERT INTO species (id, name) VALUES (3, 'Redwood'); CREATE TABLE carbon_sequestration (region_id INT, species_id INT, sequestration INT); INSERT INTO carbon_sequestration (region_id, species_id, sequestration) VALUES (2, 3, 12000000);", "sql": "UPDATE carbon_sequestration SET sequestration = 15000000 WHERE region_id = 2 AND species_id = 3;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 96, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What School is in the kio kio area?", "schema": "CREATE TABLE table_name_60 (name VARCHAR, area VARCHAR)", "sql": "SELECT name FROM table_name_60 WHERE area = 'kio kio';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 54, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the lowest no. in series?", "schema": "CREATE TABLE table_17758010_2 (no_in_series INTEGER)", "sql": "SELECT MIN(no_in_series) FROM table_17758010_2;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 47, "num_statements": 1} {"question": "pgTAP test for Pgtap--Unpackaged--0.91.0 (assertion 192).", "schema": null, "sql": "ALTER EXTENSION pgtap ADD FUNCTION col_isnt_fk ( NAME, NAME, NAME[], TEXT );", "explanation": "SQL assertion from pgTAP test suite for Pgtap--Unpackaged--0.91.0.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 76, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the total Attendance for Games while Chicago was the visiting Team?", "schema": "CREATE TABLE table_name_61 (attendance INTEGER, visitor VARCHAR)", "sql": "SELECT SUM(attendance) FROM table_name_61 WHERE visitor = 'chicago';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 68, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What campus is in Brgy. Alangilan, Batangas City?", "schema": "CREATE TABLE table_name_59 (campus VARCHAR, location VARCHAR)", "sql": "SELECT campus FROM table_name_59 WHERE location = 'brgy. alangilan, batangas city';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 83, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Reported Offenses larger than 216, and a U.S. Rate smaller than 3274, and a Texas Rate smaller than 2688.9, and a Crime of violent crime has what killeen rate?", "schema": "CREATE TABLE table_name_8 (killeen_rate INTEGER, crime VARCHAR, texas_rate VARCHAR, reported_offenses VARCHAR, us_rate VARCHAR)", "sql": "SELECT SUM(killeen_rate) FROM table_name_8 WHERE reported_offenses > 216 AND us_rate < 3274 AND texas_rate < 2688.9 AND crime = 'violent crime';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 144, "num_statements": 1} {"question": "What is the average sourcing distance for a given ingredient?", "schema": "CREATE TABLE ingredients (ingredient_id INT, product_id INT, sourcing_distance FLOAT); INSERT INTO ingredients VALUES (1, 1, 250.5), (2, 1, 350.2), (3, 2, 150.8), (4, 2, 450.9);", "sql": "SELECT AVG(sourcing_distance) FROM ingredients WHERE ingredient_id = 1;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 71, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Where did the team play and what was the attendance against new jersey?", "schema": "CREATE TABLE table_13619135_7 (location_attendance VARCHAR, team VARCHAR)", "sql": "SELECT location_attendance FROM table_13619135_7 WHERE team = 'New Jersey';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 75, "num_statements": 1} {"question": "Which REEs were produced in 2017?", "schema": "CREATE TABLE production (year INT, element TEXT, quantity INT); INSERT INTO production (year, element, quantity) VALUES (2015, 'Dysprosium', 100), (2016, 'Dysprosium', 150), (2017, 'Dysprosium', 200), (2018, 'Dysprosium', 250), (2019, 'Dysprosium', 300), (2020, 'Dysprosium', 350), (2015, 'Neodymium', 500), (2016, 'Neodymium', 600), (2017, 'Neodymium', 700), (2018, 'Neodymium', 800), (2019, 'Neodymium', 900), (2020, 'Neodymium', 1000);", "sql": "SELECT DISTINCT element FROM production WHERE year = 2017;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 58, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the grid for Jack Brabham with more than 65 laps?", "schema": "CREATE TABLE table_name_34 (grid INTEGER, driver VARCHAR, laps VARCHAR)", "sql": "SELECT SUM(grid) FROM table_name_34 WHERE driver = 'jack brabham' AND laps > 65;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 80, "num_statements": 1} {"question": "What is the average severity score of vulnerabilities detected in the healthcare sector?", "schema": "CREATE TABLE vulnerabilities (id INT, sector VARCHAR(255), severity FLOAT); INSERT INTO vulnerabilities (id, sector, severity) VALUES (1, 'healthcare', 7.5), (2, 'finance', 5.2), (3, 'healthcare', 8.1);", "sql": "SELECT AVG(severity) FROM vulnerabilities WHERE sector = 'healthcare';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 70, "num_statements": 1} {"question": "What is the maximum number of security incidents recorded in a single day for each month?", "schema": "CREATE TABLE Incidents (id INT, incident_date DATE, country VARCHAR(255)); INSERT INTO Incidents (id, incident_date, country) VALUES (1, '2022-03-01', 'India'), (2, '2022-03-15', 'Germany'), (3, '2022-04-01', 'Brazil');", "sql": "SELECT EXTRACT(MONTH FROM Incidents.incident_date) AS Month, MAX(COUNT(*)) AS Max_Incidents_Per_Day FROM Incidents GROUP BY Month;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 130, "num_statements": 1} {"question": "pgTAP test for Throwtap (assertion 9).", "schema": null, "sql": "SELECT * FROM check_test(\n throws_ok( 'mytest', 'P0001'),\n true,\n 'prepared statement & errcode',\n 'threw P0001'\n ''\n);", "explanation": "SQL assertion from pgTAP test suite for Throwtap.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 134, "num_statements": 1} {"question": "How many cases were handled by each judge in the District Court last year?", "schema": "CREATE TABLE judge_cases (judge_name VARCHAR(20), court_type VARCHAR(20), num_cases INT); INSERT INTO judge_cases (judge_name, court_type, num_cases) VALUES ('Judge 1', 'District Court', 200), ('Judge 2', 'District Court', 300), ('Judge 3', 'District Court', 400);", "sql": "SELECT judge_name, SUM(num_cases) as total_cases FROM judge_cases WHERE court_type = 'District Court' GROUP BY judge_name;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 122, "num_statements": 1} {"question": "How many heritage sites are there in Asia, and what are their names?", "schema": "CREATE TABLE HeritageSites (id INT, name TEXT, location TEXT); INSERT INTO HeritageSites (id, name, location) VALUES (1, 'Taj Mahal', 'India'); CREATE TABLE Locations (id INT, site_id INT, continent TEXT); INSERT INTO Locations (id, site_id, continent) VALUES (1, 1, 'Asia');", "sql": "SELECT HS.name, COUNT(*) FROM HeritageSites HS INNER JOIN Locations L ON HS.id = L.site_id WHERE L.continent = 'Asia' GROUP BY HS.name;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 135, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'constraints' (example 86).", "schema": null, "sql": "CREATE TABLE ATACC2 () INHERITS (ATACC1);", "explanation": "DDL from PostgreSQL core regression test for Constraints.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_table", "is_postgresql_specific": true, "sql_length": 41, "num_statements": 1} {"question": "Who is the youngest employee in the Sales department?", "schema": "CREATE TABLE Employees (Employee_ID INT, First_Name VARCHAR(20), Last_Name VARCHAR(20), Department VARCHAR(20), Salary DECIMAL(10,2), Date_Hired DATE); CREATE VIEW Youngest_Employee AS SELECT Employee_ID, First_Name, Last_Name, Department, Salary, Date_Hired FROM Employees WHERE Date_Hired = (SELECT MIN(Date_Hired) FROM Employees); CREATE VIEW Youngest_Sales_Employee AS SELECT * FROM Youngest_Employee WHERE Department = 'Sales';", "sql": "SELECT * FROM Youngest_Sales_Employee;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 38, "num_statements": 1} {"question": "What is the minimum donation amount in 'India' for the year 2022?", "schema": "CREATE TABLE Donations (DonationID int, DonorID int, DonationDate date, DonationAmount decimal(10,2)); INSERT INTO Donations (DonationID, DonorID, DonationDate, DonationAmount) VALUES (1, 1, '2022-01-01', 50.00);", "sql": "SELECT MIN(DonationAmount) FROM Donations WHERE Country = 'India' AND YEAR(DonationDate) = 2022;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 96, "num_statements": 1} {"question": "PostgreSQL regression test 'circle': Write the SELECT query (example 17).", "schema": null, "sql": "SELECT diameter(f1) AS diameter\n FROM CIRCLE_TBL;", "explanation": "Regression test for Circle in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT diameter(f1) AS diameter\n FROM CIRCLE_TBL) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 50, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: In what series did Alice Levine Jamie East give a presentation on Saturday?", "schema": "CREATE TABLE table_name_60 (series VARCHAR, saturday VARCHAR)", "sql": "SELECT series FROM table_name_60 WHERE saturday = 'alice levine jamie east';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 76, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the chinese traditional for 美丽人生", "schema": "CREATE TABLE table_1893815_1 (chinese__traditional_ VARCHAR, chinese__simplified_ VARCHAR)", "sql": "SELECT chinese__traditional_ FROM table_1893815_1 WHERE chinese__simplified_ = '美丽人生';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 86, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What's the official website of Burning Flame 烈火雄心 with 22 episodes?", "schema": "CREATE TABLE table_name_33 (official_website VARCHAR, number_of_episodes VARCHAR, english_title__chinese_title_ VARCHAR)", "sql": "SELECT official_website FROM table_name_33 WHERE number_of_episodes > 22 AND english_title__chinese_title_ = 'burning flame 烈火雄心';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 130, "num_statements": 1} {"question": "Which vessels arrived in the Port of Long Beach more than once in the last month?", "schema": "CREATE TABLE ports (id INT, name TEXT); INSERT INTO ports (id, name) VALUES (1, 'Port of Long Beach'); CREATE TABLE vessel_arrivals (id INT, port_id INT, vessel_id INT, arrival_date DATE); INSERT INTO vessel_arrivals (id, port_id, vessel_id, arrival_date) VALUES (1, 1, 1, '2022-01-01'), (2, 1, 1, '2022-01-15'), (3, 1, 2, '2022-02-01');", "sql": "SELECT DISTINCT v.vessel_name FROM vessel_arrivals va JOIN vessels v ON v.id = va.vessel_id WHERE va.port_id = (SELECT id FROM ports WHERE name = 'Port of Long Beach') AND va.arrival_date BETWEEN DATEADD(day, -30, CURRENT_DATE) AND CURRENT_DATE GROUP BY v.vessel_name HAVING COUNT(v.vessel_name) > 1;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 301, "num_statements": 1} {"question": "What are the total number of accidents for each company?", "schema": "CREATE TABLE accidents (id INT PRIMARY KEY, company VARCHAR(50), accident_year INT); INSERT INTO accidents (id, company, accident_year) VALUES (1, 'SpaceX', 2000), (2, 'Rocket Lab', 2005), (3, 'SpaceX', 2010), (4, 'Rocket Lab', 2015);", "sql": "SELECT company, COUNT(*) FROM accidents GROUP BY company;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 57, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the title of the episode with a production code of 3x6404?", "schema": "CREATE TABLE table_27776266_1 (title VARCHAR, production_code VARCHAR)", "sql": "SELECT title FROM table_27776266_1 WHERE production_code = '3X6404';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 68, "num_statements": 1} {"question": "Write the SQL definition from PostgreSQL contrib extension 'contrib' (item 7).", "schema": null, "sql": "CREATE FUNCTION gin_extract_query_name(name, internal, int2, internal, internal)\nRETURNS internal\nAS 'MODULE_PATHNAME'\nLANGUAGE C STRICT IMMUTABLE;", "explanation": "SQL definition from the 'contrib' PostgreSQL contrib extension.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 147, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the average attendance at a game held at Firhill for the 5(r) round?", "schema": "CREATE TABLE table_name_26 (attendance INTEGER, venue VARCHAR, round VARCHAR)", "sql": "SELECT AVG(attendance) FROM table_name_26 WHERE venue = 'firhill' AND round = '5(r)';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 85, "num_statements": 1} {"question": "Find all warehouses located in countries with more than 5 fulfillment centers?", "schema": "CREATE TABLE Warehouse (WarehouseID INT, WarehouseName TEXT, Country TEXT); INSERT INTO Warehouse (WarehouseID, WarehouseName, Country) VALUES (1, 'Central Warehouse', 'USA'), (2, 'East Coast Warehouse', 'USA'), (3, 'West Coast Warehouse', 'USA'), (4, 'Toronto Warehouse', 'Canada'); CREATE TABLE FulfillmentCenter (FCID INT, FCName TEXT, Country TEXT); INSERT INTO FulfillmentCenter (FCID, FCName, Country) VALUES (1, 'Chicago FC', 'USA'), (2, 'New York FC', 'USA'), (3, 'Los Angeles FC', 'USA'), (4, 'Toronto FC', 'Canada');", "sql": "SELECT Country FROM Warehouse GROUP BY Country HAVING COUNT(DISTINCT WarehouseID) <= (SELECT COUNT(DISTINCT FCID) FROM FulfillmentCenter GROUP BY Country HAVING COUNT(DISTINCT FCID) > 5);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 187, "num_statements": 1} {"question": "Show the production quantity, waste produced, and energy consumption for the chemical with the highest safety rating", "schema": "CREATE TABLE Chemicals_Data (chemical_id INT, safety_rating DECIMAL(3,2), production_quantity INT, waste_amount DECIMAL(5,2), energy_consumption INT);", "sql": "SELECT cd.chemical_id, cd.production_quantity, cd.waste_amount, cd.energy_consumption FROM Chemicals_Data cd JOIN (SELECT chemical_id, MAX(safety_rating) as max_safety_rating FROM Chemicals_Data GROUP BY chemical_id) ms ON cd.chemical_id = ms.chemical_id AND cd.safety_rating = ms.max_safety_rating;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 299, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who wrote the episode \"The Dream Lover\", which was viewed by 3.96 million viewers?", "schema": "CREATE TABLE table_17467578_1 (written_by VARCHAR, us_viewers__million_ VARCHAR)", "sql": "SELECT written_by FROM table_17467578_1 WHERE us_viewers__million_ = '3.96';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 76, "num_statements": 1} {"question": "What is the percentage of donations made by first-time donors in the last month?", "schema": "CREATE TABLE donations (id INT, donor_id INT, is_first_time_donor BOOLEAN, amount DECIMAL(10, 2), donation_date DATE);", "sql": "SELECT 100.0 * SUM(CASE WHEN is_first_time_donor THEN amount ELSE 0 END) / SUM(amount) as pct_first_time_donors;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 112, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which round has a Kick Off of 1992-10-01 21:15?", "schema": "CREATE TABLE table_name_42 (round VARCHAR, kick_off VARCHAR)", "sql": "SELECT round FROM table_name_42 WHERE kick_off = '1992-10-01 21:15';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 68, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'pageinspect' (example 34).", "schema": null, "sql": "SELECT * FROM hash_page_items(get_raw_page('test_hash_a_idx', 1));", "explanation": "Example query from the 'pageinspect' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "What is the maximum safety rating in the 'testing_results' table?", "schema": "CREATE TABLE testing_results (id INT PRIMARY KEY, vehicle_id INT, safety_rating INT, crash_test_date DATE);", "sql": "SELECT MAX(safety_rating) FROM testing_results;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 47, "num_statements": 1} {"question": "How many open data initiatives were launched in California in 2020?", "schema": "CREATE TABLE open_data_initiatives (id INT, state TEXT, launch_date DATE); INSERT INTO open_data_initiatives (id, state, launch_date) VALUES (1, 'California', '2020-01-01'), (2, 'New York', '2019-12-31'), (3, 'California', '2020-03-15'), (4, 'Texas', '2019-11-25');", "sql": "SELECT COUNT(*) FROM open_data_initiatives WHERE state = 'California' AND EXTRACT(YEAR FROM launch_date) = 2020;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 112, "num_statements": 1} {"question": "Insert a new renewable energy source (wind) into the renewables table, and assign it a name, production value, and creation timestamp.", "schema": "CREATE TABLE renewables (id INT, name VARCHAR(50), type VARCHAR(50), production FLOAT, created_at TIMESTAMP);", "sql": "INSERT INTO renewables (name, type, production, created_at) VALUES ('Wind Farm 1', 'wind', 5000000, '2022-01-01 00:00:00');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 123, "num_statements": 1} {"question": "What is the average fare for a train trip in Seoul?", "schema": "CREATE TABLE train_routes (route_id INT, route_name VARCHAR(255), city VARCHAR(255), fare DECIMAL(5,2));", "sql": "SELECT AVG(fare) FROM train_routes WHERE city = 'Seoul';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 56, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many receptions were smaller than 7?", "schema": "CREATE TABLE table_name_12 (reception VARCHAR, long INTEGER)", "sql": "SELECT COUNT(reception) FROM table_name_12 WHERE long < 7;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 58, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the Works Number that has a Number of 153 in 1916?", "schema": "CREATE TABLE table_name_39 (works_number VARCHAR, date VARCHAR, number VARCHAR)", "sql": "SELECT works_number FROM table_name_39 WHERE date = '1916' AND number = '153';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 78, "num_statements": 1} {"question": "What is the total number of spacecraft launched by the European Space Agency?", "schema": "CREATE TABLE Space_Agencies (ID INT, Agency VARCHAR(50), Country VARCHAR(50), Total_Spacecraft INT); INSERT INTO Space_Agencies (ID, Agency, Country, Total_Spacecraft) VALUES (1, 'European Space Agency', 'Europe', 50), (2, 'National Aeronautics and Space Administration', 'USA', 200), (3, 'Roscosmos', 'Russia', 150), (4, 'China National Space Administration', 'China', 100), (5, 'Indian Space Research Organisation', 'India', 75);", "sql": "SELECT Total_Spacecraft FROM Space_Agencies WHERE Agency = 'European Space Agency';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 83, "num_statements": 1} {"question": "Show the number of museums and libraries in each borough of New York City from the 'nyc_culture_database'", "schema": "CREATE TABLE nyc_boroughs (id INT PRIMARY KEY, name VARCHAR(255));CREATE TABLE cultural_institutions (id INT PRIMARY KEY, name VARCHAR(255), type VARCHAR(255), borough_id INT, FOREIGN KEY (borough_id) REFERENCES nyc_boroughs(id)); INSERT INTO nyc_boroughs (id, name) VALUES (1, 'Manhattan'); INSERT INTO nyc_boroughs (id, name) VALUES (2, 'Brooklyn'); INSERT INTO cultural_institutions (id, name, type, borough_id) VALUES (1, 'New York Public Library', 'library', 1); INSERT INTO cultural_institutions (id, name, type, borough_id) VALUES (2, 'Metropolitan Museum of Art', 'museum', 1);", "sql": "SELECT nyc_boroughs.name as borough_name, COUNT(CASE WHEN cultural_institutions.type = 'museum' THEN 1 ELSE NULL END) as museum_count, COUNT(CASE WHEN cultural_institutions.type = 'library' THEN 1 ELSE NULL END) as library_count FROM nyc_boroughs INNER JOIN cultural_institutions ON nyc_boroughs.id = cultural_institutions.borough_id GROUP BY nyc_boroughs.name;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 361, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: what is the road numbers when the build year is 1943, the railroad (quantity) is clinchfield railroad (12 new, 6 secondhand)?", "schema": "CREATE TABLE table_name_33 (road_numbers VARCHAR, build_year VARCHAR, railroad__quantity_ VARCHAR)", "sql": "SELECT road_numbers FROM table_name_33 WHERE build_year = '1943' AND railroad__quantity_ = 'clinchfield railroad (12 new, 6 secondhand)';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 137, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the GDP of the nation with 56,210,000 people?", "schema": "CREATE TABLE table_name_77 (gdp_per_capita__us$_ VARCHAR, population VARCHAR)", "sql": "SELECT gdp_per_capita__us$_ FROM table_name_77 WHERE population = '56,210,000';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 79, "num_statements": 1} {"question": "What is the highest capacity prison in the 'prisons' table?", "schema": "CREATE TABLE prisons (id INT, name VARCHAR(50), location VARCHAR(50), capacity INT, population INT, avg_age FLOAT); INSERT INTO prisons (id, name, location, capacity, population, avg_age) VALUES (1, 'Folsom State Prison', 'California', 2600, 2100, 35.5), (2, 'Sing Sing Correctional Facility', 'New York', 1932, 1585, 42.3);", "sql": "SELECT name FROM prisons ORDER BY capacity DESC LIMIT 1;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 56, "num_statements": 1} {"question": "What is the average data usage for mobile customers in each country, segmented by prepaid and postpaid?", "schema": "CREATE TABLE mobile_customers (customer_id INT, data_usage FLOAT, country VARCHAR(50), postpaid BOOLEAN); INSERT INTO mobile_customers (customer_id, data_usage, country, postpaid) VALUES (1, 2000, 'USA', TRUE), (2, 1500, 'Mexico', FALSE), (3, 3000, 'Canada', TRUE);", "sql": "SELECT country, postpaid, AVG(data_usage) AS avg_data_usage FROM mobile_customers GROUP BY country, postpaid;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 109, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'numeric_big' (example 115).", "schema": null, "sql": "INSERT INTO num_exp_sub VALUES (2,3,'60301034611793381560791130065937008239.1887410058901624055165373281235236307966057696953851292799409809571799686645246659986351515277852800926805119259053513475211488115663286642009614039264484259692394657121785950542874788161683538629473170704026975786513125842675604577233871570629808699803522400038975396500769162308448069085909755023233588510630417065084295051270219462289785473643946404281422516357503746700705970360169619852905053433235726497292406142332833');", "explanation": "DML from PostgreSQL core regression test for Numeric Big.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 503, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Atlanta was a visitor on December 8, what was their record?", "schema": "CREATE TABLE table_name_68 (record VARCHAR, visitor VARCHAR)", "sql": "SELECT record FROM table_name_68 WHERE visitor = 'atlanta';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 59, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the city of txdot har?", "schema": "CREATE TABLE table_name_47 (city_of_license VARCHAR, brand VARCHAR)", "sql": "SELECT city_of_license FROM table_name_47 WHERE brand = 'txdot har';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 68, "num_statements": 1} {"question": "How many climate adaptation projects are there in South America with a budget greater than $50,000?", "schema": "CREATE TABLE climate_adaptation (id INT, project_name TEXT, budget INT, location TEXT); INSERT INTO climate_adaptation (id, project_name, budget, location) VALUES (1, 'Flood Prevention', 75000, 'South America'); INSERT INTO climate_adaptation (id, project_name, budget, location) VALUES (2, 'Drought Resistance', 40000, 'Africa');", "sql": "SELECT COUNT(*) FROM climate_adaptation WHERE location = 'South America' AND budget > 50000;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 92, "num_statements": 1} {"question": "Identify the dispensaries in the state of Colorado with the highest average wholesale order quantity for the last 6 months, and their corresponding average wholesale order quantity.", "schema": "CREATE TABLE dispensaries (id INT PRIMARY KEY, name VARCHAR(255), state VARCHAR(255), location VARCHAR(255)); CREATE TABLE wholesale_orders (id INT PRIMARY KEY, dispensary_id INT, strain_id INT, quantity INT, order_date DATE);", "sql": "SELECT dispensaries.name, AVG(wholesale_orders.quantity) as avg_quantity FROM dispensaries INNER JOIN wholesale_orders ON dispensaries.id = wholesale_orders.dispensary_id WHERE dispensaries.state = 'Colorado' AND wholesale_orders.order_date BETWEEN DATE_SUB(CURRENT_DATE, INTERVAL 6 MONTH) AND CURRENT_DATE GROUP BY dispensaries.id ORDER BY avg_quantity DESC LIMIT 10;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 368, "num_statements": 1} {"question": "What is the average accuracy of models trained on fairness_training_data?", "schema": "CREATE TABLE fairness_training_data (model_name TEXT, accuracy FLOAT);", "sql": "SELECT AVG(accuracy) FROM fairness_training_data;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 49, "num_statements": 1} {"question": "What is the minimum salary of athletes in the tennis_players table?", "schema": "CREATE TABLE tennis_players (player_id INT, name VARCHAR(50), country VARCHAR(50), ranking INT, salary DECIMAL(10, 2)); INSERT INTO tennis_players (player_id, name, country, ranking, salary) VALUES (1, 'Novak Djokovic', 'Serbia', 1, 21000000.00); INSERT INTO tennis_players (player_id, name, country, ranking, salary) VALUES (2, 'Rafael Nadal', 'Spain', 2, 18000000.00);", "sql": "SELECT MIN(salary) FROM tennis_players;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 39, "num_statements": 1} {"question": "Add a new virtual tourism attraction to 'attractions' table", "schema": "CREATE TABLE attractions (id INT PRIMARY KEY, name VARCHAR(255), type VARCHAR(255), country VARCHAR(255));", "sql": "INSERT INTO attractions (id, name, type, country) VALUES (1, 'Amazon Rainforest Virtual Tour', 'Virtual', 'Brazil');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 116, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'alter_generic' (example 37).", "schema": null, "sql": "CREATE SERVER alt_fserv2 FOREIGN DATA WRAPPER alt_fdw2;", "explanation": "DDL from PostgreSQL core regression test for Alter Generic.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 55, "num_statements": 1} {"question": "Display the names of all community development projects in the 'community_development' table, excluding those with a budget over 30000.", "schema": "CREATE TABLE community_development (name VARCHAR(255), budget INT); INSERT INTO community_development (name, budget) VALUES ('Handicraft Training', 15000), ('Local Business Support', 25000), ('Tourism Development', 30000);", "sql": "SELECT name FROM community_development WHERE budget <= 30000;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 61, "num_statements": 1} {"question": "What is the total installed capacity of solar energy projects in the 'renewables' schema?", "schema": "CREATE SCHEMA if not exists renewables; CREATE TABLE if not exists renewables.solar_projects (project_id int, name varchar(255), location varchar(255), installed_capacity float); INSERT INTO renewables.solar_projects (project_id, name, location, installed_capacity) VALUES (1, 'Solar Project 1', 'Country A', 50.0), (2, 'Solar Project 2', 'Country B', 75.0);", "sql": "SELECT SUM(installed_capacity) FROM renewables.solar_projects;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 62, "num_statements": 1} {"question": "Show a SQL definition from the timescaledb project (sort_optimization, item 2).", "schema": null, "sql": "CREATE INDEX ON order_test(time,device_id);", "explanation": "SQL definition from the open-source timescaledb PostgreSQL project.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "ddl_index", "is_postgresql_specific": false, "sql_length": 43, "num_statements": 1} {"question": "What is the total waste generation by month for the material 'Metal' in 2021?", "schema": "CREATE TABLE monthly_waste_generation (month VARCHAR(10), year INT, material VARCHAR(20), quantity INT); INSERT INTO monthly_waste_generation (month, year, material, quantity) VALUES ('January', 2021, 'Metal', 500), ('February', 2021, 'Metal', 600), ('March', 2021, 'Metal', 700), ('April', 2021, 'Metal', 800), ('May', 2021, 'Metal', 900);", "sql": "SELECT STRFTIME('%m', month) as month, SUM(quantity) as total_waste FROM monthly_waste_generation WHERE year = 2021 AND material = 'Metal' GROUP BY STRFTIME('%m', month);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 170, "num_statements": 1} {"question": "What is the minimum budget for a biosensor technology project in each city?", "schema": "CREATE TABLE biosensors(id INT, project VARCHAR(50), city VARCHAR(50), budget DECIMAL(10,2)); INSERT INTO biosensors VALUES (1, 'ProjectA', 'LA', 3000000.00), (2, 'ProjectB', 'NYC', 5000000.00), (3, 'ProjectC', 'LA', 4000000.00);", "sql": "SELECT city, MIN(budget) FROM biosensors GROUP BY city;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 55, "num_statements": 1} {"question": "Write the SQL definition from PostgreSQL contrib extension 'contrib' (item 43).", "schema": null, "sql": "CREATE FUNCTION sort_desc(_int4)\nRETURNS _int4\nAS 'MODULE_PATHNAME'\nLANGUAGE C STRICT IMMUTABLE PARALLEL SAFE;", "explanation": "SQL definition from the 'contrib' PostgreSQL contrib extension.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 110, "num_statements": 1} {"question": "pgTAP test for Ruletap (assertion 5).", "schema": null, "sql": "CREATE RULE upd_me AS ON UPDATE TO public.sometab DO ALSO SELECT now();", "explanation": "SQL assertion from pgTAP test suite for Ruletap.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "ddl_advanced", "is_postgresql_specific": true, "sql_length": 71, "num_statements": 1} {"question": "List all facilities along with their waste types", "schema": "CREATE TABLE WasteTypes (waste_type_id INT PRIMARY KEY, name VARCHAR, description VARCHAR); CREATE TABLE Facilities (facility_id INT PRIMARY KEY, name VARCHAR, location VARCHAR, capacity INT, waste_type_id INT, FOREIGN KEY (waste_type_id) REFERENCES WasteTypes(waste_type_id)); INSERT INTO WasteTypes (waste_type_id, name, description) VALUES (1, 'Recyclable Waste', 'Waste that can be recycled');", "sql": "SELECT Facilities.name AS facility_name, WasteTypes.name AS waste_type_name FROM Facilities INNER JOIN WasteTypes ON Facilities.waste_type_id = WasteTypes.waste_type_id;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 169, "num_statements": 1} {"question": "How many vulnerabilities were discovered in the last month, grouped by their severity levels?", "schema": "CREATE TABLE vulnerabilities (id INT, severity VARCHAR(50), discovered_at TIMESTAMP);", "sql": "SELECT severity, COUNT(*) as num_vulnerabilities FROM vulnerabilities WHERE discovered_at >= NOW() - INTERVAL '1 month' GROUP BY severity;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 138, "num_statements": 1} {"question": "What is the average price of garments made with organic cotton, per country?", "schema": "CREATE TABLE materials (id INT, garment_type VARCHAR(255), material VARCHAR(255), price DECIMAL(10,2)); INSERT INTO materials (id, garment_type, material, price) VALUES (1, 'T-Shirt', 'Organic Cotton', 25.99), (2, 'Pants', 'Organic Cotton', 49.99), (3, 'Jacket', 'Organic Cotton', 79.99);", "sql": "SELECT material, AVG(price) as avg_price, country FROM materials m JOIN (SELECT garment_type, material, country FROM garments g JOIN country_data cd ON g.manufacturer_id = cd.id) sub ON m.garment_type = sub.garment_type AND m.material = sub.material GROUP BY material, country;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 277, "num_statements": 1} {"question": "What is the average Gadolinium production by month for 2021 and 2022?", "schema": "CREATE TABLE mines (id INT, name TEXT, location TEXT, gadolinium_production FLOAT, timestamp DATE); INSERT INTO mines (id, name, location, gadolinium_production, timestamp) VALUES (1, 'Mine A', 'Canada', 120.5, '2021-01-01'), (2, 'Mine B', 'Canada', 150.7, '2021-02-01'), (3, 'Mine C', 'USA', 200.3, '2021-03-01'), (4, 'Mine D', 'Canada', 250.3, '2022-01-01'), (5, 'Mine E', 'USA', 300.3, '2022-02-01');", "sql": "SELECT MONTH(timestamp), AVG(gadolinium_production) FROM mines WHERE YEAR(timestamp) IN (2021, 2022) GROUP BY MONTH(timestamp);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 127, "num_statements": 1} {"question": "PostgreSQL regression test 'multirangetypes': Write the SELECT query (example 357).", "schema": null, "sql": "SELECT multirange_minus_multi(nummultirange(numrange(1,2)), nummultirange(numrange(2,4)));", "explanation": "Regression test for Multirangetypes in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT multirange_minus_multi(nummultirange(numrange(1,2)), nummultirange(numrange(2,4)))) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 90, "num_statements": 1} {"question": "Delete records of donors who have not donated more than $10000 in the 'donations' table.", "schema": "CREATE TABLE donations (donor_id INT, donation_amount DECIMAL(10,2)); INSERT INTO donations (donor_id, donation_amount) VALUES (1, 50000.00), (2, 75000.00), (3, 60000.00), (4, 45000.00), (5, 30000.00), (6, 500.00), (7, 1000.50), (8, 1500.00);", "sql": "DELETE FROM donations WHERE donor_id NOT IN (SELECT donor_id FROM (SELECT donor_id, MAX(donation_amount) AS max_donation FROM donations GROUP BY donor_id) t WHERE max_donation > 10000);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "dml_delete", "is_postgresql_specific": false, "sql_length": 185, "num_statements": 1} {"question": "Find the total number of marine species and their observation counts in the Indian Ocean, excluding sharks and rays.", "schema": "CREATE TABLE indian_marine_species (species VARCHAR(255), count INT); INSERT INTO indian_marine_species (species, count) VALUES ('Turtle', 150), ('Shark', 200), ('Manta Ray', 100), ('Dolphin', 120);", "sql": "SELECT COUNT(DISTINCT species) AS species_count, SUM(count) AS total_count FROM indian_marine_species WHERE species NOT IN ('Shark', 'Manta Ray');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 146, "num_statements": 1} {"question": "What are the names and types of military technologies developed in '2019' according to the 'Mil_Tech' table?", "schema": "CREATE TABLE Mil_Tech (tech_id INT, tech_name VARCHAR(50), tech_year INT, tech_type VARCHAR(50)); INSERT INTO Mil_Tech (tech_id, tech_name, tech_year, tech_type) VALUES (1, 'Stealth Fighter', 2019, 'Aircraft'); INSERT INTO Mil_Tech (tech_id, tech_name, tech_year, tech_type) VALUES (2, 'Carrier Battlegroup', 2017, 'Naval');", "sql": "SELECT tech_name, tech_type FROM Mil_Tech WHERE tech_year = 2019;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 65, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: what is the average bronze when the team is northwest territories and gold is more than 34?", "schema": "CREATE TABLE table_name_23 (bronze INTEGER, team VARCHAR, gold VARCHAR)", "sql": "SELECT AVG(bronze) FROM table_name_23 WHERE team = 'northwest territories' AND gold > 34;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 89, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'ltree' (example 37).", "schema": null, "sql": "SELECT index('0.1.2.3.5.4.5.6.8.5.6.8','5.6',-7);", "explanation": "Example query from the 'ltree' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 49, "num_statements": 1} {"question": "Which excavation sites have produced Roman pottery?", "schema": "CREATE TABLE Sites (SiteID int, SiteName text, Location text, Era text); INSERT INTO Sites (SiteID, SiteName, Location, Era) VALUES (1, 'Pompeii', 'Italy', 'Roman'); CREATE TABLE Artifacts (ArtifactID int, ArtifactName text, SiteID int, Era text); INSERT INTO Artifacts (ArtifactID, ArtifactName, SiteID, Era) VALUES (1, 'Roman Pottery', 1, 'Roman');", "sql": "SELECT Sites.SiteName FROM Sites INNER JOIN Artifacts ON Sites.SiteID = Artifacts.SiteID WHERE Artifacts.Era = 'Roman' AND Artifacts.ArtifactName = 'Roman Pottery';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 164, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many players hometown was Cincinnati, Ohio?", "schema": "CREATE TABLE table_11677691_4 (player VARCHAR, hometown VARCHAR)", "sql": "SELECT COUNT(player) FROM table_11677691_4 WHERE hometown = 'Cincinnati, Ohio';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 79, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What Venue had 2 or more Goals in a Friendly Competition?", "schema": "CREATE TABLE table_name_48 (venue VARCHAR, competition VARCHAR, goal VARCHAR)", "sql": "SELECT venue FROM table_name_48 WHERE competition = 'friendly' AND goal > 2;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 76, "num_statements": 1} {"question": "What is the total number of military vehicles sold by ACME Corp to the African region in the year 2022?", "schema": "CREATE TABLE Military_Equipment_Sales (supplier VARCHAR(255), region VARCHAR(255), equipment VARCHAR(255), quantity INT, sale_year INT);", "sql": "SELECT SUM(quantity) FROM Military_Equipment_Sales WHERE supplier = 'ACME Corp' AND region = 'Africa' AND sale_year = 2022;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 123, "num_statements": 1} {"question": "What is the total number of unique donors for each program category, excluding those with a total donation amount below $10,000?", "schema": "CREATE TABLE donor_category (donor_id INT, program_category VARCHAR(20), donation_amount INT);INSERT INTO donor_category VALUES (1, 'Arts', 5000), (2, 'Arts', 3000), (3, 'Education', 7000), (4, 'Health', 15000), (5, 'Arts', 8000), (6, 'Education', 6000);", "sql": "SELECT program_category, COUNT(DISTINCT donor_id) FROM donor_category WHERE program_category IN (SELECT program_category FROM donor_category WHERE donation_amount >= 10000) GROUP BY program_category;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 199, "num_statements": 1} {"question": "What is the minimum rent for units in each building, ordered by building type and then by rent?", "schema": "CREATE TABLE Buildings (building_id INT, name VARCHAR(50), building_type VARCHAR(50));CREATE TABLE Units (unit_id INT, building_id INT, rent INT);", "sql": "SELECT b.building_type, b.name, MIN(u.rent) as min_rent FROM Units u JOIN Buildings b ON u.building_id = b.building_id GROUP BY b.building_type, b.name ORDER BY b.building_type, min_rent;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 187, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Julio Robledo represents which country?", "schema": "CREATE TABLE table_name_55 (country VARCHAR, director_s_ VARCHAR)", "sql": "SELECT country FROM table_name_55 WHERE director_s_ = 'julio robledo';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 70, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'foreign_data' (example 54).", "schema": null, "sql": "CREATE USER MAPPING FOR current_user SERVER s1;", "explanation": "DDL from PostgreSQL core regression test for Foreign Data.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 47, "num_statements": 1} {"question": "pgTAP test for Pgtap--Unpackaged--0.91.0 (assertion 713).", "schema": null, "sql": "ALTER EXTENSION pgtap ADD FUNCTION db_owner_is ( NAME, NAME, TEXT );", "explanation": "SQL assertion from pgTAP test suite for Pgtap--Unpackaged--0.91.0.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 68, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which allergy type has most number of allergies?", "schema": "CREATE TABLE Allergy_type (allergytype VARCHAR)", "sql": "SELECT allergytype FROM Allergy_type GROUP BY allergytype ORDER BY COUNT(*) DESC LIMIT 1;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 89, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is week 15's result?", "schema": "CREATE TABLE table_name_24 (result VARCHAR, week VARCHAR)", "sql": "SELECT result FROM table_name_24 WHERE week = 15;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 49, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What result occurs when the round is 4 leg 2?", "schema": "CREATE TABLE table_name_87 (result VARCHAR, round VARCHAR)", "sql": "SELECT result FROM table_name_87 WHERE round = '4 leg 2';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 57, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many international builders are there?", "schema": "CREATE TABLE table_name_84 (total VARCHAR, builder VARCHAR)", "sql": "SELECT total FROM table_name_84 WHERE builder = 'international';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 64, "num_statements": 1} {"question": "PostgreSQL regression test 'timestamptz': Write the SELECT query (example 249).", "schema": null, "sql": "select generate_series(timestamptz '1995-08-06 12:12:12', timestamptz '1996-08-06 12:12:12', interval 'infinity');", "explanation": "Regression test for Timestamptz in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select generate_series(timestamptz '1995-08-06 12:12:12', timestamptz '1996-08-06 12:12:12', interval 'infinity')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 114, "num_statements": 1} {"question": "Find the average age of players who have participated in esports events, segmented by the continent they reside in.", "schema": "CREATE TABLE Players (PlayerID INT, Name VARCHAR(100), Age INT, Country VARCHAR(50)); INSERT INTO Players VALUES (1, 'John Doe', 25, 'USA'); INSERT INTO Players VALUES (2, 'Jane Smith', 28, 'Canada'); CREATE TABLE Countries (Country VARCHAR(50), Continent VARCHAR(50)); INSERT INTO Countries VALUES ('USA', 'North America'); INSERT INTO Countries VALUES ('Canada', 'North America'); CREATE TABLE EsportsEvents (PlayerID INT, EventName VARCHAR(100)); INSERT INTO EsportsEvents VALUES (1, 'GameX Championship'); INSERT INTO EsportsEvents VALUES (2, 'TournamentY');", "sql": "SELECT C.Continent, AVG(P.Age) as AvgAge FROM Players P JOIN Countries C ON P.Country = C.Country JOIN EsportsEvents E ON P.PlayerID = E.PlayerID GROUP BY C.Continent;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 167, "num_statements": 1} {"question": "Which decentralized applications (DApps) are associated with the 'Ethereum' blockchain and what are their names?", "schema": "CREATE TABLE dapps (dapp_id INT, dapp_name VARCHAR(50), blockchain_id INT); CREATE TABLE blockchains (blockchain_id INT, blockchain_name VARCHAR(50)); INSERT INTO dapps (dapp_id, dapp_name, blockchain_id) VALUES (1, 'Uniswap', 1); INSERT INTO blockchains (blockchain_id, blockchain_name) VALUES (1, 'Ethereum');", "sql": "SELECT dapps.dapp_name, blockchains.blockchain_name FROM dapps INNER JOIN blockchains ON dapps.blockchain_id = blockchains.blockchain_id WHERE blockchains.blockchain_name = 'Ethereum';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 184, "num_statements": 1} {"question": "What is the explainability score for each AI safety concern in Europe?", "schema": "CREATE TABLE AISafety (id INT, concern VARCHAR(255), explainability_score DECIMAL(5,2), region VARCHAR(255)); INSERT INTO AISafety (id, concern, explainability_score, region) VALUES (1, 'Data Privacy', 78.91, 'Europe'), (2, 'Unintended Consequences', 65.23, 'Asia'), (3, 'Bias', 82.34, 'Europe');", "sql": "SELECT concern, explainability_score FROM AISafety WHERE region = 'Europe';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 75, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Against the Houston Oilers after week 14, what was the result of the game?", "schema": "CREATE TABLE table_name_64 (result VARCHAR, week VARCHAR, opponent VARCHAR)", "sql": "SELECT result FROM table_name_64 WHERE week > 14 AND opponent = 'houston oilers';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 81, "num_statements": 1} {"question": "Which regions are facing severe water scarcity in 'WaterScarcity' table?", "schema": "CREATE TABLE WaterScarcity (region VARCHAR(20), scarcity_level VARCHAR(20)); INSERT INTO WaterScarcity (region, scarcity_level) VALUES ('RegionA', 'Moderate'), ('RegionB', 'Severe'), ('RegionC', 'Critical');", "sql": "SELECT region FROM WaterScarcity WHERE scarcity_level = 'Severe';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 65, "num_statements": 1} {"question": "Update the address of the contractor 'BMC' in the 'contractors' table", "schema": "CREATE TABLE contractors (contractor_id INT, name VARCHAR(50), address VARCHAR(100));", "sql": "UPDATE contractors SET address = '123 Green Street' WHERE name = 'BMC';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 71, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the lowest game for a record of 32-12-3?", "schema": "CREATE TABLE table_name_48 (game INTEGER, record VARCHAR)", "sql": "SELECT MIN(game) FROM table_name_48 WHERE record = '32-12-3';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 61, "num_statements": 1} {"question": "Find the average calorie count and total fat for each cuisine type", "schema": "CREATE TABLE cuisine (id INT, type VARCHAR(255), avg_calories DECIMAL(5,2), total_fat DECIMAL(5,2)); CREATE TABLE dishes (id INT, cuisine_id INT, name VARCHAR(255), calories DECIMAL(5,2), total_fat DECIMAL(5,2)); INSERT INTO cuisine (id, type, avg_calories, total_fat) VALUES (1, 'Italian', NULL, NULL), (2, 'Mexican', NULL, NULL); INSERT INTO dishes (id, cuisine_id, name, calories, total_fat) VALUES (1, 1, 'Pasta', 500, 20), (2, 1, 'Pizza', 800, 35), (3, 2, 'Tacos', 400, 15), (4, 2, 'Burritos', 700, 30);", "sql": "SELECT c.type, AVG(d.calories) AS avg_calories, SUM(d.total_fat) AS total_fat FROM cuisine c JOIN dishes d ON c.id = d.cuisine_id GROUP BY c.id;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 144, "num_statements": 1} {"question": "How many disaster response volunteers are there in 'regions' table and what are their names?", "schema": "CREATE TABLE regions (region_id INT, volunteer_name VARCHAR(50), is_disaster_response BOOLEAN); INSERT INTO regions (region_id, volunteer_name, is_disaster_response) VALUES (1, 'John Doe', true), (2, 'Jane Smith', false), (3, 'Alice Johnson', true), (4, 'Bob Brown', true), (5, 'Charlie Davis', false);", "sql": "SELECT COUNT(*), volunteer_name FROM regions WHERE is_disaster_response = true;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 79, "num_statements": 1} {"question": "What are the total number of articles and videos in the 'media_library'?", "schema": "CREATE TABLE media_library (id INT, type VARCHAR(10), title VARCHAR(50), length FLOAT, source VARCHAR(50)); INSERT INTO media_library (id, type, title, length, source) VALUES (1, 'article', 'Sample Article 1', 5.5, 'BBC'); INSERT INTO media_library (id, type, title, length, source) VALUES (2, 'video', 'Sample Video 1', 12.3, 'CNN');", "sql": "SELECT COUNT(*) FROM media_library WHERE type IN ('article', 'video');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 70, "num_statements": 1} {"question": "What is the percentage of the population that has received the flu vaccine by disability status?", "schema": "CREATE TABLE population (person_id INT, disability_status VARCHAR(10)); CREATE TABLE flu_vaccinations (vaccination_id INT, person_id INT, vaccination_date DATE);", "sql": "SELECT p.disability_status, COUNT(DISTINCT p.person_id) * 100.0 / (SELECT COUNT(DISTINCT p.person_id) FROM population p) AS percentage FROM population p JOIN flu_vaccinations fv ON p.person_id = fv.person_id GROUP BY p.disability_status;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 237, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the length of the Panther Chameleon?", "schema": "CREATE TABLE table_name_15 (length__female_ VARCHAR, common_name VARCHAR)", "sql": "SELECT length__female_ FROM table_name_15 WHERE common_name = 'panther chameleon';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 82, "num_statements": 1} {"question": "What is the total number of wells drilled in the Gulf of Mexico between 2017 and 2020, and what is the sum of their daily production rates of gas?", "schema": "CREATE TABLE gulf_of_mexico (id INT, well_name VARCHAR(255), drill_date DATE, daily_production_gas FLOAT);", "sql": "SELECT COUNT(*) as total_wells, SUM(daily_production_gas) as total_daily_production_gas FROM gulf_of_mexico WHERE drill_date BETWEEN '2017-01-01' AND '2020-12-31';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 163, "num_statements": 1} {"question": "What is the maximum number of daily transactions for each blockchain network?", "schema": "CREATE TABLE blockchains (blockchain_id INT, blockchain_name VARCHAR(50), daily_transactions INT); INSERT INTO blockchains (blockchain_id, blockchain_name, daily_transactions) VALUES (1, 'Ethereum', 50000); INSERT INTO blockchains (blockchain_id, blockchain_name, daily_transactions) VALUES (2, 'Solana', 100000);", "sql": "SELECT blockchain_name, MAX(daily_transactions) FROM blockchains GROUP BY blockchain_name;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 90, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What are the different cities listed?", "schema": "CREATE TABLE manufacturers (headquarter VARCHAR)", "sql": "SELECT DISTINCT headquarter FROM manufacturers;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 47, "num_statements": 1} {"question": "What is the total trading volume for digital assets issued in the Asian region?", "schema": "CREATE TABLE digital_assets (asset_id INT, asset_name VARCHAR(50), region VARCHAR(50), trading_volume DECIMAL(18,2)); INSERT INTO digital_assets (asset_id, asset_name, region, trading_volume) VALUES (1, 'Bitcoin', 'Asia', 15000000), (2, 'Ethereum', 'Asia', 8000000);", "sql": "SELECT SUM(trading_volume) FROM digital_assets WHERE region = 'Asia';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 69, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the location of the game with a score of 113-106?", "schema": "CREATE TABLE table_name_13 (location_attendance VARCHAR, score VARCHAR)", "sql": "SELECT location_attendance FROM table_name_13 WHERE score = '113-106';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 70, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What were the matches where the teams finished in the first group stage, in 1998?", "schema": "CREATE TABLE table_name_95 (matches VARCHAR, result VARCHAR, year VARCHAR)", "sql": "SELECT matches FROM table_name_95 WHERE result = 'first group stage' AND year = '1998';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 87, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What's the total appeared that has an RR Rate less than 0.17?", "schema": "CREATE TABLE table_name_95 (appeared INTEGER, rr_w_rate INTEGER)", "sql": "SELECT SUM(appeared) FROM table_name_95 WHERE rr_w_rate < 0.17;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 63, "num_statements": 1} {"question": "What is the minimum budget allocated for language preservation in 'North America'?", "schema": "CREATE TABLE LanguagePreservationNA (ProjectID INT PRIMARY KEY, ProjectName VARCHAR(50), Location VARCHAR(50), Budget DECIMAL(10,2)); INSERT INTO LanguagePreservationNA (ProjectID, ProjectName, Location, Budget) VALUES (1, 'Navajo Language', 'United States', 150000.00), (2, 'Inuit Language', 'Canada', 200000.00);", "sql": "SELECT MIN(Budget) FROM LanguagePreservationNA WHERE Location LIKE '%North America%';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 85, "num_statements": 1} {"question": "What is the total number of algorithmic fairness incidents in each region for the AI Writer application?", "schema": "CREATE TABLE IncidentByApp (id INT, app VARCHAR(255), region VARCHAR(255), incident_count INT); INSERT INTO IncidentByApp (id, app, region, incident_count) VALUES (1, 'AI Writer', 'North America', 12), (2, 'AI Artist', 'Europe', 15), (3, 'AI Composer', 'Asia', 8), (4, 'AI Writer', 'South America', 5), (5, 'AI Artist', 'Africa', 2), (6, 'AI Composer', 'North America', 10), (7, 'AI Writer', 'Europe', 18), (8, 'AI Writer', 'Asia', 9), (9, 'AI Writer', 'Africa', 7);", "sql": "SELECT region, SUM(incident_count) as total_incidents FROM IncidentByApp WHERE app = 'AI Writer' GROUP BY region;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 113, "num_statements": 1} {"question": "Update the carbon offset initiative records for project 'Urban Forest' to have a total offset of 5000 tons.", "schema": "CREATE TABLE carbon_offset_initiatives ( id INT PRIMARY KEY, project_name VARCHAR(255), total_offset INT );", "sql": "UPDATE carbon_offset_initiatives SET total_offset = 5000 WHERE project_name = 'Urban Forest';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 93, "num_statements": 1} {"question": "List all astronauts who have participated in space missions and their medical records.", "schema": "CREATE TABLE Astronauts (id INT, name VARCHAR(255), age INT); CREATE TABLE SpaceMissions (id INT, astronaut_id INT, mission VARCHAR(255)); CREATE TABLE MedicalRecords (id INT, astronaut_id INT, medical_condition VARCHAR(255));", "sql": "SELECT Astronauts.name, SpaceMissions.mission, MedicalRecords.medical_condition FROM Astronauts INNER JOIN SpaceMissions ON Astronauts.id = SpaceMissions.astronaut_id INNER JOIN MedicalRecords ON Astronauts.id = MedicalRecords.astronaut_id;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 240, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who earned the save in the game against the Sinon Bulls when Jeriome Robertson took the loss?", "schema": "CREATE TABLE table_name_32 (save VARCHAR, opponent VARCHAR, loss VARCHAR)", "sql": "SELECT save FROM table_name_32 WHERE opponent = 'sinon bulls' AND loss = 'jeriome robertson';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 93, "num_statements": 1} {"question": "Write the SQL definition from PostgreSQL contrib extension 'contrib' (item 71).", "schema": null, "sql": "CREATE FUNCTION gbt_int4_penalty(internal,internal,internal)\nRETURNS internal\nAS 'MODULE_PATHNAME'\nLANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;", "explanation": "SQL definition from the 'contrib' PostgreSQL contrib extension.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 141, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the title of the episode number 11 of the season?", "schema": "CREATE TABLE table_27833469_1 (title VARCHAR, season__number VARCHAR)", "sql": "SELECT title FROM table_27833469_1 WHERE season__number = '11';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 63, "num_statements": 1} {"question": "Avg. ticket sales for concerts in 2022", "schema": "CREATE TABLE concerts (id INT, concert_date DATE, ticket_sales INT); INSERT INTO concerts (id, concert_date, ticket_sales) VALUES (1, '2022-01-01', 5000);", "sql": "SELECT AVG(ticket_sales) AS avg_ticket_sales FROM concerts WHERE YEAR(concert_date) = 2022;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 91, "num_statements": 1} {"question": "Find the number of sustainable seafood certifications by country.", "schema": "CREATE TABLE seafood_certifications (id INT, country VARCHAR(50), certification VARCHAR(50)); INSERT INTO seafood_certifications (id, country, certification) VALUES (1, 'Norway', 'MSC'), (2, 'Norway', 'ASC'), (3, 'Canada', 'MSC');", "sql": "SELECT country, COUNT(DISTINCT certification) FROM seafood_certifications GROUP BY country;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 91, "num_statements": 1} {"question": "How many civil cases were handled by attorneys from the 'Downtown' office location?", "schema": "CREATE TABLE Attorneys (AttorneyID INT, OfficeLocation VARCHAR(255)); INSERT INTO Attorneys (AttorneyID, OfficeLocation) VALUES (1, 'Downtown'), (2, 'Uptown'), (3, 'Downtown'), (4, 'Suburbs'); CREATE TABLE Cases (CaseID INT, AttorneyID INT, CaseType VARCHAR(255)); INSERT INTO Cases (CaseID, AttorneyID, CaseType) VALUES (1, 1, 'Civil'), (2, 1, 'Criminal'), (3, 2, 'Civil'), (4, 3, 'Civil'), (5, 4, 'Criminal');", "sql": "SELECT COUNT(*) FROM Cases JOIN Attorneys ON Cases.AttorneyID = Attorneys.AttorneyID WHERE OfficeLocation = 'Downtown' AND CaseType = 'Civil';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 142, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'foreign_key' (example 1172).", "schema": null, "sql": "INSERT INTO fk_r VALUES (2, 2, 1);", "explanation": "DML from PostgreSQL core regression test for Foreign Key.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 34, "num_statements": 1} {"question": "What is the average valuation of companies in the renewable energy sector that have received funding from a specific investor?", "schema": "CREATE TABLE valuations (id INT, company_id INT, valuation INT); INSERT INTO valuations (id, company_id, valuation) VALUES (1, 1, 10000000), (2, 2, 20000000), (3, 3, 30000000);", "sql": "SELECT AVG(valuations.valuation) FROM valuations JOIN investment_rounds ON valuations.company_id = investment_rounds.company_id JOIN investors ON investment_rounds.investor_id = investors.id JOIN companies ON investment_rounds.company_id = companies.id WHERE companies.industry = 'Renewable Energy' AND investors.name = 'GreenTech Ventures';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 341, "num_statements": 1} {"question": "What is the total number of volunteers for each organization by year in the 'volunteers' table?", "schema": "CREATE TABLE volunteers (volunteer_id INT, org_id INT, volunteer_year INT, num_volunteers INT);", "sql": "SELECT org_id, volunteer_year, SUM(num_volunteers) FROM volunteers GROUP BY org_id, volunteer_year;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 99, "num_statements": 1} {"question": "How many exoplanets have been discovered by the Kepler Space Telescope by the end of 2022?", "schema": "CREATE TABLE exoplanets(id INT, name VARCHAR(255), discovery_date DATE, discovery_method VARCHAR(255), telescope VARCHAR(255)); INSERT INTO exoplanets VALUES (1, 'Kepler-10b', '2010-01-04', 'Transit Method', 'Kepler Space Telescope'); INSERT INTO exoplanets VALUES (2, 'Kepler-11b', '2011-02-02', 'Transit Method', 'Kepler Space Telescope'); INSERT INTO exoplanets VALUES (3, 'Kepler-12b', '2011-12-20', 'Transit Method', 'Kepler Space Telescope');", "sql": "SELECT COUNT(*) FROM exoplanets WHERE telescope = 'Kepler Space Telescope' AND discovery_date <= '2022-12-31';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 110, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which school left in 1968 and has the team name of Panthers?", "schema": "CREATE TABLE table_name_81 (school VARCHAR, team_name VARCHAR, year_left VARCHAR)", "sql": "SELECT school FROM table_name_81 WHERE team_name = 'panthers' AND year_left = '1968';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 85, "num_statements": 1} {"question": "What's the name, AI topic, and country of AI researchers who have published more than 10 papers in the last 2 years?", "schema": "CREATE TABLE ais (id INT, name VARCHAR(255), country VARCHAR(255), topic VARCHAR(255), publications INT, publication_date DATE);", "sql": "SELECT ais.name, ais.topic, ais.country FROM ais INNER JOIN (SELECT id, COUNT(*) as num_publications FROM ais WHERE publication_date >= DATE_SUB(CURRENT_DATE(), INTERVAL 2 YEAR) GROUP BY id) AS recent_publications ON ais.id = recent_publications.id WHERE recent_publications.num_publications > 10;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 297, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: The Home team of Sydney had which ground?", "schema": "CREATE TABLE table_name_1 (ground VARCHAR, home_team VARCHAR)", "sql": "SELECT ground FROM table_name_1 WHERE home_team = 'sydney';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 59, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is dma?", "schema": "CREATE TABLE table_19131921_1 (dma INTEGER)", "sql": "SELECT MIN(dma) FROM table_19131921_1;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 38, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which party had an incumbent of W. Jasper Talbert?", "schema": "CREATE TABLE table_name_70 (party VARCHAR, incumbent VARCHAR)", "sql": "SELECT party FROM table_name_70 WHERE incumbent = 'w. jasper talbert';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 70, "num_statements": 1} {"question": "How many mining accidents were reported in Asia in the last 5 years?", "schema": "CREATE TABLE Accidents (AccidentID INT, CompanyID INT, AccidentDate DATE); INSERT INTO Accidents (AccidentID, CompanyID, AccidentDate) VALUES (1, 1, '2020-01-01'), (2, 2, '2019-12-15'), (3, 3, '2018-05-23'), (4, 4, '2017-09-04'), (5, 1, '2016-02-10');", "sql": "SELECT COUNT(*) FROM Accidents WHERE YEAR(AccidentDate) >= YEAR(CURRENT_DATE) - 5 AND Country IN ('China', 'India', 'Indonesia');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 129, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Sort employee names by their age in ascending order.", "schema": "CREATE TABLE employee (name VARCHAR, age VARCHAR)", "sql": "SELECT name FROM employee ORDER BY age;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 39, "num_statements": 1} {"question": "How many unique health conditions were reported in total in Vancouver during 2019?", "schema": "CREATE TABLE reports (id INT, patient_id INT, condition TEXT, city TEXT, date DATE); INSERT INTO reports (id, patient_id, condition, city, date) VALUES (1, 1, 'Flu', 'Vancouver', '2019-01-01'); INSERT INTO reports (id, patient_id, condition, city, date) VALUES (2, 2, 'Cold', 'Vancouver', '2019-02-01');", "sql": "SELECT COUNT(DISTINCT condition) FROM reports WHERE city = 'Vancouver' AND date BETWEEN '2019-01-01' AND '2019-12-31';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 118, "num_statements": 1} {"question": "PostgreSQL regression test 'multirangetypes': Write the SELECT query (example 404).", "schema": null, "sql": "select count(*) from test_multirange_gist where mr &> 'empty'::int4range;", "explanation": "Regression test for Multirangetypes in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select count(*) from test_multirange_gist where mr &> 'empty'::int4range) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 73, "num_statements": 1} {"question": "What is the average claim amount and number of claims for policyholders who are male and have a policy type of 'Auto'?", "schema": "CREATE TABLE Policyholders (Id INT PRIMARY KEY, FirstName VARCHAR(50), LastName VARCHAR(50), Age INT, Gender VARCHAR(10)); CREATE TABLE Policies (Id INT PRIMARY KEY, PolicyholderId INT, PolicyType VARCHAR(50), CoverageAmount DECIMAL(10,2), FOREIGN KEY (PolicyholderId) REFERENCES Policyholders(Id)); CREATE TABLE Claims (Id INT PRIMARY KEY, PolicyId INT, ClaimAmount DECIMAL(10,2), ClaimDate DATE, FOREIGN KEY (PolicyId) REFERENCES Policies(Id));", "sql": "SELECT P.Gender, PL.PolicyType, AVG(C.ClaimAmount) as AverageClaimAmount, COUNT(C.Id) as NumberOfClaims FROM Policyholders P JOIN Policies PL ON P.Id = PL.PolicyholderId JOIN Claims C ON PL.Id = C.PolicyId WHERE P.Gender = 'Male' AND PL.PolicyType = 'Auto' GROUP BY P.Gender, PL.PolicyType ORDER BY AverageClaimAmount DESC;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 323, "num_statements": 1} {"question": "Create a table named 'soil_moisture' with sensor_id, timestamp, and moisture", "schema": "CREATE TABLE farm_sensors (sensor_id INT, timestamp TIMESTAMP, temperature INT); CREATE TABLE crop_temperature (crop_id INT, crop_type VARCHAR(50), timestamp TIMESTAMP, temperature INT);", "sql": "CREATE TABLE soil_moisture (sensor_id INT, timestamp TIMESTAMP, moisture INT);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_table", "is_postgresql_specific": false, "sql_length": 78, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Show the product ids and the number of unique orders containing each product.", "schema": "CREATE TABLE Order_items (product_id VARCHAR, order_id VARCHAR)", "sql": "SELECT product_id, COUNT(DISTINCT order_id) FROM Order_items GROUP BY product_id;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 81, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: what measurement does the contestant from sindelfingen have?", "schema": "CREATE TABLE table_26427332_17 (measurements__in_cm_ VARCHAR, city VARCHAR)", "sql": "SELECT measurements__in_cm_ FROM table_26427332_17 WHERE city = 'Sindelfingen';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 79, "num_statements": 1} {"question": "What is the average price of vegetarian dishes offered by restaurants in New York?", "schema": "CREATE TABLE restaurants (id INT, name TEXT, city TEXT, state TEXT); INSERT INTO restaurants (id, name, city, state) VALUES (1, 'Restaurant A', 'New York', 'NY'), (2, 'Restaurant B', 'New York', 'NY'); CREATE TABLE dishes (id INT, name TEXT, price DECIMAL, restaurant_id INT, vegetarian BOOLEAN); INSERT INTO dishes (id, name, price, restaurant_id, vegetarian) VALUES (1, 'Veggie Burger', 12.50, 1, true), (2, 'Pasta Primavera', 16.00, 1, true), (3, 'Cheeseburger', 10.50, 1, false), (4, 'Fish and Chips', 15.00, 2, false);", "sql": "SELECT AVG(price) FROM dishes WHERE vegetarian = true AND restaurant_id IN (SELECT id FROM restaurants WHERE city = 'New York');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 128, "num_statements": 1} {"question": "What is the maximum amount of funding received by a company founded by a founder from Oceania?", "schema": "CREATE TABLE company (id INT, name TEXT, founding_date DATE, industry TEXT, headquarters TEXT, oceania_founder BOOLEAN); CREATE TABLE funding_rounds (id INT, company_id INT, funding_amount INT, round_type TEXT, date DATE);", "sql": "SELECT MAX(funding_amount) FROM funding_rounds JOIN company ON funding_rounds.company_id = company.id WHERE oceania_founder = TRUE;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 131, "num_statements": 1} {"question": "Show a SQL definition from the pglogical project (node_origin_cascade, item 26).", "schema": null, "sql": "-- Table that replicates from top level provider to mid-level pglogical node.\nSELECT pglogical.replicate_ddl_command($$\n\tCREATE TABLE public.mid_level_tbl (\n\t\tid serial primary key,\n\t\tother integer,\n\t\tdata text,\n\t\tsomething interval\n\t);\n$$);", "explanation": "SQL definition from the open-source pglogical PostgreSQL project.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_table", "is_postgresql_specific": true, "sql_length": 241, "num_statements": 2} {"question": "PostgreSQL regression test 'numeric': Write the SELECT query (example 1019).", "schema": null, "sql": "SELECT pg_lsn('NaN'::numeric);", "explanation": "Regression test for Numeric in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT pg_lsn('NaN'::numeric)) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 30, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: what's the party with first elected being 1926", "schema": "CREATE TABLE table_1342256_6 (party VARCHAR, first_elected VARCHAR)", "sql": "SELECT party FROM table_1342256_6 WHERE first_elected = '1926';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 63, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'indexing' (example 411).", "schema": null, "sql": "insert into idxpart (b, a) values ('one', 142857), ('two', 285714);", "explanation": "DML from PostgreSQL core regression test for Indexing.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 67, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What venue did the game on september 5, 1998 take place at?", "schema": "CREATE TABLE table_name_71 (venue VARCHAR, date VARCHAR)", "sql": "SELECT venue FROM table_name_71 WHERE date = 'september 5, 1998';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 65, "num_statements": 1} {"question": "What is the maximum cargo weight for each port?", "schema": "CREATE TABLE port_cargo_weight (port_id INT, port_name VARCHAR(50), cargo_weight INT); INSERT INTO port_cargo_weight VALUES (1, 'PortF', 2000), (2, 'PortF', 2200), (3, 'PortG', 2500), (4, 'PortG', 2800);", "sql": "SELECT port_name, MAX(cargo_weight) FROM port_cargo_weight GROUP BY port_name;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 78, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: When was Henry Daniel first elected?", "schema": "CREATE TABLE table_2668243_8 (first_elected VARCHAR, incumbent VARCHAR)", "sql": "SELECT first_elected FROM table_2668243_8 WHERE incumbent = 'Henry Daniel';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 75, "num_statements": 1} {"question": "What is the total number of machines in the 'machine_1' and 'machine_2' categories?", "schema": "CREATE TABLE machines (id INT, name VARCHAR(50), category VARCHAR(20)); INSERT INTO machines (id, name, category) VALUES (1, 'Machine 1', 'machine_1'), (2, 'Machine 2', 'machine_2'), (3, 'Machine 3', 'machine_3');", "sql": "SELECT COUNT(*) FROM machines WHERE category IN ('machine_1', 'machine_2');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 75, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What farm has a capacity of 70 and is operational?", "schema": "CREATE TABLE table_26387382_1 (name VARCHAR, capacity__mw_ VARCHAR, status VARCHAR)", "sql": "SELECT name FROM table_26387382_1 WHERE capacity__mw_ = '70' AND status = 'Operational';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 88, "num_statements": 1} {"question": "What is the maximum membership duration in the 'retail' sector?", "schema": "CREATE TABLE union_members (member_id INT, sector VARCHAR(20), membership_duration INT); INSERT INTO union_members (member_id, sector, membership_duration) VALUES (1, 'Retail', 10), (2, 'Retail', 12), (3, 'Healthcare', 8);", "sql": "SELECT MAX(membership_duration) FROM union_members WHERE sector = 'Retail';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 75, "num_statements": 1} {"question": "What is the total revenue from concert ticket sales for artists from the United States?", "schema": "CREATE TABLE Artists (artist_id INT, artist_name TEXT, country TEXT); CREATE TABLE Concerts (concert_id INT, artist_id INT, ticket_sales_revenue FLOAT);", "sql": "SELECT SUM(Concerts.ticket_sales_revenue) FROM Concerts INNER JOIN Artists ON Concerts.artist_id = Artists.artist_id WHERE Artists.country = 'United States';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 157, "num_statements": 1} {"question": "What is the engagement rate (likes + comments) for posts in the fashion category?", "schema": "CREATE TABLE post_stats (post_id INT, category VARCHAR(50), likes INT, comments INT); INSERT INTO post_stats (post_id, category, likes, comments) VALUES (1, 'fashion', 100, 25), (2, 'fashion', 200, 50), (3, 'beauty', 150, 75);", "sql": "SELECT post_id, (likes + comments) AS engagement_rate FROM post_stats WHERE category = 'fashion';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 97, "num_statements": 1} {"question": "What are the names of all bioprocess engineering projects funded by venture capitalists located in the US?", "schema": "CREATE TABLE bioprocess_engineering (project_name VARCHAR(255), location VARCHAR(255), funder VARCHAR(255)); INSERT INTO bioprocess_engineering (project_name, location, funder) VALUES ('ProjX', 'USA', 'Venture Capital X');", "sql": "SELECT project_name FROM bioprocess_engineering WHERE location = 'USA' AND funder LIKE '%Venture Capital%';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 107, "num_statements": 1} {"question": "pgTAP test for Inheritance (assertion 62).", "schema": null, "sql": "-- test isnt_ancestor_of\nSELECT * FROM check_test(\n isnt_ancestor_of( 'hide', 'h_parent', 'hide', 'h_child1', 1, 'Lookie' ),\n false,\n 'isnt_ancestor_of(psch, ptab, csch, ctab, 1, desc)',\n 'Lookie',\n ''\n);", "explanation": "SQL assertion from pgTAP test suite for Inheritance.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 219, "num_statements": 1} {"question": "List all timber production sites located in 'Asia' or 'Africa' regions.", "schema": "CREATE TABLE timber_production (id INT, name VARCHAR(50), region VARCHAR(50)); INSERT INTO timber_production (id, name, region) VALUES (1, 'Timber Inc.', 'Asia'), (2, 'WoodCo', 'Africa'), (3, 'Forest Ltd.', 'North America');", "sql": "SELECT name FROM timber_production WHERE region IN ('Asia', 'Africa');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 70, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Can you tell me the lowest Crowd that has the Away team of melbourne tigers?", "schema": "CREATE TABLE table_name_24 (crowd INTEGER, away_team VARCHAR)", "sql": "SELECT MIN(crowd) FROM table_name_24 WHERE away_team = 'melbourne tigers';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 74, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which Class Pos has a Year larger than 1997, a Team of corvette racing, a Class of gt1, and 327 laps?", "schema": "CREATE TABLE table_name_25 (class VARCHAR, laps VARCHAR, year VARCHAR, team VARCHAR)", "sql": "SELECT class AS pos FROM table_name_25 WHERE year > 1997 AND team = 'corvette racing' AND class = 'gt1' AND laps = 327;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 119, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What year is the Grim Fandango with a windows platform?", "schema": "CREATE TABLE table_name_36 (year VARCHAR, platform_s_ VARCHAR, game VARCHAR)", "sql": "SELECT year FROM table_name_36 WHERE platform_s_ = 'windows' AND game = 'grim fandango';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 88, "num_statements": 1} {"question": "What is the average donation amount for each program in 2022?", "schema": "CREATE TABLE Programs (ProgramID INT, ProgramName VARCHAR(50)); CREATE TABLE Donations (DonationID INT, ProgramID INT, DonationAmount DECIMAL(10, 2), DonationDate DATE); INSERT INTO Programs (ProgramID, ProgramName) VALUES (1, 'Art Education'), (2, 'Theater Production'), (3, 'Music Conservatory'); INSERT INTO Donations (DonationID, ProgramID, DonationAmount, DonationDate) VALUES (1, 1, 100, '2022-01-05'), (2, 1, 150, '2022-02-10'), (3, 2, 200, '2022-03-20');", "sql": "SELECT p.ProgramName, AVG(d.DonationAmount) as AvgDonationAmount FROM Donations d INNER JOIN Programs p ON d.ProgramID = p.ProgramID WHERE d.DonationDate >= '2022-01-01' AND d.DonationDate < '2023-01-01' GROUP BY p.ProgramName;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 227, "num_statements": 1} {"question": "pgTAP test for Usergroup (assertion 18).", "schema": null, "sql": "SELECT * FROM check_test(\n has_group('meanies', 'whatever'),\n true,\n 'has_group(group, desc)',\n 'whatever',\n ''\n);", "explanation": "SQL assertion from pgTAP test suite for Usergroup.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 129, "num_statements": 1} {"question": "What is the total number of streams for each artist in Canada and Germany?", "schema": "CREATE TABLE streams (id INT, artist VARCHAR(255), country VARCHAR(255), streams INT); INSERT INTO streams (id, artist, country, streams) VALUES (1, 'Artist1', 'Canada', 1000000), (2, 'Artist2', 'Germany', 800000), (3, 'Artist1', 'Canada', 1200000), (4, 'Artist3', 'Germany', 900000);", "sql": "SELECT artist, SUM(streams) AS total_streams FROM streams WHERE country IN ('Canada', 'Germany') GROUP BY artist;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 113, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the lowest bronze a team with 9 silvers, a total larger than 13, and more than 13 gold medals has?", "schema": "CREATE TABLE table_name_58 (bronze INTEGER, gold VARCHAR, silver VARCHAR, total VARCHAR)", "sql": "SELECT MIN(bronze) FROM table_name_58 WHERE silver = 9 AND total > 13 AND gold > 13;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 84, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: If the position is 9th, what is the total podiums number?", "schema": "CREATE TABLE table_21795846_1 (podiums VARCHAR, position VARCHAR)", "sql": "SELECT COUNT(podiums) FROM table_21795846_1 WHERE position = '9th';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 67, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What Winning Jockey ran in the Tampa Bay Downs Track on Winning Horse Barkley Sound?", "schema": "CREATE TABLE table_name_86 (winning_jockey VARCHAR, track VARCHAR, winning_horse VARCHAR)", "sql": "SELECT winning_jockey FROM table_name_86 WHERE track = 'tampa bay downs' AND winning_horse = 'barkley sound';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 109, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the charity of the celebrity with the background reality star?", "schema": "CREATE TABLE table_12286195_1 (charity VARCHAR, background VARCHAR)", "sql": "SELECT charity FROM table_12286195_1 WHERE background = 'Reality Star';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 71, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the game number played on April 3?", "schema": "CREATE TABLE table_27723526_13 (game INTEGER, date VARCHAR)", "sql": "SELECT MAX(game) FROM table_27723526_13 WHERE date = 'April 3';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 63, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: For delegate is gaines, tawanna p. tawanna gaines, please specify all the party.", "schema": "CREATE TABLE table_27050336_7 (party VARCHAR, delegate VARCHAR)", "sql": "SELECT party FROM table_27050336_7 WHERE delegate = 'Gaines, Tawanna P. Tawanna Gaines';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 88, "num_statements": 1} {"question": "What is the total number of recipients in the 'community_development' schema, grouped by gender?", "schema": "CREATE TABLE community_development.recipients_grouped (recipient_gender VARCHAR(10), recipient_count INT); INSERT INTO community_development.recipients_grouped (recipient_gender, recipient_count) VALUES ('Male', 50), ('Female', 75), ('Non-binary', 10);", "sql": "SELECT * FROM community_development.recipients_grouped;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 55, "num_statements": 1} {"question": "Find the top ten chemical reactions with the highest energy consumption.", "schema": "CREATE TABLE chemical_reactions (reaction_id INT, energy_consumption FLOAT, reaction_type VARCHAR(50)); INSERT INTO chemical_reactions (reaction_id, energy_consumption, reaction_type) VALUES (1, 1000, 'synthesis'); INSERT INTO chemical_reactions (reaction_id, energy_consumption, reaction_type) VALUES (2, 1200, 'decomposition');", "sql": "SELECT reaction_id, energy_consumption FROM (SELECT reaction_id, energy_consumption, RANK() OVER (ORDER BY energy_consumption DESC) as rank FROM chemical_reactions) as subquery WHERE rank <= 10;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_window_function", "is_postgresql_specific": true, "sql_length": 194, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'without_overlaps' (example 547).", "schema": null, "sql": "UPDATE temporal_mltrng SET valid_at = datemultirange(daterange('2018-01-15', '2018-03-01'))\n WHERE id = '[1,2)' AND valid_at @> '2018-01-15'::date;", "explanation": "DML from PostgreSQL core regression test for Without Overlaps.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 148, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What country has a silver medal in the boxing, heavyweight event?", "schema": "CREATE TABLE table_name_77 (country VARCHAR, medal VARCHAR, event VARCHAR)", "sql": "SELECT country FROM table_name_77 WHERE medal = 'silver' AND event = 'boxing, heavyweight';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 91, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What team is from Lake Oval?", "schema": "CREATE TABLE table_name_93 (home_team VARCHAR, venue VARCHAR)", "sql": "SELECT home_team FROM table_name_93 WHERE venue = 'lake oval';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 62, "num_statements": 1} {"question": "What is the average lifespan of artists who have exhibited their works in both the 'Metropolitan Museum of Art' and the 'Tate Modern'?", "schema": "CREATE TABLE Galleries (GalleryID INT PRIMARY KEY, Name VARCHAR(50), City VARCHAR(50), Country VARCHAR(50)); INSERT INTO Galleries (GalleryID, Name, City, Country) VALUES (16, 'Metropolitan Museum of Art', 'New York', 'USA'), (17, 'Tate Modern', 'London', 'UK');", "sql": "SELECT AVG(DeathYear - BirthYear) FROM Artists WHERE ArtistID IN (SELECT ArtistID FROM Exhibitions WHERE GalleryID IN (16, 17) GROUP BY ArtistID HAVING COUNT(DISTINCT GalleryID) = 2);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 183, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: display the full name (first and last), hire date, salary, and department number for those employees whose first name does not containing the letter M.", "schema": "CREATE TABLE employees (first_name VARCHAR, last_name VARCHAR, hire_date VARCHAR, salary VARCHAR, department_id VARCHAR)", "sql": "SELECT first_name, last_name, hire_date, salary, department_id FROM employees WHERE NOT first_name LIKE '%M%';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 110, "num_statements": 1} {"question": "What is the maximum CO2 emission per garment in factories located in the 'Seoul' region?", "schema": "CREATE TABLE factories (id INT PRIMARY KEY, name VARCHAR(255), location VARCHAR(255), co2_emission_per_year INT);", "sql": "SELECT MAX(co2_emission_per_garment) FROM (SELECT (co2_emission_per_year / quantity_manufactured) as co2_emission_per_garment FROM factories WHERE location LIKE 'Seoul%') as subquery;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 183, "num_statements": 1} {"question": "PostgreSQL regression test 'collate.utf8': Write the SELECT query (example 29).", "schema": null, "sql": "SELECT\n t, lower(t), initcap(t), upper(t),\n length(convert_to(t, 'UTF8')) AS t_bytes,\n length(convert_to(lower(t), 'UTF8')) AS lower_t_bytes,\n length(convert_to(initcap(t), 'UTF8')) AS initcap_t_bytes,\n length(convert_to(upper(t), 'UTF8')) AS upper_t_bytes\n FROM test_pg_unicode_fast;", "explanation": "Regression test for Collate.Utf8 in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT\n t, lower(t), initcap(t), upper(t),\n length(convert_to(t, 'UTF8')) AS t_bytes,\n length(convert_to(lower(t), 'UTF8')) AS lower_t_bytes,\n length(convert_to(initcap(t), 'UTF8')) AS initcap_t_bytes,\n length(convert_to(upper(t), 'UTF8')) AS upper_t_bytes\n FROM test_pg_unicode_fast) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 300, "num_statements": 1} {"question": "What is the total quantity of minerals extracted (in kg) for all mining projects in Oceania that have a reported environmental impact?", "schema": "CREATE TABLE productivity (project_id INT, region TEXT, mineral TEXT, quantity INT, environmental_impact TEXT); INSERT INTO productivity (project_id, region, mineral, quantity, environmental_impact) VALUES (1, 'Oceania', 'gold', 1200, 'high'), (2, 'Oceania', 'copper', 1500, 'low');", "sql": "SELECT SUM(quantity) FROM productivity WHERE region = 'Oceania' AND environmental_impact IS NOT NULL;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 101, "num_statements": 1} {"question": "What is the filing date of the oldest case in the 'court_cases' table?", "schema": "CREATE TABLE court_cases (id INT, case_number INT, filing_date DATE, case_type VARCHAR(50), case_status VARCHAR(50)); INSERT INTO court_cases (id, case_number, filing_date, case_type, case_status) VALUES (1, 2021001, '2021-01-01', 'Civil', 'Open'), (2, 2021002, '2021-02-01', 'Criminal', 'Closed'), (3, 2021003, '2021-03-01', 'Civil', 'Open');", "sql": "SELECT filing_date FROM court_cases ORDER BY filing_date LIMIT 1;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 65, "num_statements": 1} {"question": "Calculate the average funding for companies founded by veterans in the renewable energy industry", "schema": "CREATE TABLE diversity (id INT, company_id INT, founder_veteran BOOLEAN); CREATE TABLE companies (id INT, industry VARCHAR(255), founding_date DATE); CREATE TABLE funding (company_id INT, amount INT); INSERT INTO diversity SELECT 1, 1, true; INSERT INTO diversity SELECT 2, 2, false; INSERT INTO diversity SELECT 3, 3, true; INSERT INTO companies (id, industry, founding_date) SELECT 2, 'Finance', '2012-01-01'; INSERT INTO companies (id, industry, founding_date) SELECT 3, 'Renewable Energy', '2016-01-01'; INSERT INTO companies (id, industry, founding_date) SELECT 4, 'Retail', '2021-01-01'; INSERT INTO funding (company_id, amount) SELECT 2, 1250000; INSERT INTO funding (company_id, amount) SELECT 3, 1750000; INSERT INTO funding (company_id, amount) SELECT 4, 2250000;", "sql": "SELECT AVG(funding.amount) FROM funding JOIN companies ON funding.company_id = companies.id JOIN diversity ON companies.id = diversity.company_id WHERE companies.industry = 'Renewable Energy' AND diversity.founder_veteran = true;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 229, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the rank for less than 6 plays?", "schema": "CREATE TABLE table_name_47 (rank INTEGER, played INTEGER)", "sql": "SELECT SUM(rank) FROM table_name_47 WHERE played < 6;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 53, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the score of the game with 39,592 attendance?", "schema": "CREATE TABLE table_name_19 (score VARCHAR, attendance VARCHAR)", "sql": "SELECT score FROM table_name_19 WHERE attendance = '39,592';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 60, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the total population with less than 789 males?", "schema": "CREATE TABLE table_name_12 (total_population VARCHAR, male INTEGER)", "sql": "SELECT COUNT(total_population) FROM table_name_12 WHERE male < 789;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 67, "num_statements": 1} {"question": "List all companies in the 'social_impact' sector.", "schema": "CREATE TABLE companies (id INT, name TEXT, sector TEXT); INSERT INTO companies (id, name, sector) VALUES (1, 'SocialCo', 'social_impact'), (2, 'ImpactInc', 'social_impact'), (3, 'ChangeOrg', 'non-profit'), (4, 'GreenCorp', 'renewable_energy');", "sql": "SELECT * FROM companies WHERE sector = 'social_impact';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 55, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: who is the opponent on november 24?", "schema": "CREATE TABLE table_name_68 (opponent VARCHAR, november VARCHAR)", "sql": "SELECT opponent FROM table_name_68 WHERE november = 24;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 55, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the Elector with a Faction of roman, and an Elevated with 1244, may 28?", "schema": "CREATE TABLE table_name_56 (elector VARCHAR, faction VARCHAR, elevated VARCHAR)", "sql": "SELECT elector FROM table_name_56 WHERE faction = 'roman' AND elevated = '1244, may 28';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 88, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many tree species are in the kitechura reserve?", "schema": "CREATE TABLE table_16577990_1 (tree_species VARCHAR, central_forest_reserve VARCHAR)", "sql": "SELECT tree_species FROM table_16577990_1 WHERE central_forest_reserve = 'Kitechura';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 85, "num_statements": 1} {"question": "Count the number of diamond mines in Russia that have not had inspections in the past year.", "schema": "CREATE TABLE diamond_mines (id INT, name TEXT, location TEXT, labor_disputes INT, inspections INT, violations INT); INSERT INTO diamond_mines (id, name, location, labor_disputes, inspections, violations) VALUES (1, 'Russian Diamond Mine', 'Russia', 1, 0, 0); INSERT INTO diamond_mines (id, name, location, labor_disputes, inspections, violations) VALUES (2, 'Siberian Diamond Mine', 'Russia', 0, 2, 1);", "sql": "SELECT COUNT(*) FROM diamond_mines WHERE location = 'Russia' AND inspections = 0;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 81, "num_statements": 1} {"question": "PostgreSQL regression test 'xml': Write the SELECT query (example 100).", "schema": null, "sql": "SELECT xmlserialize(DOCUMENT '73' AS text INDENT);", "explanation": "Regression test for Xml in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT xmlserialize(DOCUMENT '73' AS text INDENT)) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 121, "num_statements": 1} {"question": "What is the average response time for each city, considering the previous two records in addition to the current one, ordered by date?", "schema": "CREATE TABLE ResponseTimes (ID INT, City VARCHAR(50), Date TIMESTAMP, Time INT); INSERT INTO ResponseTimes (ID, City, Date, Time) VALUES (7, 'CityC', '2015-01-01 00:00:00', 6), (8, 'CityC', '2015-01-01 01:00:00', 7), (9, 'CityC', '2015-01-01 02:00:00', 8), (10, 'CityD', '2015-01-01 00:00:00', 5), (11, 'CityD', '2015-01-01 01:00:00', 4), (12, 'CityD', '2015-01-01 02:00:00', 3);", "sql": "SELECT City, AVG(Time) OVER (PARTITION BY City ORDER BY Date ROWS BETWEEN 2 PRECEDING AND CURRENT ROW) AS AvgResponseTime FROM ResponseTimes;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "query_window_function", "is_postgresql_specific": true, "sql_length": 141, "num_statements": 1} {"question": "List all companies that have not yet had an exit event", "schema": "CREATE TABLE company_founding(id INT PRIMARY KEY, company_name VARCHAR(100)); CREATE TABLE exit_strategies(id INT PRIMARY KEY, company_id INT, exit_type VARCHAR(50)); INSERT INTO company_founding VALUES (1, 'Acme Inc'); INSERT INTO company_founding VALUES (2, 'Beta Corp'); INSERT INTO exit_strategies VALUES (1, 1, 'Acquisition');", "sql": "SELECT cf.company_name FROM company_founding cf LEFT JOIN exit_strategies es ON cf.id = es.company_id WHERE es.company_id IS NULL;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 130, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the status of the unit with a net capacity of 1212 MW?", "schema": "CREATE TABLE table_name_50 (status VARCHAR, net_capacity VARCHAR)", "sql": "SELECT status FROM table_name_50 WHERE net_capacity = '1212 mw';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 64, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the latest year that has more than 5 points and a renault ef15 1.5 v6 t engine?", "schema": "CREATE TABLE table_name_20 (year INTEGER, engine VARCHAR, points VARCHAR)", "sql": "SELECT MAX(year) FROM table_name_20 WHERE engine = 'renault ef15 1.5 v6 t' AND points > 5;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 90, "num_statements": 1} {"question": "What is the average rating for nonprofits that have received donations in the last month?", "schema": "CREATE TABLE donations (id INT, nonprofit_id INT, amount FLOAT, donation_date DATE); INSERT INTO donations (id, nonprofit_id, amount, donation_date) VALUES (1, 1, 500, '2022-03-15'), (2, 2, 1000, '2022-03-20'), (3, 1, 750, '2022-03-25'); CREATE TABLE nonprofit_ratings (id INT, nonprofit_id INT, rating INT); INSERT INTO nonprofit_ratings (id, nonprofit_id, rating) VALUES (1, 1, 4), (2, 1, 5), (3, 2, 3);", "sql": "SELECT nonprofit_id, AVG(rating) FROM nonprofit_ratings JOIN (SELECT nonprofit_id FROM donations WHERE donation_date BETWEEN DATEADD(day, -30, GETDATE()) AND GETDATE() GROUP BY nonprofit_id) AS last_month_donors ON nonprofit_ratings.nonprofit_id = last_month_donors.nonprofit_id GROUP BY nonprofit_id;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 301, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: On February 10, what was the location attendance?", "schema": "CREATE TABLE table_name_49 (location_attendance VARCHAR, date VARCHAR)", "sql": "SELECT location_attendance FROM table_name_49 WHERE date = 'february 10';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 73, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is Motherwell Team's manner of departure?", "schema": "CREATE TABLE table_name_57 (manner_of_departure VARCHAR, team VARCHAR)", "sql": "SELECT manner_of_departure FROM table_name_57 WHERE team = 'motherwell';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 72, "num_statements": 1} {"question": "How many community events were held in London, including both online and offline events?", "schema": "CREATE TABLE CommunityEvents (id INT, city VARCHAR(20), event_type VARCHAR(10), participation INT); INSERT INTO CommunityEvents (id, city, event_type, participation) VALUES (1, 'London', 'Online', 50), (2, 'London', 'Offline', 20), (3, 'Paris', 'Online', 30);", "sql": "SELECT COUNT(*) FROM CommunityEvents WHERE city = 'London' AND event_type IN ('Online', 'Offline');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 99, "num_statements": 1} {"question": "Update \"union_members\" table where the \"id\" is 2", "schema": "CREATE TABLE union_members (id INT, name VARCHAR(50), state VARCHAR(2), city VARCHAR(20), occupation VARCHAR(20)); INSERT INTO union_members (id, name, state, city, occupation) VALUES (1, 'John Doe', 'NY', 'New York', 'Engineer'); INSERT INTO union_members (id, name, state, city, occupation) VALUES (2, 'Jane Smith', 'CA', 'Los Angeles', 'Teacher');", "sql": "UPDATE union_members SET state = 'WA', city = 'Seattle' WHERE id = 2;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 69, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: when was the premiere when a 3.3 millions of North American watched the episode whose writer was Rob Wright?", "schema": "CREATE TABLE table_21313498_1 (original_air_date VARCHAR, us_viewers__millions_ VARCHAR, written_by VARCHAR)", "sql": "SELECT original_air_date FROM table_21313498_1 WHERE us_viewers__millions_ = '3.3' AND written_by = 'Rob Wright';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 113, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many titles are listed with 8.44 million viewers?", "schema": "CREATE TABLE table_24910733_1 (title VARCHAR, us_viewers__millions_ VARCHAR)", "sql": "SELECT COUNT(title) FROM table_24910733_1 WHERE us_viewers__millions_ = '8.44';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 79, "num_statements": 1} {"question": "What is the total number of hospital beds in rural hospitals in Central America?", "schema": "CREATE TABLE rural_hospitals_beds_2 (hospital_id INT, region VARCHAR(10), num_of_beds INT); INSERT INTO rural_hospitals_beds_2 (hospital_id, region, num_of_beds) VALUES (1, 'Central America', 150), (2, 'Middle East', 170), (3, 'Central America', 160), (4, 'Eastern Europe', 140);", "sql": "SELECT SUM(num_of_beds) FROM rural_hospitals_beds_2 WHERE region = 'Central America';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 85, "num_statements": 1} {"question": "What is the average water usage and extraction cost for companies in the mining industry?", "schema": "CREATE TABLE Water_Usage (Company VARCHAR(255), Year INT, Water_Consumption FLOAT); INSERT INTO Water_Usage (Company, Year, Water_Consumption) VALUES ('JKL Mining', 2019, 567.8), ('MNO Mining', 2020, 678.9); CREATE TABLE Mineral_Extraction (Company VARCHAR(255), Year INT, Extraction_Cost FLOAT); INSERT INTO Mineral_Extraction (Company, Year, Extraction_Cost) VALUES ('JKL Mining', 2019, 800.5), ('MNO Mining', 2020, 850.6);", "sql": "SELECT W.Company, AVG(W.Water_Consumption) AS Average_Water_Usage, M.Extraction_Cost FROM Water_Usage W JOIN Mineral_Extraction M ON W.Company = M.Company WHERE W.Year = M.Year GROUP BY W.Company, M.Extraction_Cost;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 215, "num_statements": 1} {"question": "What is the total landfill capacity for European countries?", "schema": "CREATE TABLE LandfillCapacities (country VARCHAR(50), capacity INT); INSERT INTO LandfillCapacities (country, capacity) VALUES ('Germany', 120000), ('France', 90000), ('UK', 80000);", "sql": "SELECT SUM(capacity) FROM LandfillCapacities WHERE country IN ('Germany', 'France', 'UK', 'Italy', 'Spain');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 108, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'test_decoding' (example 130).", "schema": null, "sql": "INSERT INTO tr_sub(path) VALUES ('3-top-2-2-#1');", "explanation": "Example query from the 'test_decoding' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 49, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the Player with a Pick that is 6?", "schema": "CREATE TABLE table_name_15 (player VARCHAR, pick VARCHAR)", "sql": "SELECT player FROM table_name_15 WHERE pick = 6;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 48, "num_statements": 1} {"question": "What is the average citizen feedback score for each service provided in urban and rural areas?", "schema": "CREATE TABLE CitizenFeedback (AreaType VARCHAR(50), Service VARCHAR(50), Score INT); INSERT INTO CitizenFeedback (AreaType, Service, Score) VALUES ('Urban', 'Waste Collection', 8), ('Urban', 'Street Lighting', 9), ('Rural', 'Water Supply', 7), ('Rural', 'Road Maintenance', 6);", "sql": "SELECT AreaType, Service, AVG(Score) OVER(PARTITION BY AreaType, Service) as AvgScore FROM CitizenFeedback;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "query_window_function", "is_postgresql_specific": true, "sql_length": 107, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'numeric' (example 42).", "schema": null, "sql": "INSERT INTO num_exp_mul VALUES (0,7,'0');", "explanation": "DML from PostgreSQL core regression test for Numeric.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 41, "num_statements": 1} {"question": "PostgreSQL regression test 'partition_join': Write the SELECT query (example 209).", "schema": null, "sql": "SELECT t1.a, t1.c, t2.b, t2.c FROM (SELECT * FROM prt1_l WHERE prt1_l.b = 0) t1 FULL JOIN (SELECT * FROM prt2_l WHERE prt2_l.a = 0) t2 ON (t1.a = t2.b AND t1.c = t2.c) ORDER BY t1.a, t2.b;", "explanation": "Regression test for Partition Join in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT t1.a, t1.c, t2.b, t2.c FROM (SELECT * FROM prt1_l WHERE prt1_l.b = 0) t1 FULL JOIN (SELECT * FROM prt2_l WHERE prt2_l.a = 0) t2 ON (t1.a = t2.b AND t1.c = t2.c) ORDER BY t1.a, t2.b) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 188, "num_statements": 1} {"question": "What is the maximum amount of funding received by a startup with a female founder in the healthcare sector?", "schema": "CREATE TABLE funding(startup_id INT, funding_amount INT); INSERT INTO funding (startup_id, funding_amount) VALUES (1, 5000000); INSERT INTO funding (startup_id, funding_amount) VALUES (2, 7000000);", "sql": "SELECT MAX(funding_amount) FROM funding JOIN startups ON startups.id = funding.startup_id WHERE startups.founder_gender = 'Female' AND startups.industry = 'Healthcare';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 168, "num_statements": 1} {"question": "Show the policy details of policies starting with 'P' and ending with 'Y'?", "schema": "CREATE TABLE Policies (PolicyID TEXT, PolicyHolder TEXT, Premium INT); INSERT INTO Policies (PolicyID, PolicyHolder, Premium) VALUES ('P123', 'John Doe', 1000); INSERT INTO Policies (PolicyID, PolicyHolder, Premium) VALUES ('Y456', 'Jane Smith', 2000);", "sql": "SELECT * FROM Policies WHERE PolicyID LIKE 'P%' AND PolicyID LIKE '%Y';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 71, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the Australian equivalent to i /ɪ/?", "schema": "CREATE TABLE table_name_79 (australian VARCHAR, letter VARCHAR)", "sql": "SELECT australian FROM table_name_79 WHERE letter = 'i /ɪ/';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 60, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: In rounds 7-13 what engine was featured?", "schema": "CREATE TABLE table_name_58 (engine VARCHAR, rounds VARCHAR)", "sql": "SELECT engine FROM table_name_58 WHERE rounds = '7-13';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 55, "num_statements": 1} {"question": "What is the average yield per hectare for corn in the 'rural_development' database?", "schema": "CREATE TABLE crop_yield (id INT, crop VARCHAR(255), yield_per_hectare FLOAT); INSERT INTO crop_yield (id, crop, yield_per_hectare) VALUES (1, 'corn', 5.6), (2, 'wheat', 3.2);", "sql": "SELECT AVG(yield_per_hectare) FROM crop_yield WHERE crop = 'corn';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the location and attendance for game 60?", "schema": "CREATE TABLE table_17121262_9 (location_attendance VARCHAR, game VARCHAR)", "sql": "SELECT location_attendance FROM table_17121262_9 WHERE game = 60;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 65, "num_statements": 1} {"question": "What is the average temperature change in each continent from 1950 to 2000?", "schema": "CREATE TABLE temperature (year INT, continent TEXT, temperature FLOAT); INSERT INTO temperature (year, continent, temperature) VALUES (1950, 'Africa', 22.3), (1950, 'Asia', 23.1), (1950, 'Europe', 10.3), (1950, 'North America', 8.9), (1950, 'South America', 21.4), (1950, 'Oceania', 18.2), (2000, 'Africa', 22.7), (2000, 'Asia', 24.0), (2000, 'Europe', 10.8), (2000, 'North America', 9.5), (2000, 'South America', 21.8), (2000, 'Oceania', 18.8);", "sql": "SELECT continent, AVG(temperature) FROM temperature WHERE year BETWEEN 1950 AND 2000 GROUP BY continent;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 104, "num_statements": 1} {"question": "PostgreSQL regression test 'numeric': Write the SELECT query (example 883).", "schema": null, "sql": "select 0.5678 ^ (-85);", "explanation": "Regression test for Numeric in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select 0.5678 ^ (-85)) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 22, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: In what year was Lindenwood University founded?", "schema": "CREATE TABLE table_10082596_1 (founded INTEGER, school VARCHAR)", "sql": "SELECT MIN(founded) FROM table_10082596_1 WHERE school = 'Lindenwood University';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 81, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the realization for 'to be, to do'", "schema": "CREATE TABLE table_name_71 (realization VARCHAR, gloss VARCHAR)", "sql": "SELECT realization FROM table_name_71 WHERE gloss = ''to be, to do'';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 69, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the 2011 population if the headquarters is Chittorgarh?", "schema": "CREATE TABLE table_2168295_1 (population__2011_ VARCHAR, headquarters VARCHAR)", "sql": "SELECT population__2011_ FROM table_2168295_1 WHERE headquarters = 'Chittorgarh';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 81, "num_statements": 1} {"question": "PostgreSQL regression test 'stats_ext': Write the SELECT query (example 307).", "schema": null, "sql": "SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a IN (1, 26, 51, 76) AND b IN (''1'', ''26'') AND c = 1');", "explanation": "Regression test for Stats Ext in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a IN (1, 26, 51, 76) AND b IN (''1'', ''26'') AND c = 1')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 138, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What film did abdelatif hwidar direct that was in the short film 2007 prix uip category?", "schema": "CREATE TABLE table_name_93 (film VARCHAR, category VARCHAR, director_s_ VARCHAR)", "sql": "SELECT film FROM table_name_93 WHERE category = 'short film 2007 prix uip' AND director_s_ = 'abdelatif hwidar';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 112, "num_statements": 1} {"question": "What is the name of the company with the most employees in the year 2021 in the Education industry?", "schema": "CREATE TABLE company_info (id INT, name VARCHAR(50), industry VARCHAR(50), employees INT, year INT); INSERT INTO company_info (id, name, industry, employees, year) VALUES (1, 'EdTech Solutions', 'Education', 500, 2021); INSERT INTO company_info (id, name, industry, employees, year) VALUES (2, 'Green Energy Solutions', 'Environment', 200, 2021); INSERT INTO company_info (id, name, industry, employees, year) VALUES (3, 'SmartCare Inc', 'Healthcare', 300, 2021);", "sql": "SELECT name FROM company_info WHERE year = 2021 AND industry = 'Education' AND employees = (SELECT MAX(employees) FROM company_info WHERE year = 2021 AND industry = 'Education');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 178, "num_statements": 1} {"question": "Rank nonprofits based in Washington by the number of donations received", "schema": "CREATE TABLE donations (id INT PRIMARY KEY, donor_id INT, donation_amount DECIMAL(10,2), donation_date DATE, nonprofit_id INT); CREATE TABLE nonprofits (id INT PRIMARY KEY, name VARCHAR(100), city VARCHAR(50), mission VARCHAR(200)); INSERT INTO donations (id, donor_id, donation_amount, donation_date, nonprofit_id) VALUES (1, 1, 500, '2022-01-01', 1); INSERT INTO donations (id, donor_id, donation_amount, donation_date, nonprofit_id) VALUES (2, 2, 750, '2022-02-15', 2); INSERT INTO nonprofits (id, name, city, mission) VALUES (1, 'Save the Children', 'Washington', 'Improving the lives of children through better education, health care, and economic opportunities.'); INSERT INTO nonprofits (id, name, city, mission) VALUES (2, 'Greenpeace', 'San Francisco', 'Dedicated to preserving the environment and promoting peace.');", "sql": "SELECT nonprofits.name, ROW_NUMBER() OVER (PARTITION BY nonprofits.city ORDER BY COUNT(donations.id) DESC) as ranking FROM nonprofits INNER JOIN donations ON nonprofits.id = donations.nonprofit_id WHERE nonprofits.city = 'Washington' GROUP BY nonprofits.name;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "query_window_function", "is_postgresql_specific": true, "sql_length": 259, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: what is the average tries for the season 2008 warrington wolves with an appearance more than 7?", "schema": "CREATE TABLE table_name_55 (tries INTEGER, season VARCHAR, appearance VARCHAR)", "sql": "SELECT AVG(tries) FROM table_name_55 WHERE season = '2008 warrington wolves' AND appearance > 7;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 96, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'numeric' (example 177).", "schema": null, "sql": "INSERT INTO num_exp_sub VALUES (4,1,'7799461.4119');", "explanation": "DML from PostgreSQL core regression test for Numeric.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 52, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the competition that had a win result and a score of 3-1?", "schema": "CREATE TABLE table_name_19 (competition VARCHAR, result VARCHAR, score VARCHAR)", "sql": "SELECT competition FROM table_name_19 WHERE result = 'win' AND score = '3-1';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 77, "num_statements": 1} {"question": "What is the total waste generated by the 'Acme Chemicals' plant located in 'San Francisco'?", "schema": "CREATE TABLE plants (id INT, name TEXT, city TEXT, state TEXT); INSERT INTO plants (id, name, city, state) VALUES (1, 'Acme Chemicals', 'San Francisco', 'CA'); CREATE TABLE wastes (id INT, plant_id INT, amount INT); INSERT INTO wastes (id, plant_id, amount) VALUES (1, 1, 500), (2, 1, 300), (3, 1, 700);", "sql": "SELECT SUM(wastes.amount) FROM wastes JOIN plants ON wastes.plant_id = plants.id WHERE plants.name = 'Acme Chemicals' AND plants.city = 'San Francisco';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 152, "num_statements": 1} {"question": "Find policy types with more than one policyholder living in 'FL'.", "schema": "CREATE TABLE policyholders (id INT, name TEXT, state TEXT, policy_type TEXT, premium FLOAT); INSERT INTO policyholders (id, name, state, policy_type, premium) VALUES (1, 'John Doe', 'FL', 'Auto', 1200.00), (2, 'Jane Smith', 'FL', 'Auto', 1200.00), (3, 'Jim Brown', 'CA', 'Home', 2500.00);", "sql": "SELECT policy_type, COUNT(DISTINCT name) as num_policyholders FROM policyholders WHERE state = 'FL' GROUP BY policy_type HAVING num_policyholders > 1;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 150, "num_statements": 1} {"question": "What is the total number of cases in 'justice_cases' table, resolved through mediation?", "schema": "CREATE TABLE justice_cases (id INT, case_type TEXT, resolution_method TEXT); INSERT INTO justice_cases (id, case_type, resolution_method) VALUES (1, 'Violent Crime', 'Restorative Justice'), (2, 'Property Crime', 'Mediation'), (3, 'Violent Crime', 'Mediation');", "sql": "SELECT COUNT(*) FROM justice_cases WHERE resolution_method = 'Mediation';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 73, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the average length in feet of the bridges?", "schema": "CREATE TABLE bridge (length_feet INTEGER)", "sql": "SELECT AVG(length_feet) FROM bridge;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 36, "num_statements": 1} {"question": "What is the average speed of vessels for each port?", "schema": "CREATE TABLE port_vessels (id INT, port VARCHAR(50), vessel_id INT, name VARCHAR(50), speed DECIMAL(5,2)); INSERT INTO port_vessels VALUES (1, 'PortA', 1, 'Vessel1', 25.6), (2, 'PortA', 2, 'Vessel2', 27.3), (3, 'PortB', 3, 'Vessel3', 24.5);", "sql": "SELECT port, AVG(speed) FROM port_vessels GROUP BY port;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 56, "num_statements": 1} {"question": "What is the maximum price of dysprosium in China in the last 3 years?", "schema": "CREATE TABLE dysprosium_prices (year INT, country TEXT, price FLOAT); INSERT INTO dysprosium_prices (year, country, price) VALUES (2019, 'China', 126.3), (2020, 'China', 135.2), (2021, 'China', 142.9), (2022, 'China', 150.1);", "sql": "SELECT MAX(price) FROM dysprosium_prices WHERE country = 'China' AND year >= 2019 AND year <= 2021;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 99, "num_statements": 1} {"question": "Calculate average donation per donor", "schema": "CREATE TABLE financial_donations (id INT, donor_id INT, amount INT); INSERT INTO financial_donations VALUES (1, 1, 500)", "sql": "SELECT AVG(fd.amount) / COUNT(DISTINCT fd.donor_id) AS avg_donation_per_donor FROM financial_donations fd;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 106, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which Release date has a Required OS of windows, a Type of 2d, and a Developer(s) of zeonix?", "schema": "CREATE TABLE table_name_59 (release_date VARCHAR, developer_s_ VARCHAR, required_os VARCHAR, type VARCHAR)", "sql": "SELECT release_date FROM table_name_59 WHERE required_os = 'windows' AND type = '2d' AND developer_s_ = 'zeonix';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 113, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the title that was directed by john terlesky", "schema": "CREATE TABLE table_24222929_2 (title VARCHAR, directed_by VARCHAR)", "sql": "SELECT title FROM table_24222929_2 WHERE directed_by = 'John Terlesky';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 71, "num_statements": 1} {"question": "What is the maximum smart city technology adoption rate in cities in the US, in the last 5 years?", "schema": "CREATE TABLE city_tech (id INT, city VARCHAR(20), country VARCHAR(20), adoption_rate FLOAT, year INT); INSERT INTO city_tech (id, city, country, adoption_rate, year) VALUES (1, 'San Francisco', 'USA', 0.75, 2018), (2, 'Portland', 'USA', 0.68, 2019), (3, 'Vancouver', 'Canada', 0.55, 2020);", "sql": "SELECT MAX(adoption_rate) FROM city_tech WHERE country = 'USA' AND year BETWEEN 2016 AND 2021;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 94, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: List all countries of markets in ascending alphabetical order.", "schema": "CREATE TABLE market (Country VARCHAR)", "sql": "SELECT Country FROM market ORDER BY Country;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 44, "num_statements": 1} {"question": "What is the total cost of projects with a completion date in 2021?", "schema": "CREATE TABLE Infrastructure (id INT, category VARCHAR(20), cost FLOAT, completed DATE); INSERT INTO Infrastructure (id, category, cost, completed) VALUES (1, 'Transportation', 5000000, '2020-01-01'), (2, 'WaterSupply', 3000000, '2019-01-01'), (3, 'Transportation', 7000000, '2021-01-01');", "sql": "SELECT SUM(cost) FROM Infrastructure WHERE YEAR(completed) = 2021;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "What is the total sales revenue for a given region in a given year?", "schema": "CREATE TABLE sales (sale_id INT, product_id INT, sale_date DATE, region VARCHAR(50), sale_price FLOAT); INSERT INTO sales VALUES (1, 1, '2022-01-05', 'Europe', 15.99), (2, 2, '2022-02-10', 'Asia', 19.99), (3, 1, '2022-03-20', 'Europe', 15.99), (4, 3, '2022-03-25', 'Europe', 12.99);", "sql": "SELECT SUM(sale_price) FROM sales WHERE sale_date BETWEEN '2022-01-01' AND '2022-12-31' AND region = 'Europe';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 110, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the value in 2012 if it is 2R in 2013, 1R in 2005, and 1R in 2008?", "schema": "CREATE TABLE table_name_84 (Id VARCHAR)", "sql": "SELECT 2012 FROM table_name_84 WHERE 2013 = '2r' AND 2005 = '1r' AND 2008 = '1r';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 81, "num_statements": 1} {"question": "How many volunteers engaged in each program, having more than 20 participants?", "schema": "CREATE TABLE Volunteers (id INT, volunteer_name TEXT, program TEXT, participation_date DATE); INSERT INTO Volunteers (id, volunteer_name, program, participation_date) VALUES (1, 'Alice', 'Feed the Hungry', '2022-01-01'), (2, 'Bob', 'Tutor Kids', '2022-01-02');", "sql": "SELECT program, COUNT(*) FROM Volunteers GROUP BY program HAVING COUNT(*) > 20;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 79, "num_statements": 1} {"question": "What is the population size of 'Birds' species in 'Habitat 5'?", "schema": "CREATE TABLE Habitat5(animal_id INT, species VARCHAR(20), population INT, habitat VARCHAR(20)); INSERT INTO Habitat5 VALUES (1, 'Eagle', 50, 'Habitat5'), (2, 'Hawk', 60, 'Habitat5'), (3, 'Osprey', 70, 'Habitat5');", "sql": "SELECT SUM(population) FROM Habitat5 WHERE species IN (SELECT species FROM AnimalData3 WHERE order_class = 'Birds') AND habitat = 'Habitat5';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 141, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who had the high assists on the date December 12?", "schema": "CREATE TABLE table_11960944_4 (high_assists VARCHAR, date VARCHAR)", "sql": "SELECT high_assists FROM table_11960944_4 WHERE date = 'December 12';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 69, "num_statements": 1} {"question": "What is the average rating of games released in the past year, grouped by genre and platform?", "schema": "CREATE TABLE games (id INT, title VARCHAR(50), release_date DATE, rating INT, genre VARCHAR(20), platform VARCHAR(20));", "sql": "SELECT g.genre, g.platform, AVG(g.rating) FROM games g WHERE g.release_date >= DATE_SUB(CURRENT_DATE, INTERVAL 1 YEAR) GROUP BY g.genre, g.platform;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 148, "num_statements": 1} {"question": "Write the PL/pgSQL object from PostgreSQL regression test 'create_procedure' (example 14).", "schema": null, "sql": "$$;\n\nCALL ptest7(least('a', 'b'), 'a');\n\n\n-- empty body\nCREATE PROCEDURE ptest8(x text)\nBEGIN ATOMIC\nEND;\n\n\\df ptest8\nSELECT pg_get_functiondef('ptest8'::regproc);\nCALL ptest8('');\n\n\n-- OUT parameters\n\nCREATE PROCEDURE ptest9(OUT a int)\nLANGUAGE SQL\nAS $$\nINSERT INTO cp_test VALUES (1, 'a');", "explanation": "PL/pgSQL object from PostgreSQL core test for Create Procedure.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "plpgsql_function", "is_postgresql_specific": false, "sql_length": 292, "num_statements": 6} {"question": "What is the total cargo weight transported by vessels from India to Australia in the last year, excluding liquefied gas carriers?", "schema": "CREATE TABLE Ports (id INT, name VARCHAR(50), country VARCHAR(50)); CREATE TABLE CargoTransports (id INT, vessel_id INT, weight INT, transport_time TIMESTAMP, source_port_id INT, destination_port_id INT);", "sql": "SELECT SUM(weight) FROM CargoTransports WHERE transport_time > NOW() - INTERVAL '1 year' AND source_port_id IN (SELECT id FROM Ports WHERE country = 'India') AND destination_port_id IN (SELECT id FROM Ports WHERE country = 'Australia') AND (SELECT vessel_type FROM Vessels WHERE id = CargoTransports.vessel_id) != 'Liquefied Gas Carrier';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 338, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What Constructor has 56 Laps?", "schema": "CREATE TABLE table_name_13 (constructor VARCHAR, laps VARCHAR)", "sql": "SELECT constructor FROM table_name_13 WHERE laps = 56;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 54, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the total when gold is 1, and rank is more than 4, and bronze is 0?", "schema": "CREATE TABLE table_name_1 (total VARCHAR, bronze VARCHAR, gold VARCHAR, rank VARCHAR)", "sql": "SELECT COUNT(total) FROM table_name_1 WHERE gold = 1 AND rank > 4 AND bronze = 0;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 81, "num_statements": 1} {"question": "Write the SQL definition from PostgreSQL contrib extension 'contrib' (item 1).", "schema": null, "sql": "/* contrib/dblink/dblink--1.0--1.1.sql */\n\n-- complain if script is sourced in psql, rather than via ALTER EXTENSION\n\\echo Use \"ALTER EXTENSION dblink UPDATE TO '1.1'\" to load this file. \\quit\n\nCREATE FUNCTION dblink_fdw_validator(\n options text[],\n catalog oid\n)\nRETURNS void\nAS 'MODULE_PATHNAME', 'dblink_fdw_validator'\nLANGUAGE C STRICT;", "explanation": "SQL definition from the 'contrib' PostgreSQL contrib extension.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 346, "num_statements": 1} {"question": "What is the total number of Shariah-compliant finance accounts in all regions?", "schema": "CREATE TABLE all_regions (region VARCHAR(20), account_type VARCHAR(30)); INSERT INTO all_regions (region, account_type) VALUES ('Northeast', 'Shariah-compliant Finance'), ('Northeast', 'Traditional Finance'), ('Southeast', 'Shariah-compliant Finance'), ('Southeast', 'Traditional Finance'), ('Midwest', 'Shariah-compliant Finance'), ('Midwest', 'Traditional Finance'), ('Western', 'Shariah-compliant Finance'), ('Western', 'Traditional Finance');", "sql": "SELECT COUNT(*) FROM all_regions WHERE account_type = 'Shariah-compliant Finance';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 82, "num_statements": 1} {"question": "Find the total revenue and average sustainability rating for each city with restaurants.", "schema": "CREATE TABLE Restaurants (RestaurantID int, RestaurantName varchar(50), City varchar(50), AnnualRevenue numeric(10, 2)); INSERT INTO Restaurants (RestaurantID, RestaurantName, City, AnnualRevenue) VALUES (1, 'The Green Garden', 'San Francisco', 50000); INSERT INTO Restaurants (RestaurantID, RestaurantName, City, AnnualRevenue) VALUES (2, 'Healthy Bites', 'Los Angeles', 70000); CREATE TABLE MenuItems (MenuItemID int, RestaurantID int, MenuItemName varchar(50), SaleAmount numeric(10, 2), SustainabilityRating int); INSERT INTO MenuItems (MenuItemID, RestaurantID, MenuItemName, SaleAmount, SustainabilityRating) VALUES (1, 1, 'Quinoa Salad', 2000, 5); INSERT INTO MenuItems (MenuItemID, RestaurantID, MenuItemName, SaleAmount, SustainabilityRating) VALUES (2, 1, 'Chickpea Curry', 3000, 4); INSERT INTO MenuItems (MenuItemID, RestaurantID, MenuItemName, SaleAmount, SustainabilityRating) VALUES (3, 2, 'Tofu Stir Fry', 4000, 5); INSERT INTO MenuItems (MenuItemID, RestaurantID, MenuItemName, SaleAmount, SustainabilityRating) VALUES (4, 2, 'Vegetable Sushi', 1000, 3);", "sql": "SELECT R.City, SUM(M.SaleAmount) AS TotalRevenue, AVG(M.SustainabilityRating) AS AvgSustainabilityRating FROM Restaurants R JOIN MenuItems M ON R.RestaurantID = M.RestaurantID GROUP BY R.City;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 192, "num_statements": 1} {"question": "How many carbon offset initiatives have been launched by the city government of Paris in the last 2 years?", "schema": "CREATE TABLE carbon_offset_initiatives (initiative_id INT, initiative_name VARCHAR(100), launch_date DATE, city VARCHAR(100)); INSERT INTO carbon_offset_initiatives (initiative_id, initiative_name, launch_date, city) VALUES (1, 'Tree Planting', '2022-01-01', 'Paris'), (2, 'Bicycle Sharing Expansion', '2021-07-01', 'Paris');", "sql": "SELECT COUNT(*) FROM carbon_offset_initiatives WHERE city = 'Paris' AND launch_date >= DATE_SUB(CURRENT_DATE, INTERVAL 2 YEAR);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 127, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Show the distinct apartment numbers of the apartments that have bookings with status code \"Confirmed\".", "schema": "CREATE TABLE Apartments (apt_number VARCHAR, apt_id VARCHAR); CREATE TABLE Apartment_Bookings (apt_id VARCHAR, booking_status_code VARCHAR)", "sql": "SELECT DISTINCT T2.apt_number FROM Apartment_Bookings AS T1 JOIN Apartments AS T2 ON T1.apt_id = T2.apt_id WHERE T1.booking_status_code = 'Confirmed';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 150, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the result of the Competition of friendly?", "schema": "CREATE TABLE table_name_61 (result VARCHAR, competition VARCHAR)", "sql": "SELECT result FROM table_name_61 WHERE competition = 'friendly';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 64, "num_statements": 1} {"question": "What is the daily sales trend for each product category in the last month?", "schema": "CREATE TABLE Product (id INT, name VARCHAR(255), category VARCHAR(255), revenue FLOAT, sale_date DATE);", "sql": "SELECT category, sale_date, SUM(revenue) as daily_sales FROM Product WHERE sale_date >= (CURRENT_DATE - INTERVAL '1 month') GROUP BY ROLLUP(category, sale_date) ORDER BY category, sale_date DESC;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "query_aggregation", "is_postgresql_specific": true, "sql_length": 195, "num_statements": 1} {"question": "What is the maximum permit issuance fee for projects in the Southeast?", "schema": "CREATE TABLE Permits (id INT, region VARCHAR(255), issuance_fee FLOAT); INSERT INTO Permits (id, region, issuance_fee) VALUES (1, 'Southeast', 500.0), (2, 'Northwest', 700.0), (3, 'Southeast', 600.0);", "sql": "SELECT MAX(issuance_fee) FROM Permits WHERE region = 'Southeast';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 65, "num_statements": 1} {"question": "What is the average budget of transportation projects in African nations that have been completed?", "schema": "CREATE TABLE transportation_projects (id INT, project_budget INT, project_status TEXT, country TEXT); INSERT INTO transportation_projects (id, project_budget, project_status, country) VALUES (1, 50000, 'completed', 'Nigeria'), (2, 75000, 'in_progress', 'Kenya'), (3, 30000, 'completed', 'Egypt');", "sql": "SELECT AVG(project_budget) FROM transportation_projects WHERE project_status = 'completed' AND country IN ('Africa');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 117, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who was the 2007 actor from Moscow for the voyage of Varenka Bakunin?", "schema": "CREATE TABLE table_name_91 (actor_in_moscow VARCHAR, _2007 VARCHAR, voyage VARCHAR)", "sql": "SELECT actor_in_moscow, _2007 FROM table_name_91 WHERE voyage = 'varenka bakunin';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 82, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who won the runner-up in 1999?", "schema": "CREATE TABLE table_name_26 (runner_up VARCHAR, year VARCHAR)", "sql": "SELECT runner_up FROM table_name_26 WHERE year = 1999;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 54, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the snatch for the Clean & jerk of 145.0, and a Bodyweight larger than 76.22?", "schema": "CREATE TABLE table_name_50 (snatch INTEGER, bodyweight VARCHAR, clean_ VARCHAR, _jerk VARCHAR)", "sql": "SELECT AVG(snatch) FROM table_name_50 WHERE clean_ & _jerk = '145.0' AND bodyweight > 76.22;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 92, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: If a radius is 10, what is the lowest possible mass?", "schema": "CREATE TABLE table_10432351_1 (mass__m___ INTEGER, radius__r___ VARCHAR)", "sql": "SELECT MIN(mass__m___) FROM table_10432351_1 WHERE radius__r___ = 10;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 69, "num_statements": 1} {"question": "What is the total weight of cannabis flowers produced by each cultivation site in Colorado in the last 6 months?", "schema": "CREATE TABLE cultivation_sites (id INT, state TEXT, site_name TEXT);CREATE TABLE cultivation (id INT, site_id INT, plant_weight DECIMAL, plant_type TEXT, cultivation_date DATE);", "sql": "SELECT cs.site_name, SUM(c.plant_weight) as total_weight FROM cultivation_sites cs INNER JOIN cultivation c ON cs.id = c.site_id WHERE cs.state = 'Colorado' AND c.plant_type = 'flowers' AND c.cultivation_date >= DATE_SUB(CURRENT_DATE, INTERVAL 6 MONTH) GROUP BY cs.site_name;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 275, "num_statements": 1} {"question": "What is the total quantity of cargo handled by ports in each region, excluding the African region?", "schema": "CREATE TABLE ports_cargo_region (port_id INT, port_name TEXT, region TEXT, cargo_quantity INT); INSERT INTO ports_cargo_region VALUES (1, 'Port I', 'Africa', 2000), (2, 'Port J', 'Europe', 1800), (3, 'Port K', 'Asia Pacific', 2200);", "sql": "SELECT ports_cargo_region.region, SUM(ports_cargo_region.cargo_quantity) FROM ports_cargo_region WHERE ports_cargo_region.region != 'Africa' GROUP BY ports_cargo_region.region;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 176, "num_statements": 1} {"question": "Delete records with humidity levels below 40 for the 'USA' in the month of January from the humidity table.", "schema": "CREATE TABLE iot_sensors (id INT, name TEXT, country TEXT); INSERT INTO iot_sensors (id, name, country) VALUES (1, 'IS1', 'USA'), (2, 'IS2', 'Canada'); CREATE TABLE humidity (id INT, sensor_id INT, timestamp TIMESTAMP, humidity FLOAT); INSERT INTO humidity (id, sensor_id, timestamp, humidity) VALUES (1, 1, '2021-01-01 12:00:00', 45), (2, 1, '2021-01-01 16:00:00', 38), (3, 1, '2021-01-01 20:00:00', 42), (4, 2, '2021-01-01 12:00:00', 50), (5, 2, '2021-01-01 16:00:00', 48), (6, 2, '2021-01-01 20:00:00', 52);", "sql": "DELETE FROM humidity WHERE sensor_id IN (SELECT sensor_id FROM humidity WHERE country = 'USA' AND EXTRACT(MONTH FROM timestamp) = 1 AND humidity < 40) AND EXTRACT(MONTH FROM timestamp) = 1;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "dml_delete", "is_postgresql_specific": false, "sql_length": 189, "num_statements": 1} {"question": "What is the average credit score for clients who have a Shariah-compliant personal loan?", "schema": "CREATE TABLE shariah_compliant_personal_loans (client_id INT, credit_score INT); INSERT INTO shariah_compliant_personal_loans (client_id, credit_score) VALUES (1, 700), (2, 750), (3, 650); CREATE TABLE shariah_compliant_loans (loan_id INT, client_id INT); INSERT INTO shariah_compliant_loans (loan_id, client_id) VALUES (101, 1), (102, 2), (103, 3);", "sql": "SELECT AVG(scpl.credit_score) FROM shariah_compliant_personal_loans scpl JOIN shariah_compliant_loans scl ON scpl.client_id = scl.client_id;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 140, "num_statements": 1} {"question": "Show the total number of affordable housing units in Oakland and Boston.", "schema": "CREATE TABLE oakland_housing(id INT, type VARCHAR(20), price INT); INSERT INTO oakland_housing VALUES (1, 'affordable', 300000); CREATE TABLE boston_housing(id INT, type VARCHAR(20), price INT); INSERT INTO boston_housing VALUES (1, 'affordable', 400000);", "sql": "SELECT COUNT(*) FROM oakland_housing WHERE type = 'affordable' UNION ALL SELECT COUNT(*) FROM boston_housing WHERE type = 'affordable';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 135, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the African Spoonbill when the Ostrich is Brown-hooded Kingfisher?", "schema": "CREATE TABLE table_20042805_2 (african_spoonbill VARCHAR, ostrich VARCHAR)", "sql": "SELECT african_spoonbill FROM table_20042805_2 WHERE ostrich = 'Brown-hooded Kingfisher';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 89, "num_statements": 1} {"question": "What is the average wellbeing score for athletes on the Golden State Warriors?", "schema": "CREATE TABLE athletes (athlete_id INT, wellbeing_score INT, team_id INT); CREATE TABLE teams (team_id INT, team_name VARCHAR(20)); INSERT INTO teams (team_id, team_name) VALUES (4, 'Golden State Warriors'); INSERT INTO athletes (athlete_id, wellbeing_score, team_id) VALUES (4, 80, 4);", "sql": "SELECT AVG(athletes.wellbeing_score) FROM athletes INNER JOIN teams ON athletes.team_id = teams.team_id WHERE teams.team_name = 'Golden State Warriors';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 152, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'merge' (example 445).", "schema": null, "sql": "INSERT INTO pa_target SELECT '2017-01-31', id, id * 100, 'initial' FROM generate_series(1,9,3) AS id;", "explanation": "DML from PostgreSQL core regression test for Merge.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 101, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Tell me the scoring average for year less than 1998 and wins more than 3", "schema": "CREATE TABLE table_name_92 (scoring_average VARCHAR, year VARCHAR, wins VARCHAR)", "sql": "SELECT scoring_average FROM table_name_92 WHERE year < 1998 AND wins > 3;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 73, "num_statements": 1} {"question": "What is the total amount of funding received from the \"Government\" source?", "schema": "CREATE TABLE Funding (FundingID INT, FundingSource TEXT, Amount DECIMAL); INSERT INTO Funding (FundingID, FundingSource, Amount) VALUES (1, 'Government', 5000.00), (2, 'Private', 3000.00);", "sql": "SELECT SUM(Amount) FROM Funding WHERE FundingSource = 'Government';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 67, "num_statements": 1} {"question": "What is the total donation amount by volunteers in Mexico, for each program, in the fiscal year 2022?", "schema": "CREATE TABLE donations (id INT, donor_name VARCHAR(50), donation_amount DECIMAL(10,2), donation_date DATE, is_volunteer BOOLEAN, program_name VARCHAR(50)); INSERT INTO donations (id, donor_name, donation_amount, donation_date, is_volunteer, program_name) VALUES (1, 'Carlos Hernandez', 100.00, '2022-04-01', true, 'Program A'), (2, 'Ana Garcia', 200.00, '2022-07-01', true, 'Program B'), (3, 'Luis Rodriguez', 150.00, '2022-10-01', false, 'Program C'), (4, 'Mariana Sanchez', 50.00, '2022-01-01', true, 'Program A'); CREATE TABLE programs (id INT, program_name VARCHAR(50)); INSERT INTO programs (id, program_name) VALUES (1, 'Program A'), (2, 'Program B'), (3, 'Program C');", "sql": "SELECT p.program_name, DATE_FORMAT(d.donation_date, '%Y-%V') AS fiscal_year, SUM(d.donation_amount) FROM donations d JOIN programs p ON d.program_name = p.program_name WHERE d.is_volunteer = true AND d.donor_country = 'Mexico' GROUP BY p.program_name, fiscal_year;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 264, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the location of the Durfee Mill No. 1, built before 1872 ?", "schema": "CREATE TABLE table_name_7 (location VARCHAR, built VARCHAR, name VARCHAR)", "sql": "SELECT location FROM table_name_7 WHERE built < 1872 AND name = 'durfee mill no. 1';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 84, "num_statements": 1} {"question": "What was the average sustainability score for the 'Spring 2022' collection?", "schema": "CREATE TABLE garment_data_3 (garment_id INT, collection VARCHAR(20), sustainability_score FLOAT); INSERT INTO garment_data_3 (garment_id, collection, sustainability_score) VALUES (1, 'Spring 2022', 8.2), (2, 'Summer 2022', 8.7), (3, 'Autumn 2022', 9.0);", "sql": "SELECT AVG(sustainability_score) FROM garment_data_3 WHERE collection = 'Spring 2022';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 86, "num_statements": 1} {"question": "List all artifacts with their respective analysis results, excluding those from 'Site13'.", "schema": "CREATE TABLE Artifacts (ArtifactName VARCHAR(50), SiteName VARCHAR(50), AnalysisResult VARCHAR(50)); INSERT INTO Artifacts (ArtifactName, SiteName, AnalysisResult) VALUES ('ArtifactC1', 'Site13', 'ResultC1'), ('ArtifactC2', 'Site14', 'ResultC2'), ('ArtifactD1', 'Site15', 'ResultD1');", "sql": "SELECT ArtifactName, AnalysisResult FROM Artifacts WHERE SiteName != 'Site13';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 78, "num_statements": 1} {"question": "PostgreSQL regression test 'jsonb': Write the SELECT query (example 945).", "schema": null, "sql": "select jsonb_to_tsvector('english', '{\"a\": \"aaa in bbb\", \"b\": 123, \"c\": 456, \"d\": true, \"f\": false, \"g\": null}'::jsonb, '[\"all\", null]');", "explanation": "Regression test for Jsonb in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select jsonb_to_tsvector('english', '{\"a\": \"aaa in bbb\", \"b\": 123, \"c\": 456, \"d\": true, \"f\": false, \"g\": null}'::jsonb, '[\"all\", null]')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "advanced", "category": "query_select", "is_postgresql_specific": true, "sql_length": 137, "num_statements": 1} {"question": "What is the average cultural competency score for hospitals in each state?", "schema": "CREATE TABLE HospitalsCulturalCompetency (HospitalID INT, State VARCHAR(255), CulturalCompetencyScore INT); INSERT INTO HospitalsCulturalCompetency (HospitalID, State, CulturalCompetencyScore) VALUES (1, 'California', 85), (2, 'New York', 90), (3, 'Florida', 75), (4, 'Texas', 80), (5, 'Pennsylvania', 95);", "sql": "SELECT State, AVG(CulturalCompetencyScore) as AvgCulturalCompetencyScore FROM HospitalsCulturalCompetency GROUP BY State;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 121, "num_statements": 1} {"question": "How many mobile and broadband customers are there in each region?", "schema": "CREATE TABLE mobile_customers (customer_id INT, region VARCHAR(20)); CREATE TABLE broadband_customers (customer_id INT, region VARCHAR(20)); INSERT INTO mobile_customers (customer_id, region) VALUES (1, 'NE'), (2, 'SE'), (3, 'NW'); INSERT INTO broadband_customers (customer_id, region) VALUES (4, 'NE'), (5, 'SW'), (6, 'NE');", "sql": "SELECT 'mobile' as service, region, COUNT(*) as num_customers FROM mobile_customers GROUP BY region UNION ALL SELECT 'broadband', region, COUNT(*) FROM broadband_customers GROUP BY region;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 188, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'foreign_key' (example 1187).", "schema": null, "sql": "CREATE TABLE fkpart13_t2 (\n part_id int PRIMARY KEY,\n column_to_drop int,\n FOREIGN KEY (part_id) REFERENCES fkpart13_t1 ON UPDATE CASCADE ON DELETE CASCADE\n) PARTITION BY LIST (part_id);", "explanation": "DDL from PostgreSQL core regression test for Foreign Key.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "ddl_table", "is_postgresql_specific": true, "sql_length": 189, "num_statements": 1} {"question": "PostgreSQL regression test 'window': Write the SELECT query (example 109).", "schema": null, "sql": "select sum(salary) over (order by enroll_date range between '1 year'::interval preceding and '1 year'::interval following\n\texclude group), salary, enroll_date from empsalary;", "explanation": "Regression test for Window in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select sum(salary) over (order by enroll_date range between '1 year'::interval preceding and '1 year'::interval following\n\texclude group), salary, enroll_date from empsalary) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "intermediate", "category": "query_window_function", "is_postgresql_specific": true, "sql_length": 174, "num_statements": 1} {"question": "Find the minimum price of plus size clothing", "schema": "CREATE TABLE products (id INT, category VARCHAR(50), subcategory VARCHAR(50), is_plus_size BOOLEAN, price DECIMAL(5,2)); INSERT INTO products (id, category, subcategory, is_plus_size, price) VALUES (1, 'Clothing', 'Tops', FALSE, 19.99), (2, 'Clothing', 'Pants', TRUE, 49.99), (3, 'Clothing', 'Dresses', FALSE, 69.99), (4, 'Clothing', 'Jackets', TRUE, 89.99), (5, 'Clothing', 'Skirts', FALSE, 39.99);", "sql": "SELECT MIN(price) FROM products WHERE category = 'Clothing' AND is_plus_size = TRUE;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 84, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'without_overlaps' (example 198).", "schema": null, "sql": "DELETE FROM temporal_mltrng3;", "explanation": "DML from PostgreSQL core regression test for Without Overlaps.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_delete", "is_postgresql_specific": false, "sql_length": 29, "num_statements": 1} {"question": "What is the minimum age of patients diagnosed with HIV in Florida?", "schema": "CREATE TABLE Patients (ID INT, Age INT, Disease VARCHAR(20), State VARCHAR(20)); INSERT INTO Patients (ID, Age, Disease, State) VALUES (1, 34, 'Tuberculosis', 'California'); INSERT INTO Patients (ID, Age, Disease, State) VALUES (2, 28, 'HIV', 'Florida');", "sql": "SELECT MIN(Age) FROM Patients WHERE Disease = 'HIV' AND State = 'Florida';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 74, "num_statements": 1} {"question": "What is the number of teachers who have completed professional development in data science, by school district?", "schema": "CREATE TABLE districts (district_id INT, district_name VARCHAR(255)); CREATE TABLE teachers (teacher_id INT, district_id INT, years_of_experience INT); CREATE TABLE workshops (workshop_id INT, district_id INT, workshop_topic VARCHAR(255), teacher_id INT); INSERT INTO districts (district_id, district_name) VALUES (1, 'Downtown'), (2, 'Uptown'); INSERT INTO teachers (teacher_id, district_id, years_of_experience) VALUES (1, 1, 5), (2, 1, 10), (3, 2, 3), (4, 2, 8); INSERT INTO workshops (workshop_id, district_id, workshop_topic, teacher_id) VALUES (1, 1, 'Data Science', 1), (2, 1, 'Data Science', 2), (3, 2, 'Data Science', 3), (4, 2, 'Data Science', 4);", "sql": "SELECT sd.district_name, COUNT(w.teacher_id) as num_teachers FROM districts sd JOIN workshops w ON sd.district_id = w.district_id WHERE w.workshop_topic = 'Data Science' GROUP BY sd.district_name;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 196, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the only city name with a population density of 200?", "schema": "CREATE TABLE table_21284653_1 (name VARCHAR, population_density VARCHAR)", "sql": "SELECT name FROM table_21284653_1 WHERE population_density = '200';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 67, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the highest tonnage for a ship from Great Britain named Newton Ash?", "schema": "CREATE TABLE table_name_13 (tonnage INTEGER, nationality VARCHAR, ship VARCHAR)", "sql": "SELECT MAX(tonnage) FROM table_name_13 WHERE nationality = 'great britain' AND ship = 'newton ash';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 99, "num_statements": 1} {"question": "What was the total revenue for each team's merchandise sales by month in 2022?", "schema": "CREATE TABLE teams (id INT, name VARCHAR(255)); INSERT INTO teams (id, name) VALUES (1, 'TeamA'), (2, 'TeamB'), (3, 'TeamC'); CREATE TABLE merchandise_sales (team_id INT, year INT, month INT, revenue DECIMAL(10,2));", "sql": "SELECT t.name, m.month, SUM(m.revenue) as total_revenue FROM merchandise_sales m JOIN teams t ON m.team_id = t.id WHERE m.year = 2022 GROUP BY t.name, m.month;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 159, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Find the patient who has the most recent undergoing treatment?", "schema": "CREATE TABLE undergoes (patient VARCHAR, dateundergoes VARCHAR)", "sql": "SELECT patient FROM undergoes ORDER BY dateundergoes LIMIT 1;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 61, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many CFL teams drafted someone from mount allison college?", "schema": "CREATE TABLE table_26996293_3 (cfl_team VARCHAR, college VARCHAR)", "sql": "SELECT COUNT(cfl_team) FROM table_26996293_3 WHERE college = 'Mount Allison';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 77, "num_statements": 1} {"question": "PostgreSQL Json: show example 4.", "schema": null, "sql": "-- Simple scalar/primitive values contain only the identical value: SELECT '\"foo\"'::jsonb @> '\"foo\"'::jsonb; -- The array on the right side is contained within the one on the left: SELECT '[1, 2, 3]'::jsonb @> '[1, 3]'::jsonb; -- Order of array elements is not significant, so this is also true: SELECT '[1, 2, 3]'::jsonb @> '[3, 1]'::jsonb; -- Duplicate array elements don't matter either: SELECT '[1, 2, 3]'::jsonb @> '[1, 2, 2]'::jsonb; -- The object with a single pair on the right side is contained -- within the object on the left side: SELECT '{\"product\": \"PostgreSQL\", \"version\": 9.4, \"jsonb\": true}'::jsonb @> '{\"version\": 9.4}'::jsonb; -- The array on the right side is not considered contained within the -- array on the left, even though a similar array is nested within it: SELECT '[1, 2, [1, 3]]'::jsonb @> '[1, 3]'::jsonb; -- yields false -- But with a layer of nesting, it is contained: SELECT '[1, 2, [1, 3]]'::jsonb @> '[[1, 3]]'::jsonb; -- Similarly, containment is not reported here: SELECT '{\"foo\": {\"bar\": \"baz\"}}'::jsonb @> '{\"bar\": \"baz\"}'::jsonb; -- yields false -- A top-level key and an empty object is contained: SELECT '{\"foo\": {\"bar\": \"baz\"}}'::jsonb @> '{\"foo\": {}}'::jsonb;", "explanation": "Example from PostgreSQL documentation on Json.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": true, "sql_length": 1205, "num_statements": 9} {"question": "What is the total number of inclusive housing units in the state of Florida?", "schema": "CREATE TABLE housing (id INT, state VARCHAR(20), inclusive BOOLEAN); INSERT INTO housing (id, state, inclusive) VALUES (1, 'Florida', TRUE), (2, 'Florida', FALSE), (3, 'Florida', TRUE);", "sql": "SELECT COUNT(*) FROM housing WHERE state = 'Florida' AND inclusive = TRUE;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 74, "num_statements": 1} {"question": "How many employees in each department have been promoted in the past year?", "schema": "CREATE TABLE Employees (EmployeeID INT, Department VARCHAR(25), Salary DECIMAL(10, 2), PromotionDate DATE); INSERT INTO Employees (EmployeeID, Department, Salary, PromotionDate) VALUES (1, 'Marketing', 60000, '2021-12-01'), (2, 'IT', 70000, NULL), (3, 'Marketing', 65000, NULL), (4, 'IT', 75000, '2022-02-01');", "sql": "SELECT Department, COUNT(*) FROM Employees WHERE PromotionDate >= DATEADD(year, -1, GETDATE()) GROUP BY Department;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 115, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What's the track for 1978?", "schema": "CREATE TABLE table_name_47 (track VARCHAR, year VARCHAR)", "sql": "SELECT track FROM table_name_47 WHERE year = 1978;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 50, "num_statements": 1} {"question": "pgTAP test for Aretap (assertion 66).", "schema": null, "sql": "SELECT * FROM check_test(\n sequences_are( ARRAY['sumeseq', 'someseq', 'bar'] ),\n false,\n 'sequences_are(sequences) missing',\n 'Search path ' || pg_catalog.current_setting('search_path') || ' should have the correct sequences',\n ' Missing sequences:\n bar'\n);", "explanation": "SQL assertion from pgTAP test suite for Aretap.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": true, "sql_length": 282, "num_statements": 1} {"question": "Update all records in the 'ingredient' table where the 'origin' column is 'China'", "schema": "CREATE TABLE ingredient (product_id INT, ingredient TEXT, origin TEXT);", "sql": "UPDATE ingredient SET origin = 'Taiwan' WHERE origin = 'China';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 63, "num_statements": 1} {"question": "Insert a new record in the 'state_facts' table with data for the state of 'TX': capital 'Austin', population 29_527_941, and area 268_596 sq miles", "schema": "CREATE TABLE state_facts (state VARCHAR(2), capital VARCHAR(50), population INT, area_sq_miles INT);", "sql": "INSERT INTO state_facts (state, capital, population, area_sq_miles) VALUES ('TX', 'Austin', 29527941, 268596);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 110, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What year was she on a 350cc class bike, ranked 16th, with over 0 wins?", "schema": "CREATE TABLE table_name_85 (year INTEGER, wins VARCHAR, class VARCHAR, rank VARCHAR)", "sql": "SELECT SUM(year) FROM table_name_85 WHERE class = '350cc' AND rank = '16th' AND wins > 0;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 89, "num_statements": 1} {"question": "Identify the number of unique volunteers who participated in events in NYC in 2020 and the average number of hours they contributed.", "schema": "CREATE TABLE Volunteers (VolunteerID int, EventID int, Hours decimal(5,2)); INSERT INTO Volunteers (VolunteerID, EventID, Hours) VALUES (1, 100, 5.5), (2, 101, 7.2), (3, 100, 3.8), (4, 102, 6.5), (5, 101, 8.0);", "sql": "SELECT COUNT(DISTINCT VolunteerID), AVG(Hours) FROM Volunteers INNER JOIN (SELECT EventID FROM Events WHERE City = 'NYC') AS EventLocations ON Volunteers.EventID = EventLocations.EventID WHERE EXTRACT(YEAR FROM EventDate) = 2020;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 229, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: what is the current version of the project64 with gpl v2 license?", "schema": "CREATE TABLE table_name_38 (current_version VARCHAR, license VARCHAR, name VARCHAR)", "sql": "SELECT current_version FROM table_name_38 WHERE license = 'gpl v2' AND name = 'project64';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 90, "num_statements": 1} {"question": "What are the production figures for the 'Troll' field for the years 2018 and 2019", "schema": "CREATE TABLE field_production (field VARCHAR(50), year INT, oil_production FLOAT, gas_production FLOAT); INSERT INTO field_production (field, year, oil_production, gas_production) VALUES ('Troll', 2018, 1234.5, 678.9); INSERT INTO field_production (field, year, oil_production, gas_production) VALUES ('Troll', 2019, 2345.6, 789.0);", "sql": "SELECT year, oil_production, gas_production FROM field_production WHERE field = 'Troll';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 88, "num_statements": 1} {"question": "Create a view named \"affordable_properties\" that contains all properties with a sustainability rating of at least 80", "schema": "CREATE TABLE properties (property_id INT, property_name VARCHAR(255), location VARCHAR(255), sustainability_rating INT); INSERT INTO properties (property_id, property_name, location, sustainability_rating) VALUES (1, 'Green Heights', 'Brooklyn', 85), (2, 'Sunrise Village', 'Seattle', 70), (3, 'EcoHaven', 'Austin', 88);", "sql": "CREATE VIEW affordable_properties AS SELECT * FROM properties WHERE sustainability_rating >= 80;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "ddl_view", "is_postgresql_specific": false, "sql_length": 96, "num_statements": 1} {"question": "What is the average production rate of chemical X in the last 3 months?", "schema": "CREATE TABLE Chemical_Production (Chemical_Name VARCHAR(255), Production_Date DATE, Production_Rate INT); INSERT INTO Chemical_Production (Chemical_Name, Production_Date, Production_Rate) VALUES ('Chemical X', '2022-01-01', 500), ('Chemical X', '2022-02-01', 600), ('Chemical X', '2022-03-01', 700);", "sql": "SELECT AVG(Production_Rate) FROM Chemical_Production WHERE Chemical_Name = 'Chemical X' AND Production_Date >= DATE_SUB(CURDATE(), INTERVAL 3 MONTH);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 149, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: what is the name when the lane is less than 4 and mark is 52.64?", "schema": "CREATE TABLE table_name_37 (name VARCHAR, lane VARCHAR, mark VARCHAR)", "sql": "SELECT name FROM table_name_37 WHERE lane < 4 AND mark = '52.64';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 65, "num_statements": 1} {"question": "Add a new record to the \"sustainable_practices\" table with an ID of 6, a description of 'Recycling initiatives in housekeeping', and a category of 'Waste'", "schema": "CREATE TABLE sustainable_practices (practice_id INT, description TEXT, category VARCHAR(20));", "sql": "INSERT INTO sustainable_practices (practice_id, description, category) VALUES (6, 'Recycling initiatives in housekeeping', 'Waste');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 132, "num_statements": 1} {"question": "Which decentralized applications were developed by Stellar's development team?", "schema": "CREATE TABLE decentralized_apps (app_name VARCHAR(255), developer VARCHAR(255)); INSERT INTO decentralized_apps (app_name, developer) VALUES ('StellarPort', 'Stellar Devs'); INSERT INTO decentralized_apps (app_name, developer) VALUES ('Smartlands', 'Stellar Devs');", "sql": "SELECT app_name FROM decentralized_apps WHERE developer = 'Stellar Devs';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 73, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Find the year which offers the largest number of courses.", "schema": "CREATE TABLE SECTION (YEAR VARCHAR)", "sql": "SELECT YEAR FROM SECTION GROUP BY YEAR ORDER BY COUNT(*) DESC LIMIT 1;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 70, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the type for EU Council Presidency of UK and John Major as President-in-Office?", "schema": "CREATE TABLE table_name_27 (type VARCHAR, eu_council_presidency VARCHAR, president_in_office VARCHAR)", "sql": "SELECT type FROM table_name_27 WHERE eu_council_presidency = 'uk' AND president_in_office = 'john major';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 105, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What were the blocks per game in the selection where the field goal percentage was .594 (2nd)?", "schema": "CREATE TABLE table_25774493_3 (blocks_per_game VARCHAR, field_goal_percentage VARCHAR)", "sql": "SELECT blocks_per_game FROM table_25774493_3 WHERE field_goal_percentage = '.594 (2nd)';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 88, "num_statements": 1} {"question": "What is the total budget allocated for infrastructure projects in the 'StateData' schema's 'StateInfrastructure' table, for services provided by private contractors in 2024?", "schema": "CREATE SCHEMA StateData; CREATE TABLE StateInfrastructure (Service varchar(255), Year int, Budget int, Contractor varchar(255)); INSERT INTO StateInfrastructure (Service, Year, Budget, Contractor) VALUES ('Roads', 2024, 300000, 'Public'), ('Roads', 2024, 700000, 'Private'), ('Bridges', 2024, 500000, 'Public');", "sql": "SELECT SUM(Budget) FROM StateData.StateInfrastructure WHERE Year = 2024 AND Contractor = 'Private';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 99, "num_statements": 1} {"question": "What is the maximum duration of 'Deep Space' missions?", "schema": "CREATE TABLE MissionData (name VARCHAR(50), type VARCHAR(15), duration INT); INSERT INTO MissionData (name, type, duration) VALUES ('Ares 1', 'Low Earth Orbit', 150), ('Artemis 1', 'Deep Space', 300), ('Apollo 11', 'Low Earth Orbit', 200), ('Voyager 1', 'Deep Space', 400);", "sql": "SELECT MAX(duration) FROM MissionData WHERE type = 'Deep Space';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 64, "num_statements": 1} {"question": "Find the total donation amount for each donor", "schema": "CREATE TABLE DonationAmounts (DonationID INT, DonorID INT, DonationDate DATE, DonationAmount DECIMAL(10,2));", "sql": "SELECT DonorID, SUM(DonationAmount) FROM DonationAmounts GROUP BY DonorID;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 74, "num_statements": 1} {"question": "What is the maximum number of articles published in a day by \"Al Jazeera\" in 2022?", "schema": "CREATE TABLE articles (id INT, title TEXT, publication TEXT, published_at DATE); INSERT INTO articles (id, title, publication, published_at) VALUES (1, 'Article 1', 'Al Jazeera', '2022-01-01'); INSERT INTO articles (id, title, publication, published_at) VALUES (2, 'Article 2', 'Al Jazeera', '2022-01-02'); INSERT INTO articles (id, title, publication, published_at) VALUES (3, 'Article 3', 'Al Jazeera', '2022-01-02');", "sql": "SELECT MAX(cnt) FROM (SELECT published_at, COUNT(*) as cnt FROM articles WHERE publication = 'Al Jazeera' AND YEAR(published_at) = 2022 GROUP BY published_at) as t;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 164, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the Place of the Player with a To par of –1?", "schema": "CREATE TABLE table_name_61 (place VARCHAR, to_par VARCHAR)", "sql": "SELECT place FROM table_name_61 WHERE to_par = '–1';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 52, "num_statements": 1} {"question": "How many hours did each volunteer contribute in the first quarter of 2024, including any partial hours?", "schema": "CREATE TABLE VolunteerHours (HourID INT, VolunteerID INT, Hours DECIMAL(10,2), HourDate DATE);", "sql": "SELECT V.Name, SUM(VH.Hours) as TotalHours FROM VolunteerHours VH JOIN Volunteers V ON VH.VolunteerID = Volunteers.VolunteerID WHERE VH.HourDate BETWEEN '2024-01-01' AND '2024-03-31' GROUP BY V.VolunteerID, V.Name;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 215, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the 1st(m) score for the Person who had a total points of 272.7", "schema": "CREATE TABLE table_14407512_9 (points VARCHAR)", "sql": "SELECT 1 AS st__m_ FROM table_14407512_9 WHERE points = '272.7';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 64, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which incumbent was in the 24th district?", "schema": "CREATE TABLE table_name_60 (incumbent VARCHAR, district VARCHAR)", "sql": "SELECT incumbent FROM table_name_60 WHERE district = '24th';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 60, "num_statements": 1} {"question": "PostgreSQL regression test 'brin_multi': Write the SELECT query (example 96).", "schema": null, "sql": "SELECT COUNT(*) FROM brin_test_multi_1 WHERE a <= 25;", "explanation": "Regression test for Brin Multi in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT COUNT(*) FROM brin_test_multi_1 WHERE a <= 25) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 53, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Candidate of riikka manner has how many votes?", "schema": "CREATE TABLE table_name_41 (votes VARCHAR, candidate VARCHAR)", "sql": "SELECT votes FROM table_name_41 WHERE candidate = 'riikka manner';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the film title nominated from the Netherlands?", "schema": "CREATE TABLE table_name_18 (film_title_used_in_nomination VARCHAR, country VARCHAR)", "sql": "SELECT film_title_used_in_nomination FROM table_name_18 WHERE country = 'netherlands';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 86, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many total gold medals did India receive?", "schema": "CREATE TABLE table_name_10 (gold VARCHAR, nation VARCHAR)", "sql": "SELECT gold FROM table_name_10 WHERE nation = 'india';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 54, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What date was Rodger Ward the winning driver?", "schema": "CREATE TABLE table_name_48 (date VARCHAR, winning_driver VARCHAR)", "sql": "SELECT date FROM table_name_48 WHERE winning_driver = 'rodger ward';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 68, "num_statements": 1} {"question": "Delete all records of a specific type of intelligence operation in the \"intelligence_ops\" table", "schema": "CREATE TABLE intelligence_ops (id INT, year INT, location VARCHAR(255), type VARCHAR(255), result VARCHAR(255)); INSERT INTO intelligence_ops (id, year, location, type, result) VALUES (1, 2015, 'Russia', 'Surveillance', 'Success'), (2, 2015, 'Germany', 'Infiltration', 'Failure');", "sql": "DELETE FROM intelligence_ops WHERE type = 'Infiltration';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_delete", "is_postgresql_specific": false, "sql_length": 57, "num_statements": 1} {"question": "What is the minimum pH level in brackish water shrimp farms in India?", "schema": "CREATE TABLE water_params (id INT, farm_id INT, country TEXT, water_type TEXT, pH DECIMAL(4,2)); INSERT INTO water_params (id, farm_id, country, water_type, pH) VALUES (1, 1, 'India', 'Brackish', 7.8); INSERT INTO water_params (id, farm_id, country, water_type, pH) VALUES (2, 2, 'India', 'Brackish', 7.6); INSERT INTO water_params (id, farm_id, country, water_type, pH) VALUES (3, 3, 'India', 'Freshwater', 7.9);", "sql": "SELECT MIN(pH) FROM water_params WHERE country = 'India' AND water_type = 'Brackish';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 85, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What frequency does Chihuahua Sinaloa Durango cover?", "schema": "CREATE TABLE table_14670060_1 (frequency VARCHAR, coverage VARCHAR)", "sql": "SELECT frequency FROM table_14670060_1 WHERE coverage = 'Chihuahua Sinaloa Durango';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 84, "num_statements": 1} {"question": "What is the average annual temperature change in the last 5 years for the Arctic region?", "schema": "CREATE TABLE arctic_temperatures (location VARCHAR(50), year INTEGER, temperature FLOAT); INSERT INTO arctic_temperatures (location, year, temperature) VALUES ('Arctic', 2016, -23.4), ('Arctic', 2017, -23.2), ('Arctic', 2018, -23.0), ('Arctic', 2019, -22.8), ('Arctic', 2020, -22.6), ('Arctic', 2021, -22.4);", "sql": "SELECT AVG(temperature - LAG(temperature) OVER (ORDER BY year)) AS avg_annual_temp_change FROM arctic_temperatures WHERE location = 'Arctic' AND year BETWEEN 2016 AND 2021;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_window_function", "is_postgresql_specific": true, "sql_length": 173, "num_statements": 1} {"question": "What is the percentage of players who have completed a given achievement, broken down by platform?", "schema": "CREATE TABLE GameAchievements (PlayerID INT, PlayerName TEXT, Platform TEXT, Achievement TEXT, Completed BOOLEAN); INSERT INTO GameAchievements (PlayerID, PlayerName, Platform, Achievement, Completed) VALUES (1, 'Alice', 'PC', 'Achievement 1', TRUE), (2, 'Bob', 'PC', 'Achievement 1', FALSE), (3, 'Charlie', 'Console', 'Achievement 1', TRUE), (4, 'David', 'Console', 'Achievement 1', TRUE), (5, 'Eve', 'PC', 'Achievement 1', FALSE);", "sql": "SELECT Platform, (COUNT(*) FILTER (WHERE Completed = TRUE)) * 100.0 / COUNT(*) AS PercentageCompleted FROM GameAchievements GROUP BY Platform;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": true, "sql_length": 142, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What title had a remark of #21 us [riaa certified gold january 3, 1990]?", "schema": "CREATE TABLE table_name_61 (title VARCHAR, remark VARCHAR)", "sql": "SELECT title FROM table_name_61 WHERE remark = '#21 us [riaa certified gold january 3, 1990]';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 94, "num_statements": 1} {"question": "Add a new department 'research' and insert an employee named 'Eli' into it with the 'employee' role and an id of 5.", "schema": "CREATE TABLE departments (id INT, department VARCHAR(50)); CREATE TABLE employees (id INT, name VARCHAR(50), department VARCHAR(50), role VARCHAR(50)); INSERT INTO departments (id, department) VALUES (1, 'hr'), (2, 'operations'), (3, 'it'), (4, 'marketing'), (5, 'research');", "sql": "INSERT INTO employees (id, name, department, role) VALUES (5, 'Eli', 'research', 'employee');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 93, "num_statements": 1} {"question": "What is the maximum altitude reached by space probes?", "schema": "CREATE TABLE Space_Probes (probe_name TEXT, max_altitude FLOAT); INSERT INTO Space_Probes (probe_name, max_altitude) VALUES ('Voyager 1', 21384000000), ('New Horizons', 12670000000), ('Pioneer 10', 12670000000), ('Voyager 2', 11620000000), ('Pioneer 11', 10500000000);", "sql": "SELECT max_altitude as max_altitude_reached FROM Space_Probes ORDER BY max_altitude DESC LIMIT 1;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 97, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the lowest number of points for Drivers that have raced in more than 16 Races?", "schema": "CREATE TABLE table_name_35 (points INTEGER, races INTEGER)", "sql": "SELECT MIN(points) FROM table_name_35 WHERE races > 16;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 55, "num_statements": 1} {"question": "Delete the volunteer with ID 2", "schema": "CREATE TABLE volunteers (volunteer_id INT, signup_date DATE); INSERT INTO volunteers (volunteer_id, signup_date) VALUES (1, '2022-01-05'), (2, '2022-03-30'), (3, '2022-04-15'), (4, '2022-06-10');", "sql": "DELETE FROM volunteers WHERE volunteer_id = 2;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_delete", "is_postgresql_specific": false, "sql_length": 46, "num_statements": 1} {"question": "What is the maximum number of stories in buildings in Brazil and Argentina?", "schema": "CREATE TABLE buildings (id INT, country VARCHAR(255), building_name VARCHAR(255), number_of_stories INT); INSERT INTO buildings (id, country, building_name, number_of_stories) VALUES (1, 'Brazil', 'Edifício Copan', 38), (2, 'Brazil', 'Museum of Tomorrow', 14), (3, 'Argentina', 'Alvear Tower', 54), (4, 'Argentina', 'Kavanagh', 33);", "sql": "SELECT country, MAX(number_of_stories) FROM buildings GROUP BY country;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 71, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the Nominee from the Category that is best costume design?", "schema": "CREATE TABLE table_name_64 (nominee VARCHAR, category VARCHAR)", "sql": "SELECT nominee FROM table_name_64 WHERE category = 'best costume design';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 73, "num_statements": 1} {"question": "What is the number of patients treated by gender?", "schema": "CREATE TABLE Patients (PatientID int, Gender varchar(10)); INSERT INTO Patients (PatientID, Gender) VALUES (1, 'Male'), (2, 'Female');", "sql": "SELECT Gender, COUNT(*) FROM Patients GROUP BY Gender;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 54, "num_statements": 1} {"question": "Write the PL/pgSQL object from PostgreSQL regression test 'triggers' (example 86).", "schema": null, "sql": "$$;\n\nCREATE TRIGGER city_delete_trig INSTEAD OF DELETE ON city_view\nFOR EACH ROW EXECUTE PROCEDURE city_delete();\n\nCREATE FUNCTION city_update() RETURNS trigger LANGUAGE plpgsql AS $$\ndeclare\n ctry_id int;", "explanation": "PL/pgSQL object from PostgreSQL core test for Triggers.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "plpgsql_trigger", "is_postgresql_specific": true, "sql_length": 208, "num_statements": 3} {"question": "What is the total number of military personnel and military spending for each country involved in defense diplomacy?", "schema": "CREATE TABLE defense_diplomacy (id INT, country VARCHAR, military_personnel INT, military_spending FLOAT);", "sql": "SELECT country, SUM(military_personnel) AS total_military_personnel, SUM(military_spending) AS total_military_spending FROM defense_diplomacy GROUP BY country;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 159, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the earliest round where the gt winning car is max angelelli?", "schema": "CREATE TABLE table_28490105_1 (rnd INTEGER, gt_winning_car VARCHAR)", "sql": "SELECT MIN(rnd) FROM table_28490105_1 WHERE gt_winning_car = 'Max Angelelli';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 77, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'pgcrypto' (example 38).", "schema": null, "sql": "SELECT crypt('the minimum number is still observed', '$6$rounds=10$roundstoolow')\n = '$6$rounds=1000$roundstoolow$kUMsbe306n21p9R.FRkW3IGn.S9NPN0x50YhH1xhLsPuWGsUSklZt58jaTfF4ZEQpyUNGc0dqbpBYYBaHHrsX.' AS result;", "explanation": "Example query from the 'pgcrypto' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 222, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which overall pick number went to college at Youngstown State?", "schema": "CREATE TABLE table_10360823_1 (overall INTEGER, college VARCHAR)", "sql": "SELECT MIN(overall) FROM table_10360823_1 WHERE college = 'Youngstown State';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 77, "num_statements": 1} {"question": "What is the total number of hotels in each country?", "schema": "CREATE TABLE hotels_geo (id INT PRIMARY KEY, hotel_id INT, country VARCHAR(255));", "sql": "SELECT h.country, COUNT(DISTINCT hg.hotel_id) FROM hotels h JOIN hotels_geo hg ON h.id = hg.hotel_id GROUP BY h.country;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 120, "num_statements": 1} {"question": "Find the total number of open data sets related to transparency in 'city', 'county', and 'state' schemas.", "schema": "CREATE SCHEMA city; CREATE SCHEMA county; CREATE SCHEMA state; CREATE TABLE city.transparency_data (id INT, name VARCHAR(255), is_open BOOLEAN); CREATE TABLE county.transparency_data (id INT, name VARCHAR(255), is_open BOOLEAN); CREATE TABLE state.transparency_data (id INT, name VARCHAR(255), is_open BOOLEAN); INSERT INTO city.transparency_data (id, name, is_open) VALUES (1, 'budget', true), (2, 'council_meetings', true); INSERT INTO county.transparency_data (id, name, is_open) VALUES (1, 'budget', false), (2, 'council_meetings', true); INSERT INTO state.transparency_data (id, name, is_open) VALUES (1, 'budget', true), (2, 'council_meetings', false);", "sql": "SELECT COUNT(*) FROM ( (SELECT * FROM city.transparency_data WHERE is_open = true) UNION (SELECT * FROM county.transparency_data WHERE is_open = true) UNION (SELECT * FROM state.transparency_data WHERE is_open = true) ) AS combined_transparency_data;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 250, "num_statements": 1} {"question": "Show a SQL definition from the postgrest project (big_schema, item 17).", "schema": null, "sql": "CREATE EXTENSION IF NOT EXISTS plpgsql WITH SCHEMA pg_catalog;", "explanation": "SQL definition from the open-source postgrest PostgreSQL project.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "plpgsql", "is_postgresql_specific": true, "sql_length": 62, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the place for player John Merrick?", "schema": "CREATE TABLE table_name_52 (place VARCHAR, player VARCHAR)", "sql": "SELECT place FROM table_name_52 WHERE player = 'john merrick';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 62, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which Location has a Competition of Group Stage, a Lineup of Start and a Date of 2000-09-17?", "schema": "CREATE TABLE table_name_67 (location VARCHAR, date VARCHAR, competition VARCHAR, lineup VARCHAR)", "sql": "SELECT location FROM table_name_67 WHERE competition = 'group stage' AND lineup = 'start' AND date = '2000-09-17';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 114, "num_statements": 1} {"question": "What is the average budget for ocean floor mapping projects in each region?", "schema": "CREATE TABLE ocean_floor_map (id INT, project_name VARCHAR(255), region VARCHAR(255), budget FLOAT);", "sql": "SELECT region, avg(budget) FROM ocean_floor_map GROUP BY region;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 64, "num_statements": 1} {"question": "Find the total revenue of TV shows produced by Green Studios.", "schema": "CREATE TABLE studio (studio_id INT, name VARCHAR(100)); INSERT INTO studio (studio_id, name) VALUES (1, 'Green Studios'); CREATE TABLE tv_show (tv_show_id INT, title VARCHAR(100), studio_id INT, revenue INT);", "sql": "SELECT SUM(tv_show.revenue) FROM tv_show WHERE tv_show.studio_id = 1;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 69, "num_statements": 1} {"question": "What is the maximum account balance for customers in each region?", "schema": "CREATE TABLE customer_data_2 (customer_id INT, account_balance DECIMAL(10, 2), region VARCHAR(20)); INSERT INTO customer_data_2 (customer_id, account_balance, region) VALUES (1, 5000, 'Latin America'), (2, 7000, 'North America'), (3, 6000, 'Latin America'), (4, 8000, 'Europe'), (5, 9000, 'Asia'); CREATE VIEW customer_data_view AS SELECT region, MAX(account_balance) as max_balance FROM customer_data_2 GROUP BY region;", "sql": "SELECT region, max_balance FROM customer_data_view;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 51, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many points are in the scored category for the team that played less than 18 games?", "schema": "CREATE TABLE table_name_41 (scored INTEGER, played INTEGER)", "sql": "SELECT SUM(scored) FROM table_name_41 WHERE played < 18;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 56, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'pg_prewarm' (example 3).", "schema": null, "sql": "SELECT pg_prewarm('test', 'buffer');", "explanation": "Example query from the 'pg_prewarm' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 36, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: In the region where Roskilde is the largest city, what is the seat of administration?", "schema": "CREATE TABLE table_16278602_1 (seat_of_administration VARCHAR, largest_city VARCHAR)", "sql": "SELECT seat_of_administration FROM table_16278602_1 WHERE largest_city = 'Roskilde';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 84, "num_statements": 1} {"question": "PostgreSQL regression test 'text': Write the SELECT query (example 53).", "schema": null, "sql": "select format('%2$s, %1$s', variadic array['first', 'second']);", "explanation": "Regression test for Text in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select format('%2$s, %1$s', variadic array['first', 'second'])) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 63, "num_statements": 1} {"question": "How many sustainable building projects were completed in New York in 2021?", "schema": "CREATE TABLE sustainable_projects (id INT, project_name TEXT, state TEXT, completion_year INT, is_sustainable BOOLEAN); INSERT INTO sustainable_projects (id, project_name, state, completion_year, is_sustainable) VALUES (1, 'Solar Park', 'New York', 2021, true), (2, 'Wind Farm', 'California', 2020, true), (3, 'Green Apartments', 'New York', 2021, true), (4, 'Eco-Hotel', 'Florida', 2020, false);", "sql": "SELECT COUNT(*) FROM sustainable_projects WHERE state = 'New York' AND completion_year = 2021 AND is_sustainable = true;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 120, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which Circuit did Johnny Wakefield win?", "schema": "CREATE TABLE table_name_3 (circuit VARCHAR, winning_driver VARCHAR)", "sql": "SELECT circuit FROM table_name_3 WHERE winning_driver = 'johnny wakefield';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 75, "num_statements": 1} {"question": "PostgreSQL regression test 'tstypes': Write the SELECT query (example 195).", "schema": null, "sql": "SELECT ts_filter('base hidden rebel spaceship strike'::tsvector, '{a,b,NULL}');", "explanation": "Regression test for Tstypes in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT ts_filter('base hidden rebel spaceship strike'::tsvector, '{a,b,NULL}')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": true, "sql_length": 79, "num_statements": 1} {"question": "Determine the number of wind turbines installed in Germany", "schema": "CREATE TABLE WindTurbines (id INT, name TEXT, height INT, location TEXT); INSERT INTO WindTurbines (id, name, height, location) VALUES (1, 'Enercon E-126', 198, 'Germany'); INSERT INTO WindTurbines (id, name, height, location) VALUES (2, 'Vestas V164', 220, 'Denmark');", "sql": "SELECT COUNT(*) FROM WindTurbines WHERE location = 'Germany';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 61, "num_statements": 1} {"question": "Delete records for strains sold in Michigan dispensaries with sales amounts less than 500.", "schema": "CREATE TABLE OldDispensary (StrainName VARCHAR(255), SalesAmount DECIMAL(10,2), State VARCHAR(255));", "sql": "DELETE FROM OldDispensary WHERE SalesAmount < 500 AND State = 'Michigan';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_delete", "is_postgresql_specific": false, "sql_length": 73, "num_statements": 1} {"question": "List all unique fields from the 'geology' and 'drilling' tables.", "schema": "CREATE TABLE geology (well_id INT, rock_type VARCHAR(50)); CREATE TABLE drilling (well_id INT, drill_depth INT);", "sql": "SELECT field FROM (SELECT 'geology' as table_name, column_name as field FROM information_schema.columns WHERE table_name = 'geology' UNION ALL SELECT 'drilling' as table_name, column_name as field FROM information_schema.columns WHERE table_name = 'drilling') as subquery;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": true, "sql_length": 272, "num_statements": 1} {"question": "List the tree species with their respective average timber volume in the private_lands schema, in descending order.", "schema": "CREATE TABLE private_lands.timber_volume (species VARCHAR(255), volume DECIMAL(5,2));", "sql": "SELECT species, AVG(volume) AS avg_volume FROM private_lands.timber_volume GROUP BY species ORDER BY avg_volume DESC;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 117, "num_statements": 1} {"question": "What is the minimum age of readers who prefer opinion pieces in the 'Latin America' region?", "schema": "CREATE TABLE readers (id INT, name TEXT, age INT, region TEXT, interest TEXT); INSERT INTO readers (id, name, age, region, interest) VALUES (1, 'Juan Garcia', 28, 'Latin America', 'opinion');", "sql": "SELECT MIN(age) FROM readers WHERE interest = 'opinion' AND region = 'Latin America';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 85, "num_statements": 1} {"question": "PostgreSQL regression test 'rangetypes': Write the SELECT query (example 328).", "schema": null, "sql": "select * from float8range_test;", "explanation": "Regression test for Rangetypes in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select * from float8range_test) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 31, "num_statements": 1} {"question": "What is the maximum salary of female employees in the Finance department?", "schema": "CREATE TABLE Employees (EmployeeID INT, Department VARCHAR(20), Gender VARCHAR(10), Salary FLOAT); INSERT INTO Employees (EmployeeID, Department, Gender, Salary) VALUES (1, 'IT', 'Male', 70000), (2, 'HR', 'Female', 60000), (3, 'IT', 'Female', 75000), (4, 'IT', 'Male', 78000), (5, 'Finance', 'Male', 85000), (6, 'Finance', 'Female', 90000), (7, 'HR', 'Male', 65000);", "sql": "SELECT MAX(Salary) FROM Employees WHERE Department = 'Finance' AND Gender = 'Female';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 85, "num_statements": 1} {"question": "What is the maximum number of mental health parity complaints filed in the last 6 months for the Native Hawaiian or Other Pacific Islander race?", "schema": "CREATE TABLE mental_health_parity_complaints (complaint_id INT, complaint_date DATE, race VARCHAR(20)); INSERT INTO mental_health_parity_complaints (complaint_id, complaint_date, race) VALUES (1, '2021-07-01', 'Asian'), (2, '2021-03-15', 'Black'), (3, '2021-01-01', 'Native Hawaiian or Other Pacific Islander');", "sql": "SELECT MAX(complaint_count) as max_complaints FROM (SELECT COUNT(*) as complaint_count FROM mental_health_parity_complaints WHERE complaint_date >= DATE_SUB(CURRENT_DATE, INTERVAL 6 MONTH) AND race = 'Native Hawaiian or Other Pacific Islander' GROUP BY complaint_date) as subquery;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 281, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which challenge loser has a Challenge Winner of roseny?", "schema": "CREATE TABLE table_name_81 (challenge_loser VARCHAR, challenge_winner VARCHAR)", "sql": "SELECT challenge_loser FROM table_name_81 WHERE challenge_winner = 'roseny';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 76, "num_statements": 1} {"question": "pgTAP test for Pg73 (assertion 21).", "schema": null, "sql": "select is('a'::char, 'a'::char, 'a=a char');", "explanation": "SQL assertion from pgTAP test suite for Pg73.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 44, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the average of lost for place less than 10, less than 23 points, and goals conceded less than 26 for the Chorrillo team?", "schema": "CREATE TABLE table_name_21 (lost INTEGER, goals_conceded VARCHAR, team VARCHAR, place VARCHAR, points VARCHAR)", "sql": "SELECT AVG(lost) FROM table_name_21 WHERE place < 10 AND points < 23 AND team = 'chorrillo' AND goals_conceded < 26;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 116, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the record for december 6", "schema": "CREATE TABLE table_17058151_6 (record VARCHAR, date VARCHAR)", "sql": "SELECT record FROM table_17058151_6 WHERE date = 'December 6';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 62, "num_statements": 1} {"question": "Which tennis player won the most matches in the 'wins' table?", "schema": "CREATE TABLE wins (win_id INT, player_id INT, match_id INT, team_id INT, wins INT); INSERT INTO wins (win_id, player_id, match_id, team_id, wins) VALUES (1, 13, 14, 715, 10);", "sql": "SELECT player_id, MAX(wins) FROM wins;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 38, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: WHEN has a Result of w 23–17?", "schema": "CREATE TABLE table_name_96 (date VARCHAR, result VARCHAR)", "sql": "SELECT date FROM table_name_96 WHERE result = 'w 23–17';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 56, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which railway was built in 1909?", "schema": "CREATE TABLE table_name_63 (railway VARCHAR, built VARCHAR)", "sql": "SELECT railway FROM table_name_63 WHERE built = '1909';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 55, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the number of production codes for episode #73?", "schema": "CREATE TABLE table_2623498_5 (prod_code VARCHAR, episode__number VARCHAR)", "sql": "SELECT COUNT(prod_code) FROM table_2623498_5 WHERE episode__number = '73';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 74, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What are the actual delivery dates of orders with quantity 1?", "schema": "CREATE TABLE Customer_Orders (Actual_Delivery_Date VARCHAR, Order_ID VARCHAR); CREATE TABLE ORDER_ITEMS (Order_ID VARCHAR, Order_Quantity VARCHAR)", "sql": "SELECT T1.Actual_Delivery_Date FROM Customer_Orders AS T1 JOIN ORDER_ITEMS AS T2 ON T1.Order_ID = T2.Order_ID WHERE T2.Order_Quantity = 1;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 138, "num_statements": 1} {"question": "What is the average speed of each cyclist in the 2022 Tour de France?", "schema": "CREATE TABLE cyclists (cyclist_id INT, cyclist_name VARCHAR(50), avg_speed DECIMAL(5,2)); INSERT INTO cyclists (cyclist_id, cyclist_name, avg_speed) VALUES (1, 'Tadej Pogacar', 40.2), (2, 'Jonas Vingegaard', 39.8), (3, 'Richard Carapaz', 39.3), (4, 'Geraint Thomas', 39.1), (5, 'Primož Roglič', 38.9);", "sql": "SELECT AVG(avg_speed) as avg_speed FROM cyclists;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 49, "num_statements": 1} {"question": "Find the donors who have donated in the last month and their respective donation amounts.", "schema": "CREATE TABLE donor_history (donor_id INT, donation_amount DECIMAL(10,2), donation_date DATE); INSERT INTO donor_history VALUES (1, 500.00, '2022-01-01'), (2, 300.00, '2022-01-05'), (3, 800.00, '2022-01-10'), (1, 200.00, '2022-02-01'), (2, 400.00, '2022-02-05'), (3, 100.00, '2022-02-10'), (1, 300.00, '2022-03-01'), (2, 200.00, '2022-03-05'), (3, 600.00, '2022-03-10');", "sql": "SELECT donor_id, donation_amount FROM donor_history WHERE donation_date >= DATEADD(month, -1, CURRENT_DATE) ORDER BY donor_id;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 126, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the final score of the game on November 24 with the visiting team the Indianapolis Colts?", "schema": "CREATE TABLE table_name_23 (final_score VARCHAR, visiting_team VARCHAR, date VARCHAR)", "sql": "SELECT final_score FROM table_name_23 WHERE visiting_team = 'indianapolis colts' AND date = 'november 24';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 106, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the production code for the episode with 5.5 million u.s. viewers?", "schema": "CREATE TABLE table_28195898_1 (production_code VARCHAR, us_viewers__millions_ VARCHAR)", "sql": "SELECT production_code FROM table_28195898_1 WHERE us_viewers__millions_ = '5.5';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 81, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: what is the location when the year is before 1979 and the result is 19-17?", "schema": "CREATE TABLE table_name_68 (location VARCHAR, year VARCHAR, result VARCHAR)", "sql": "SELECT location FROM table_name_68 WHERE year < 1979 AND result = '19-17';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 74, "num_statements": 1} {"question": "What is the number of juvenile offenders in the 'juvenile_justice' table?", "schema": "CREATE TABLE juvenile_justice (offender_id INT, age INT, offense VARCHAR(50), disposition VARCHAR(30), processing_date DATE);", "sql": "SELECT COUNT(*) FROM juvenile_justice WHERE age < 18;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 53, "num_statements": 1} {"question": "What is the average revenue per user (ARPU) for each game genre?", "schema": "CREATE TABLE games (id INT, genre VARCHAR(255), revenue INT); INSERT INTO games (id, genre, revenue) VALUES (1, 'FPS', 100000), (2, 'RPG', 200000), (3, 'FPS', 150000);", "sql": "SELECT genre, AVG(revenue) OVER (PARTITION BY genre) AS ARPU FROM games ORDER BY ARPU DESC;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "query_window_function", "is_postgresql_specific": true, "sql_length": 91, "num_statements": 1} {"question": "Insert a new organization focused on animal rights and effective altruism with id 5.", "schema": "CREATE TABLE organizations (id INT, name VARCHAR(255), focus VARCHAR(255)); INSERT INTO organizations (id, name, focus) VALUES (3, 'Climate Foundation', 'Climate Change');", "sql": "INSERT INTO organizations (id, name, focus) VALUES (5, 'Animal Rights Effective Altruism', 'Animal Rights, Effective Altruism');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 128, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What's the power output for channel tv-29?", "schema": "CREATE TABLE table_2523809_1 (power__kw_ VARCHAR, channel VARCHAR)", "sql": "SELECT power__kw_ FROM table_2523809_1 WHERE channel = 'TV-29';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 63, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which Opponent has a Surface of hard on 28 august 1993?", "schema": "CREATE TABLE table_name_46 (opponent VARCHAR, surface VARCHAR, date VARCHAR)", "sql": "SELECT opponent FROM table_name_46 WHERE surface = 'hard' AND date = '28 august 1993';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 86, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'btree_gin' (example 5).", "schema": null, "sql": "SELECT * FROM test_timetz WHERE i<'08:55:08 GMT+2'::timetz ORDER BY i;", "explanation": "Example query from the 'btree_gin' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 70, "num_statements": 1} {"question": "What is the total biomass of cod in the Barents Sea?", "schema": "CREATE TABLE FishData (species VARCHAR(50), location VARCHAR(50), biomass FLOAT); INSERT INTO FishData (species, location, biomass) VALUES ('Atlantic Cod', 'Barents Sea', 1200000), ('Herring', 'Barents Sea', 800000);", "sql": "SELECT location, SUM(biomass) FROM FishData WHERE species = 'Atlantic Cod' GROUP BY location;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 93, "num_statements": 1} {"question": "Identify vessels with a rising number of violations.", "schema": "CREATE TABLE VesselInspections (ID INT, Vessel VARCHAR(50), InspectionDate DATE, ViolationCount INT); INSERT INTO VesselInspections (ID, Vessel, InspectionDate, ViolationCount) VALUES (1, 'SS Freedom', '2020-01-01', 3), (2, 'SS Liberty', '2020-01-02', 2), (3, 'SS Eagle', '2020-01-03', 4), (4, 'SS Freedom', '2020-01-04', 4);", "sql": "SELECT Vessel, ViolationCount, LEAD(ViolationCount) OVER (PARTITION BY Vessel ORDER BY InspectionDate) as NextViolationCount FROM VesselInspections;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "query_window_function", "is_postgresql_specific": true, "sql_length": 148, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which Week has Attendance of 51,558?", "schema": "CREATE TABLE table_name_71 (week INTEGER, attendance VARCHAR)", "sql": "SELECT MIN(week) FROM table_name_71 WHERE attendance = 51 OFFSET 558;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 69, "num_statements": 1} {"question": "Update the \"depth\" column to 4000 for all records in the \"oceanography\" table where the \"ocean_name\" is \"Pacific Ocean\"", "schema": "CREATE TABLE oceanography (id INT PRIMARY KEY, ocean_name VARCHAR(255), depth FLOAT); INSERT INTO oceanography (id, ocean_name, depth) VALUES (1, 'Pacific Ocean', 3970); INSERT INTO oceanography (id, ocean_name, depth) VALUES (2, 'Indian Ocean', 3960);", "sql": "UPDATE oceanography SET depth = 4000 WHERE ocean_name = 'Pacific Ocean';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 72, "num_statements": 1} {"question": "How many users have engaged with 'Facebook' or 'LinkedIn' posts about data privacy in the last month?", "schema": "CREATE TABLE Facebook(id INT, user_id INT, post_time TIMESTAMP, content TEXT, privacy_topic BOOLEAN); CREATE TABLE LinkedIn(id INT, user_id INT, post_time TIMESTAMP, content TEXT, privacy_topic BOOLEAN);", "sql": "SELECT COUNT(DISTINCT user_id) FROM (SELECT user_id FROM Facebook WHERE post_time >= NOW() - INTERVAL '1 month' AND privacy_topic = TRUE UNION ALL SELECT user_id FROM LinkedIn WHERE post_time >= NOW() - INTERVAL '1 month' AND privacy_topic = TRUE) AS total_users;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 263, "num_statements": 1} {"question": "PostgreSQL regression test 'btree_index': Write the SELECT query (example 36).", "schema": null, "sql": "SELECT proname, proargtypes, pronamespace\n FROM pg_proc\n WHERE proname >= 'abs' AND (proname, proargtypes) < ('abs', NULL)\nORDER BY proname, proargtypes, pronamespace;", "explanation": "Regression test for Btree Index in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT proname, proargtypes, pronamespace\n FROM pg_proc\n WHERE proname >= 'abs' AND (proname, proargtypes) < ('abs', NULL)\nORDER BY proname, proargtypes, pronamespace) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 171, "num_statements": 1} {"question": "PostgreSQL regression test 'numeric': Write the SELECT query (example 572).", "schema": null, "sql": "SELECT power('-inf'::numeric, '-2');", "explanation": "Regression test for Numeric in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT power('-inf'::numeric, '-2')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 36, "num_statements": 1} {"question": "PL/pgSQL test: Plpython Types (example 108).", "schema": null, "sql": "SELECT * FROM test_type_conversion_array_text(ARRAY['foo', 'bar']);", "explanation": "PL/pgSQL example from PostgreSQL source test for Plpython Types.", "validation_query": null, "source": "plpgsql_source", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 67, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Show the id, name of each festival and the number of artworks it has nominated.", "schema": "CREATE TABLE nomination (Festival_ID VARCHAR, Artwork_ID VARCHAR); CREATE TABLE festival_detail (Festival_Name VARCHAR, Festival_ID VARCHAR); CREATE TABLE artwork (Artwork_ID VARCHAR)", "sql": "SELECT T1.Festival_ID, T3.Festival_Name, COUNT(*) FROM nomination AS T1 JOIN artwork AS T2 ON T1.Artwork_ID = T2.Artwork_ID JOIN festival_detail AS T3 ON T1.Festival_ID = T3.Festival_ID GROUP BY T1.Festival_ID;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 210, "num_statements": 1} {"question": "What is the total number of criminal cases and their clearance rate by state?", "schema": "CREATE TABLE states (state_id INT PRIMARY KEY, state_name VARCHAR(50)); CREATE TABLE cases (case_id INT PRIMARY KEY, state_id INT, case_type VARCHAR(50), cleared INT); INSERT INTO states VALUES (1, 'California'), (2, 'Texas'), (3, 'New York'); INSERT INTO cases VALUES (1, 1, 'Murder', 1), (2, 1, 'Theft', 0), (3, 2, 'Murder', 1), (4, 2, 'Theft', 1), (5, 3, 'Murder', 0), (6, 3, 'Theft', 1);", "sql": "SELECT s.state_name, COUNT(c.case_id) AS total_cases, SUM(c.cleared) / COUNT(c.case_id) * 100 AS clearance_rate FROM states s JOIN cases c ON s.state_id = c.state_id GROUP BY s.state_id;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 186, "num_statements": 1} {"question": "What is the average funding for projects in the digital divide category?", "schema": "CREATE TABLE projects (id INT, name TEXT, category TEXT, funding FLOAT); INSERT INTO projects (id, name, category, funding) VALUES (1, 'ProjA', 'DigitalDivide', 50000), (2, 'ProjB', 'SocialGood', 35000), (3, 'ProjC', 'DigitalDivide', 75000);", "sql": "SELECT AVG(funding) FROM projects WHERE category = 'DigitalDivide';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 67, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'create_table' (example 149).", "schema": null, "sql": "CREATE TABLE fail_part PARTITION OF temp_parted FOR VALUES IN ('a');", "explanation": "DDL from PostgreSQL core regression test for Create Table.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_table", "is_postgresql_specific": false, "sql_length": 68, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: what is the time/retired when the grid is less than 3 and 45 laps?", "schema": "CREATE TABLE table_name_95 (time_retired VARCHAR, grid VARCHAR, laps VARCHAR)", "sql": "SELECT time_retired FROM table_name_95 WHERE grid < 3 AND laps = 45;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 68, "num_statements": 1} {"question": "Calculate the total revenue for the last 3 cricket world cups", "schema": "CREATE TABLE events (id INT, name VARCHAR(50), sport VARCHAR(20), start_date DATE, end_date DATE, ticket_price INT);", "sql": "SELECT SUM(DATEDIFF(end_date, start_date) + 1) * ticket_price FROM events WHERE sport = 'Cricket' AND name LIKE '%World Cup%' AND start_date >= DATE_SUB(CURDATE(), INTERVAL 3 YEAR);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 181, "num_statements": 1} {"question": "What is the maximum diameter at breast height (DBH) for trees in the tropical rainforest biome?", "schema": "CREATE TABLE biomes (biome_id INT PRIMARY KEY, name VARCHAR(50), area_km2 FLOAT); INSERT INTO biomes (biome_id, name, area_km2) VALUES (1, 'Tropical Rainforest', 15000000.0), (2, 'Temperate Rainforest', 250000.0), (3, 'Boreal Forest', 12000000.0); CREATE TABLE trees (tree_id INT PRIMARY KEY, species VARCHAR(50), biome_id INT, dbh FLOAT, FOREIGN KEY (biome_id) REFERENCES biomes(biome_id)); INSERT INTO trees (tree_id, species, biome_id, dbh) VALUES (1, 'Rubber Tree', 1, 80.0), (2, 'Mahogany', 1, 60.0), (3, 'Cacao', 1, 30.0);", "sql": "SELECT MAX(dbh) FROM trees WHERE biomes.name = 'Tropical Rainforest';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 69, "num_statements": 1} {"question": "How many collective bargaining agreements were signed in the 'Education' sector between 2015 and 2018?", "schema": "CREATE TABLE CollectiveBargaining (AgreementID INT, Sector VARCHAR(20), SignDate DATE); INSERT INTO CollectiveBargaining (AgreementID, Sector, SignDate) VALUES (1, 'Education', '2015-05-01'), (2, 'Education', '2016-09-15'), (3, 'Healthcare', '2017-03-25');", "sql": "SELECT COUNT(*) FROM CollectiveBargaining WHERE Sector = 'Education' AND SignDate BETWEEN '2015-01-01' AND '2018-12-31';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 120, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the home team score of the game with Adelaide as the away team?", "schema": "CREATE TABLE table_name_90 (home_team VARCHAR, away_team VARCHAR)", "sql": "SELECT home_team AS score FROM table_name_90 WHERE away_team = 'adelaide';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 74, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Show the average price range of hotels that have 5 star ratings and allow pets.", "schema": "CREATE TABLE HOTELS (price_range INTEGER, star_rating_code VARCHAR, pets_allowed_yn VARCHAR)", "sql": "SELECT AVG(price_range) FROM HOTELS WHERE star_rating_code = '5' AND pets_allowed_yn = 1;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 89, "num_statements": 1} {"question": "Update the donation amount for 'Jane Doe' to $500 for her donation made on 2022-05-15.", "schema": "CREATE TABLE Donors (donor_id INT, donor_name TEXT, donation_amount DECIMAL, donation_date DATE);", "sql": "UPDATE Donors SET Donors.donation_amount = 500 WHERE Donors.donor_name = 'Jane Doe' AND Donors.donation_date = '2022-05-15';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 124, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Whose origin of Programming offers a general genre, a network of ntv bangla and a service of cogeco cable?", "schema": "CREATE TABLE table_name_12 (origin_of_programming VARCHAR, service VARCHAR, genre VARCHAR, network VARCHAR)", "sql": "SELECT origin_of_programming FROM table_name_12 WHERE genre = 'general' AND network = 'ntv bangla' AND service = 'cogeco cable';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 128, "num_statements": 1} {"question": "What is the average number of employees for companies founded by veterans?", "schema": "CREATE TABLE company (id INT, name TEXT, founding_year INT, founder_veteran BOOLEAN); INSERT INTO company (id, name, founding_year, founder_veteran) VALUES (1, 'Acme Inc', 2010, true); INSERT INTO company (id, name, founding_year, founder_veteran) VALUES (2, 'Beta Corp', 2015, false);", "sql": "SELECT AVG(num_employees) FROM company_data INNER JOIN company ON company_data.company_id = company.id WHERE company.founder_veteran = true;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 140, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who is fourth when Anders Martinson USA is second?", "schema": "CREATE TABLE table_name_98 (fourth VARCHAR, second VARCHAR)", "sql": "SELECT fourth FROM table_name_98 WHERE second = 'anders martinson usa';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 71, "num_statements": 1} {"question": "Delete records with a budget over 100000 in the 'community_development' table", "schema": "CREATE TABLE community_development (id INT, project_name VARCHAR(255), budget INT, country VARCHAR(255));", "sql": "DELETE FROM community_development WHERE budget > 100000;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_delete", "is_postgresql_specific": false, "sql_length": 56, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Please show the team that has the most number of technicians.", "schema": "CREATE TABLE technician (Team VARCHAR)", "sql": "SELECT Team FROM technician GROUP BY Team ORDER BY COUNT(*) DESC LIMIT 1;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 73, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the earliest aired show that's returning on September 13?", "schema": "CREATE TABLE table_name_1 (last_aired INTEGER, returning VARCHAR)", "sql": "SELECT MIN(last_aired) FROM table_name_1 WHERE returning = 'september 13';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": true, "sql_length": 74, "num_statements": 1} {"question": "What is the total CO2 emissions reduction for each carbon offset project?", "schema": "CREATE TABLE carbon_offset_projects (project_id INT, name VARCHAR(100), co2_reduction INT); INSERT INTO carbon_offset_projects (project_id, name, co2_reduction) VALUES (1, 'Forest Conservation', 10000);", "sql": "SELECT name, SUM(co2_reduction) as total_co2_reduction FROM carbon_offset_projects GROUP BY name;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 97, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the total number of playoffs held in exactly 2006?", "schema": "CREATE TABLE table_1999350_1 (playoffs VARCHAR, year VARCHAR)", "sql": "SELECT COUNT(playoffs) FROM table_1999350_1 WHERE year = 2006;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 62, "num_statements": 1} {"question": "What is the average monthly salary of workers in the 'Labor Rights Union'?", "schema": "CREATE TABLE union_members (member_id INT, member_name VARCHAR(255), union_id INT, monthly_salary DECIMAL(10,2)); CREATE TABLE unions (union_id INT, union_name VARCHAR(255)); INSERT INTO unions (union_id, union_name) VALUES (123, 'United Workers Union'); INSERT INTO unions (union_id, union_name) VALUES (456, 'Labor Rights Union'); INSERT INTO union_members (member_id, member_name, union_id, monthly_salary) VALUES (1, 'John Doe', 456, 3500.50); INSERT INTO union_members (member_id, member_name, union_id, monthly_salary) VALUES (2, 'Jane Doe', 123, 3200.25);", "sql": "SELECT AVG(monthly_salary) FROM union_members WHERE union_id = (SELECT union_id FROM unions WHERE union_name = 'Labor Rights Union');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 133, "num_statements": 1} {"question": "PostgreSQL regression test 'jsonb': Write the SELECT query (example 778).", "schema": null, "sql": "select jsonb_insert('{\"a\": [0,1,2]}', '{a, -1}', '\"new_value\"', true);", "explanation": "Regression test for Jsonb in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select jsonb_insert('{\"a\": [0,1,2]}', '{a, -1}', '\"new_value\"', true)) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 70, "num_statements": 1} {"question": "list all astronauts with more than 5 space missions", "schema": "CREATE TABLE Astronauts(astronaut_id INT, name VARCHAR(50), country VARCHAR(50), missions INT);", "sql": "SELECT name FROM Astronauts WHERE missions > 5;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 47, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the Opponent on Week 11?", "schema": "CREATE TABLE table_name_28 (opponent VARCHAR, week VARCHAR)", "sql": "SELECT opponent FROM table_name_28 WHERE week = 11;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 51, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the name of the team with a qual 1 time of 1:17.481?", "schema": "CREATE TABLE table_name_77 (team VARCHAR, qual_1 VARCHAR)", "sql": "SELECT team FROM table_name_77 WHERE qual_1 = '1:17.481';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 57, "num_statements": 1} {"question": "Insert a new artist 'Maria Fernandes' in the 'Artists' table", "schema": "CREATE TABLE Artists (ArtistID INT PRIMARY KEY, ArtistName VARCHAR(100));", "sql": "INSERT INTO Artists (ArtistID, ArtistName) VALUES (101, 'Maria Fernandes');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 75, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who is the pärnu manager?", "schema": "CREATE TABLE table_27409644_1 (manager VARCHAR, club VARCHAR)", "sql": "SELECT manager FROM table_27409644_1 WHERE club = 'Pärnu';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 58, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'btree_gist' (example 20).", "schema": null, "sql": "SELECT * FROM booltmp WHERE NOT a;", "explanation": "Example query from the 'btree_gist' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 34, "num_statements": 1} {"question": "How many graduate students are enrolled in STEM programs and have published at least one paper?", "schema": "CREATE TABLE students (id INT, name VARCHAR(50), gender VARCHAR(10), program VARCHAR(50), publications INT); INSERT INTO students (id, name, gender, program, publications) VALUES (1, 'Charlie', 'Non-binary', 'Mathematics', 2), (2, 'Dana', 'Female', 'Physics', 0), (3, 'Eli', 'Male', 'Engineering', 1);", "sql": "SELECT COUNT(*) FROM students WHERE program LIKE 'STEM%' AND publications > 0;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 78, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which April has a Game of 84", "schema": "CREATE TABLE table_name_54 (april INTEGER, game VARCHAR)", "sql": "SELECT MAX(april) FROM table_name_54 WHERE game = 84;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 53, "num_statements": 1} {"question": "What is the name and efficiency of the solar panel with the highest efficiency in Japan?", "schema": "CREATE TABLE solar_panels (id INT, name VARCHAR(255), efficiency FLOAT); INSERT INTO solar_panels (id, name, efficiency) VALUES (1, 'SolarPanel A', 18.5), (2, 'SolarPanel B', 20.3), (3, 'SolarPanel C', 21.0), (4, 'SolarPanel D', 19.1);", "sql": "SELECT name, MAX(efficiency) FROM solar_panels WHERE country = 'Japan';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 71, "num_statements": 1} {"question": "What is the total number of marine life species observed in the Atlantic Ocean that are not sharks?", "schema": "CREATE TABLE Oceans (id INT, name VARCHAR(20)); INSERT INTO Oceans (id, name) VALUES (1, 'Pacific'), (2, 'Atlantic'); CREATE TABLE SpeciesObservations (id INT, ocean_id INT, species VARCHAR(50), count INT); INSERT INTO SpeciesObservations (id, ocean_id, species, count) VALUES (1, 1, 'Shark', 500), (2, 1, 'Whale', 300), (3, 2, 'Shark', 700), (4, 2, 'Dolphin', 600);", "sql": "SELECT SUM(SpeciesObservations.count) FROM SpeciesObservations JOIN Oceans ON SpeciesObservations.ocean_id = Oceans.id WHERE Oceans.name = 'Atlantic' AND SpeciesObservations.species != 'Shark';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 193, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Tell me the naame of Investigation of thrombus imaging", "schema": "CREATE TABLE table_name_19 (name VARCHAR, investigation VARCHAR)", "sql": "SELECT name FROM table_name_19 WHERE investigation = 'thrombus imaging';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 72, "num_statements": 1} {"question": "pgTAP test for Pgtap--Unpackaged--0.91.0 (assertion 74).", "schema": null, "sql": "ALTER EXTENSION pgtap ADD FUNCTION throws_ok ( TEXT, int4, TEXT );", "explanation": "SQL assertion from pgTAP test suite for Pgtap--Unpackaged--0.91.0.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "What is the maximum number of publications by a faculty member in a single year?", "schema": "CREATE TABLE Publications(PubID INT, FacultyID INT, Year INT, NumArticles INT); INSERT INTO Publications(PubID, FacultyID, Year, NumArticles) VALUES (1, 1, 2019, 3), (2, 1, 2020, 5), (3, 2, 2018, 2), (4, 2, 2019, 4);", "sql": "SELECT FacultyID, MAX(NumArticles) FROM Publications GROUP BY FacultyID;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 72, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What finish has +21 as the to par?", "schema": "CREATE TABLE table_name_35 (finish VARCHAR, to_par VARCHAR)", "sql": "SELECT finish FROM table_name_35 WHERE to_par = '+21';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 54, "num_statements": 1} {"question": "PostgreSQL regression test 'tstypes': Write the SELECT query (example 44).", "schema": null, "sql": "SELECT tsquery_phrase('a <3> g', 'b & d', 10);", "explanation": "Regression test for Tstypes in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT tsquery_phrase('a <3> g', 'b & d', 10)) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 46, "num_statements": 1} {"question": "What is the number of unique countries represented in the 'readers' table?", "schema": "CREATE TABLE readers (id INT, name VARCHAR(50), age INT, gender VARCHAR(10), country VARCHAR(50));", "sql": "SELECT COUNT(DISTINCT country) FROM readers;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 44, "num_statements": 1} {"question": "List of countries with the highest environmental impact score in the past year.", "schema": "CREATE TABLE EnvironmentalImpact(id INT, country VARCHAR(50), year INT, score FLOAT); CREATE TABLE CountryPopulation(id INT, country VARCHAR(50), population INT);", "sql": "SELECT country, score FROM (SELECT country, score, ROW_NUMBER() OVER (ORDER BY score DESC) as rank FROM EnvironmentalImpact WHERE year = YEAR(CURRENT_DATE) - 1) AS subquery WHERE rank <= 5;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_window_function", "is_postgresql_specific": true, "sql_length": 189, "num_statements": 1} {"question": "What is the total number of research grants awarded to faculty members in the College of Education who identify as Indigenous or Native American?", "schema": "CREATE TABLE edu_grants (grant_id INT, grant_amount DECIMAL(10,2), grant_recipient VARCHAR(50), recipient_identity VARCHAR(50)); INSERT INTO edu_grants (grant_id, grant_amount, grant_recipient, recipient_identity) VALUES (1, 35000.00, 'Prof. Rivera', 'Indigenous'), (2, 45000.00, 'Prof. Thompson', 'Native American'), (3, 55000.00, 'Prof. Wang', 'Asian'), (4, 65000.00, 'Prof. Lopez', 'Hispanic');", "sql": "SELECT COUNT(*) FROM edu_grants WHERE grant_recipient LIKE '%College of Education%' AND recipient_identity IN ('Indigenous', 'Native American');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 144, "num_statements": 1} {"question": "What is the average number of crops grown per farm in urban agriculture?", "schema": "CREATE TABLE urban_farms (id INT, farm_id INT, crop_type VARCHAR(255));", "sql": "SELECT farm_id, AVG(COUNT(crop_type)) AS average_crops_per_farm FROM urban_farms GROUP BY farm_id;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 98, "num_statements": 1} {"question": "What is the total assets value per client as of the last day of each quarter?", "schema": "CREATE TABLE assets (client_id INT, assets_value FLOAT, assets_date DATE); INSERT INTO assets (client_id, assets_value, assets_date) VALUES (1, 150000.00, '2022-01-01'), (1, 160000.00, '2022-04-01'), (2, 220000.00, '2022-01-01'), (2, 230000.00, '2022-04-01');", "sql": "SELECT client_id, SUM(assets_value) as total_assets_value FROM assets WHERE assets_date IN (SELECT LAST_DAY(date_add(DATE(assets_date), INTERVAL (quarter(assets_date) - 1) * 3 MONTH)) as last_day_of_quarter FROM assets GROUP BY client_id) GROUP BY client_id;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 258, "num_statements": 1} {"question": "PostgreSQL regression test 'stats_ext': Write the SELECT query (example 527).", "schema": null, "sql": "SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,20) = 1 AND mod(b::int,10) = 1');", "explanation": "Regression test for Stats Ext in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,20) = 1 AND mod(b::int,10) = 1')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 105, "num_statements": 1} {"question": "Identify factories in Asia with the highest number of labor rights violations.", "schema": "CREATE TABLE factories (id INT, name VARCHAR(100), country VARCHAR(50), violations INT); INSERT INTO factories (id, name, country, violations) VALUES (1, 'AsiaTech Factory', 'China', 15), (2, 'Bright Future Factory', 'Bangladesh', 20), (3, 'Unity Factory', 'Vietnam', 10);", "sql": "SELECT name, country, MAX(violations) as max_violations FROM factories WHERE country IN ('China', 'Bangladesh', 'Vietnam') GROUP BY country;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 140, "num_statements": 1} {"question": "What is the average sales figure for each drug in the oncology department?", "schema": "CREATE TABLE drugs (id INT, name VARCHAR(50), department VARCHAR(50), sales FLOAT); INSERT INTO drugs (id, name, department, sales) VALUES (1, 'DrugA', 'Oncology', 100000), (2, 'DrugB', 'Oncology', 150000), (3, 'DrugC', 'Cardiology', 120000), (4, 'DrugD', 'Cardiology', 160000);", "sql": "SELECT department, name, AVG(sales) as avg_sales FROM drugs WHERE department = 'Oncology' GROUP BY department, name;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 116, "num_statements": 1} {"question": "How many events had an attendance of over 100 in the last quarter?", "schema": "CREATE TABLE Events (EventID INT, EventDate DATE, EventAttendance INT); INSERT INTO Events (EventID, EventDate, EventAttendance) VALUES (1, '2022-03-12', 120), (2, '2022-04-20', 80), (3, '2022-05-15', 150);", "sql": "SELECT COUNT(*) FROM Events WHERE EventAttendance > 100 AND EventDate >= DATE_SUB(CURDATE(), INTERVAL 3 MONTH);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 111, "num_statements": 1} {"question": "PostgreSQL regression test 'collate.icu.utf8': Write the SELECT query (example 18).", "schema": null, "sql": "SELECT * FROM collate_test1 WHERE b COLLATE \"C\" >= 'bbc';", "explanation": "Regression test for Collate.Icu.Utf8 in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT * FROM collate_test1 WHERE b COLLATE \"C\" >= 'bbc') AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 57, "num_statements": 1} {"question": "Delete all shipments that were made from the 'Rio de Janeiro' warehouse to 'Brazil' before '2022-02-01'.", "schema": "CREATE TABLE warehouse (id INT PRIMARY KEY, name VARCHAR(50), city VARCHAR(50));CREATE TABLE carrier (id INT PRIMARY KEY, name VARCHAR(50));CREATE TABLE shipment (id INT PRIMARY KEY, warehouse_id INT, carrier_id INT, pallet_count INT, shipped_date DATE);", "sql": "DELETE FROM shipment WHERE warehouse_id = (SELECT id FROM warehouse WHERE city = 'Rio de Janeiro') AND (SELECT name FROM carrier WHERE id = shipment.carrier_id) = 'Brazil' AND shipped_date < '2022-02-01';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_delete", "is_postgresql_specific": false, "sql_length": 204, "num_statements": 1} {"question": "pgTAP test for Usergroup (assertion 12).", "schema": null, "sql": "SELECT * FROM check_test(\n isnt_superuser('aoijaoisjfaoidfjaisjdfosjf', 'desc'),\n false,\n 'isnt_superuser(nonexistent user, desc)',\n 'desc',\n ' User aoijaoisjfaoidfjaisjdfosjf does not exist'\n);", "explanation": "SQL assertion from pgTAP test suite for Usergroup.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 212, "num_statements": 1} {"question": "What is the total number of military aircrafts by type, grouped by their manufacturer and maintenance status?", "schema": "CREATE TABLE Manufacturer (MID INT, Name VARCHAR(50)); INSERT INTO Manufacturer (MID, Name) VALUES (1, 'Lockheed Martin'), (2, 'Boeing'); CREATE TABLE Aircraft (AID INT, Type VARCHAR(50), ManufacturerID INT, MaintenanceStatus VARCHAR(20)); INSERT INTO Aircraft (AID, Type, ManufacturerID, MaintenanceStatus) VALUES (1, 'F-35', 1, 'Inactive'), (2, 'F-15', 1, 'Active'), (3, 'F/A-18', 2, 'Active');", "sql": "SELECT m.Name, a.MaintenanceStatus, COUNT(a.AID) as Total FROM Aircraft a JOIN Manufacturer m ON a.ManufacturerID = m.MID GROUP BY m.Name, a.MaintenanceStatus;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 159, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the highest population (2008) created earlier than 1857, and the county was Sinoe?", "schema": "CREATE TABLE table_name_69 (population__2008_ INTEGER, created VARCHAR, county VARCHAR)", "sql": "SELECT MAX(population__2008_) FROM table_name_69 WHERE created < 1857 AND county = 'sinoe';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 91, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'foreign_key' (example 4).", "schema": null, "sql": "INSERT INTO FKTABLE VALUES (2, 3);", "explanation": "DML from PostgreSQL core regression test for Foreign Key.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 34, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the average number of top-5s for the major with 5 top-10s and fewer than 12 cuts made?", "schema": "CREATE TABLE table_name_3 (top_5 INTEGER, top_10 VARCHAR, cuts_made VARCHAR)", "sql": "SELECT AVG(top_5) FROM table_name_3 WHERE top_10 = 5 AND cuts_made < 12;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 72, "num_statements": 1} {"question": "What is the total number of public buildings in New York and Illinois?", "schema": "CREATE TABLE public_buildings (name VARCHAR(255), state VARCHAR(255), size DECIMAL(10,2)); INSERT INTO public_buildings (name, state, size) VALUES ('Building1', 'New York', 120000), ('Building2', 'New York', 150000), ('Building3', 'Illinois', 180000);", "sql": "SELECT SUM(size) FROM public_buildings WHERE state IN ('New York', 'Illinois');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 79, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What's the least amount of wins for Germany in the 1999 season having 0 points?", "schema": "CREATE TABLE table_name_30 (wins INTEGER, seasons VARCHAR, points VARCHAR, country VARCHAR)", "sql": "SELECT MIN(wins) FROM table_name_30 WHERE points = '0' AND country = 'germany' AND seasons = '1999';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 100, "num_statements": 1} {"question": "List all military equipment maintenance activities performed on aircrafts in the Asia-Pacific region since 2018.", "schema": "CREATE TABLE equipment_maintenance (maintenance_id INT, maintenance_date DATE, equipment_type VARCHAR(255), region VARCHAR(255)); INSERT INTO equipment_maintenance (maintenance_id, maintenance_date, equipment_type, region) VALUES (1, '2018-12-31', 'aircraft', 'Asia-Pacific'), (2, '2019-04-04', 'tank', 'Europe'), (3, '2020-06-15', 'aircraft', 'Asia-Pacific');", "sql": "SELECT * FROM equipment_maintenance WHERE equipment_type = 'aircraft' AND region = 'Asia-Pacific' AND maintenance_date >= '2018-01-01';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 135, "num_statements": 1} {"question": "What is the average product safety rating for cosmetic products in the French market, and how many products were rated?", "schema": "CREATE TABLE product_safety_ratings (product_id INT, safety_rating INT, country TEXT);", "sql": "SELECT AVG(safety_rating) as avg_safety_rating, COUNT(*) as num_products_rated FROM product_safety_ratings WHERE country = 'France';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 132, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the region for tv uskana?", "schema": "CREATE TABLE table_name_5 (region VARCHAR, name VARCHAR)", "sql": "SELECT region FROM table_name_5 WHERE name = 'tv uskana';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 57, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which IHSAA Football Class has a County of 77 sullivan, and a School of union dugger?", "schema": "CREATE TABLE table_name_14 (ihsaa_football_class VARCHAR, county VARCHAR, school VARCHAR)", "sql": "SELECT ihsaa_football_class FROM table_name_14 WHERE county = '77 sullivan' AND school = 'union dugger';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 104, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the national cup statistics when the Championship is 21 app / 6 goals?", "schema": "CREATE TABLE table_19333752_1 (national_cup VARCHAR, championship VARCHAR)", "sql": "SELECT national_cup FROM table_19333752_1 WHERE championship = '21 app / 6 goals';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 82, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: In which competition or tour was nordsjælland the opponent with a hr Ground?", "schema": "CREATE TABLE table_name_73 (competition_or_tour VARCHAR, ground VARCHAR, opponent VARCHAR)", "sql": "SELECT competition_or_tour FROM table_name_73 WHERE ground = 'hr' AND opponent = 'nordsjælland';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 96, "num_statements": 1} {"question": "What is the total number of military equipment pieces owned by the Navy, categorized by type?", "schema": "CREATE TABLE Equipment (Equipment_Id INT, Equipment_Type VARCHAR(50), Agency VARCHAR(50), Quantity INT); INSERT INTO Equipment (Equipment_Id, Equipment_Type, Agency, Quantity) VALUES (1, 'Aircraft Carrier', 'Navy', 3); INSERT INTO Equipment (Equipment_Id, Equipment_Type, Agency, Quantity) VALUES (2, 'Submarine', 'Navy', 12); INSERT INTO Equipment (Equipment_Id, Equipment_Type, Agency, Quantity) VALUES (3, 'Destroyer', 'Navy', 25);", "sql": "SELECT Equipment_Type, SUM(Quantity) FROM Equipment WHERE Agency = 'Navy' GROUP BY Equipment_Type;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 98, "num_statements": 1} {"question": "Write the PL/pgSQL object from PostgreSQL regression test 'triggers' (example 379).", "schema": null, "sql": "-- drop the trigger, and now we're allowed to attach it again\ndrop trigger child_row_trig on child;", "explanation": "PL/pgSQL object from PostgreSQL core test for Triggers.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 99, "num_statements": 1} {"question": "List unique types of 'Emergency Relief' and their corresponding total costs.", "schema": "CREATE TABLE Emergency_Relief (relief_type VARCHAR(255), cost INT); INSERT INTO Emergency_Relief (relief_type, cost) VALUES ('Food', 20000), ('Shelter', 30000), ('Medical', 15000);", "sql": "SELECT relief_type, SUM(cost) FROM Emergency_Relief GROUP BY relief_type;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 73, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the least points for daniël willemsen / kenny van gaalen", "schema": "CREATE TABLE table_16729457_17 (points INTEGER, driver___passenger VARCHAR)", "sql": "SELECT MIN(points) FROM table_16729457_17 WHERE driver___passenger = 'Daniël Willemsen / Kenny van Gaalen';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 107, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which date was the show aired on the RTL Televizija network?", "schema": "CREATE TABLE table_name_35 (date_aired VARCHAR, network VARCHAR)", "sql": "SELECT date_aired FROM table_name_35 WHERE network = 'rtl televizija';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 70, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'rangetypes' (example 250).", "schema": null, "sql": "insert into test_range_spgist select int4range(g, g+10) from generate_series(1,2000) g;", "explanation": "DML from PostgreSQL core regression test for Rangetypes.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 87, "num_statements": 1} {"question": "PostgreSQL regression test 'txid': Write the SELECT query (example 13).", "schema": null, "sql": "select snap from snapshot_test order by nr;", "explanation": "Regression test for Txid in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select snap from snapshot_test order by nr) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 43, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'btree_gist' (example 15).", "schema": null, "sql": "INSERT INTO timetzcmp (r_id,a) SELECT 22,count(*) FROM timetztmp WHERE a <= '07:46:45 GMT+4';", "explanation": "Example query from the 'btree_gist' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 93, "num_statements": 1} {"question": "PostgreSQL regression test 'multirangetypes': Write the SELECT query (example 35).", "schema": null, "sql": "select '{[a,a]}'::textmultirange;", "explanation": "Regression test for Multirangetypes in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select '{[a,a]}'::textmultirange) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 33, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who was the away team during the game when the home team was the new zealand breakers?", "schema": "CREATE TABLE table_name_36 (away_team VARCHAR, home_team VARCHAR)", "sql": "SELECT away_team FROM table_name_36 WHERE home_team = 'new zealand breakers';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 77, "num_statements": 1} {"question": "What is the total duration of 'Swimming' workouts for each user in the 'workout_data' table?", "schema": "CREATE TABLE workout_data (user_id INT, workout_type VARCHAR(20), duration INT); INSERT INTO workout_data (user_id, workout_type, duration) VALUES (4, 'Swimming', 300), (5, 'Swimming', 420), (4, 'Swimming', 240), (5, 'Swimming', 540);", "sql": "SELECT user_id, SUM(duration) as total_duration FROM workout_data WHERE workout_type = 'Swimming' GROUP BY user_id;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 115, "num_statements": 1} {"question": "What is the total amount donated by donors with the 'Corporate' type?", "schema": "CREATE TABLE Donors (donor_id INT PRIMARY KEY, donor_name TEXT, donor_type TEXT, amount INT);", "sql": "SELECT SUM(amount) FROM Donors WHERE donor_type = 'Corporate';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 62, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'insert' (example 183).", "schema": null, "sql": "insert into hpart0 values(11);", "explanation": "DML from PostgreSQL core regression test for Insert.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 30, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the winning song for the artist with a debut album \"the winner\"?", "schema": "CREATE TABLE table_1646960_3 (winning_song VARCHAR, debut_album VARCHAR)", "sql": "SELECT winning_song FROM table_1646960_3 WHERE debut_album = 'The winner';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 74, "num_statements": 1} {"question": "How many albums were released in total by Pop artists in 2021?", "schema": "CREATE TABLE artists (artist_id INT, genre VARCHAR(20)); INSERT INTO artists (artist_id, genre) VALUES (1, 'Latin'), (2, 'Pop'), (3, 'Rock'), (4, 'Jazz'), (5, 'Folk'); CREATE TABLE albums (album_id INT, artist_id INT, release_date DATE); INSERT INTO albums (album_id, artist_id, release_date) VALUES (1, 2, '2021-04-12'), (2, 3, '2020-08-21'), (3, 4, '2019-11-01'), (4, 5, '2018-02-23'), (5, 2, '2021-07-15');", "sql": "SELECT COUNT(albums.album_id) FROM albums INNER JOIN artists ON albums.artist_id = artists.artist_id WHERE artists.genre = 'Pop' AND albums.release_date BETWEEN '2021-01-01' AND '2021-12-31';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 191, "num_statements": 1} {"question": "Find the REE production amounts for each country in 2018 and 2020, including the difference.", "schema": "CREATE TABLE production (country VARCHAR(255), year INT, ree_production INT); INSERT INTO production (country, year, ree_production) VALUES ('China', 2018, 120000), ('China', 2020, 140000), ('USA', 2018, 12000), ('USA', 2020, 15000), ('Australia', 2018, 20000), ('Australia', 2020, 22000);", "sql": "SELECT a.country, SUM(a.ree_production) AS production_2018, SUM(b.ree_production) AS production_2020, SUM(b.ree_production) - SUM(a.ree_production) AS difference FROM production AS a JOIN production AS b ON a.country = b.country WHERE a.year = 2018 AND b.year = 2020 GROUP BY a.country;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 286, "num_statements": 1} {"question": "PostgreSQL regression test 'tsearch': Write the SELECT query (example 403).", "schema": null, "sql": "select websearch_to_tsquery('simple', 'a:::b');", "explanation": "Regression test for Tsearch in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select websearch_to_tsquery('simple', 'a:::b')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 47, "num_statements": 1} {"question": "How many employees from underrepresented communities are employed in the 'Mining Operations' department across all sites?", "schema": "CREATE TABLE Employees (EmployeeID INT, Name VARCHAR(50), Department VARCHAR(50), Community VARCHAR(50), Site VARCHAR(50)); INSERT INTO Employees (EmployeeID, Name, Department, Community, Site) VALUES (1, 'John Doe', 'Mining Operations', 'Underrepresented', 'Site A'); INSERT INTO Employees (EmployeeID, Name, Department, Community, Site) VALUES (2, 'Jane Smith', 'Mining Operations', 'Represented', 'Site B'); INSERT INTO Employees (EmployeeID, Name, Department, Community, Site) VALUES (3, 'Alice Johnson', 'Mining Operations', 'Underrepresented', 'Site A'); INSERT INTO Employees (EmployeeID, Name, Department, Community, Site) VALUES (4, 'Bob Brown', 'Mining Operations', 'Represented', 'Site B');", "sql": "SELECT COUNT(*) FROM Employees WHERE Department = 'Mining Operations' AND Community = 'Underrepresented';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 105, "num_statements": 1} {"question": "What is the total number of students who have ever enrolled in a lifelong learning course per instructor?", "schema": "CREATE TABLE enrollments (student_id INT, instructor_id INT, enrollment_date DATE);", "sql": "SELECT instructor_id, COUNT(DISTINCT student_id) as total_students FROM enrollments GROUP BY instructor_id;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 107, "num_statements": 1} {"question": "What is the maximum budget allocated for sign language interpreters in each state?", "schema": "CREATE TABLE interpreter_budget (state VARCHAR(20), budget INT); INSERT INTO interpreter_budget (state, budget) VALUES ('California', 5000); INSERT INTO interpreter_budget (state, budget) VALUES ('Texas', 7000); INSERT INTO interpreter_budget (state, budget) VALUES ('Florida', 8000);", "sql": "SELECT state, MAX(budget) FROM interpreter_budget GROUP BY state;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 65, "num_statements": 1} {"question": "What are the names and types of all containers in the shipping_container table?", "schema": "CREATE TABLE shipping_container ( id INT PRIMARY KEY, name VARCHAR(255), container_type VARCHAR(255) );", "sql": "SELECT name, container_type FROM shipping_container;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 52, "num_statements": 1} {"question": "Add a new regulatory record 'New Regulation C' for country 'ES' in Q1 of 2023", "schema": "CREATE TABLE countries (id INT, name VARCHAR(10)); INSERT INTO countries (id, name) VALUES (1, 'FR'), (2, 'DE'); CREATE TABLE regulations (id INT, country VARCHAR(10), quarter DATE, description VARCHAR(50));", "sql": "INSERT INTO countries (id, name) VALUES (3, 'ES'); INSERT INTO regulations (id, country, quarter, description) VALUES (4, 'ES', '2023-01-01', 'New Regulation C');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 162, "num_statements": 2} {"question": "How many basketball games were played in the 2018-2019 season?", "schema": "CREATE TABLE basketball_games (game_id INT, season_year INT, home_team VARCHAR(50), away_team VARCHAR(50));", "sql": "SELECT COUNT(*) FROM basketball_games WHERE season_year = 2019;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 63, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: who is the the club (city/town) with goals for/against being 14-2", "schema": "CREATE TABLE table_14181578_1 (club__city_town_ VARCHAR, goals_for_against VARCHAR)", "sql": "SELECT club__city_town_ FROM table_14181578_1 WHERE goals_for_against = '14-2';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 79, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'test_decoding' (example 15).", "schema": null, "sql": "INSERT INTO toasted_key(toasted_key, toasted_col1) VALUES(repeat('1234567890', 200), repeat('9876543210', 200));", "explanation": "Example query from the 'test_decoding' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 112, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which tyres received 8 points?", "schema": "CREATE TABLE table_name_56 (tyres VARCHAR, points VARCHAR)", "sql": "SELECT tyres FROM table_name_56 WHERE points = 8;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 49, "num_statements": 1} {"question": "How many electric vehicle charging stations are there in Germany and Spain combined?", "schema": "CREATE TABLE EVStationCounts (id INT, country VARCHAR(20), num_stations INT); INSERT INTO EVStationCounts (id, country, num_stations) VALUES (1, 'Germany', 3000), (2, 'Spain', 1500), (3, 'France', 2000);", "sql": "SELECT COUNT(*) FROM EVStationCounts WHERE country IN ('Germany', 'Spain');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 75, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the away team when north melbourne is at home?", "schema": "CREATE TABLE table_name_99 (away_team VARCHAR, home_team VARCHAR)", "sql": "SELECT away_team FROM table_name_99 WHERE home_team = 'north melbourne';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 72, "num_statements": 1} {"question": "What is the minimum depth in the Southern Ocean where phytoplankton are present?", "schema": "CREATE TABLE phytoplankton_depth (id INT, location VARCHAR(50), depth FLOAT, phytoplankton_present BOOLEAN); INSERT INTO phytoplankton_depth (id, location, depth, phytoplankton_present) VALUES (1, 'Southern Ocean', 50.0, TRUE); INSERT INTO phytoplankton_depth (id, location, depth, phytoplankton_present) VALUES (2, 'Southern Ocean', 75.0, TRUE);", "sql": "SELECT MIN(depth) FROM phytoplankton_depth WHERE location = 'Southern Ocean' AND phytoplankton_present = TRUE;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 110, "num_statements": 1} {"question": "What is the minimum quantity of sustainable packaging materials available for orders?", "schema": "CREATE TABLE SustainablePackaging (id INT, material VARCHAR(50), quantity INT); INSERT INTO SustainablePackaging (id, material, quantity) VALUES (1, 'Recycled Cardboard Boxes', 2000), (2, 'Biodegradable Bags', 5000), (3, 'Plant-Based Packing Peanuts', 3000);", "sql": "SELECT MIN(quantity) FROM SustainablePackaging;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 47, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: When was the episode with a 999,000 BARB rating first aired in Denmark?", "schema": "CREATE TABLE table_26591309_3 (first_broadcast_denmark___dr1__ VARCHAR, official_barb_ratings VARCHAR)", "sql": "SELECT first_broadcast_denmark___dr1__ FROM table_26591309_3 WHERE official_barb_ratings = '999,000';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 101, "num_statements": 1} {"question": "PostgreSQL regression test 'strings': Write the SELECT query (example 468).", "schema": null, "sql": "SELECT '\\x123456'::bytea::int2; -- error\n\nSELECT ''::bytea::int4 AS \"0\";", "explanation": "Regression test for Strings in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT '\\x123456'::bytea::int2; -- error\n\nSELECT ''::bytea::int4 AS \"0\") AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 72, "num_statements": 2} {"question": "What is the maximum number of events each museum ('museum' table) hosted in a year?", "schema": "CREATE TABLE museum (id INT, name VARCHAR(50), location VARCHAR(50), year_established INT);CREATE TABLE event (id INT, museum_id INT, name VARCHAR(50), year INT, revenue INT);", "sql": "SELECT museum.name, MAX(event_year_count) FROM (SELECT museum_id, COUNT(*) AS event_year_count FROM event GROUP BY museum_id, year) AS subquery JOIN museum ON museum.id = subquery.museum_id GROUP BY museum.id;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 209, "num_statements": 1} {"question": "PostgreSQL regression test 'constraints': Write the SELECT query (example 200).", "schema": null, "sql": "SELECT conname, conrelid::regclass FROM pg_constraint\n WHERE conname LIKE 'parted_uniq%' ORDER BY conname;", "explanation": "Regression test for Constraints in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT conname, conrelid::regclass FROM pg_constraint\n WHERE conname LIKE 'parted_uniq%' ORDER BY conname) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 107, "num_statements": 1} {"question": "Find the average number of tickets sold and total number of home games played by the 'Dallas Mavericks' in the 'Southwest' division for the year 2020. Assume the 'games' table has columns 'team_name', 'sale_year', 'num_tickets_sold', 'is_home_game'.", "schema": "CREATE TABLE TEAMS (team_name VARCHAR(50), division VARCHAR(50)); INSERT INTO TEAMS (team_name, division) VALUES ('Dallas Mavericks', 'Southwest'); CREATE TABLE games (team_name VARCHAR(50), sale_year INT, num_tickets_sold INT, is_home_game BOOLEAN); INSERT INTO games (team_name, sale_year, num_tickets_sold, is_home_game) VALUES ('Dallas Mavericks', 2020, 18000, TRUE);", "sql": "SELECT AVG(num_tickets_sold), COUNT(*) FROM games WHERE team_name = 'Dallas Mavericks' AND sale_year = 2020 AND is_home_game = TRUE AND division = (SELECT division FROM TEAMS WHERE team_name = 'Dallas Mavericks');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 213, "num_statements": 1} {"question": "List the total number of employees per job category and the average salary for each category in the HR database.", "schema": "CREATE TABLE employees (id INT, name TEXT, job_category TEXT, salary INT); INSERT INTO employees (id, name, job_category, salary) VALUES (1, 'John Doe', 'Engineering', 70000), (2, 'Jane Smith', 'Management', 90000), (3, 'Bob Johnson', 'Assembly', 50000), (4, 'Alice Williams', 'Engineering', 75000), (5, 'Charlie Brown', 'Assembly', 55000);", "sql": "SELECT job_category, COUNT(*) as total_employees, AVG(salary) as avg_salary FROM employees GROUP BY job_category;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 113, "num_statements": 1} {"question": "What was the total expenditure of eco-tourists in Costa Rica and Belize in 2020?", "schema": "CREATE TABLE tourism_stats (country VARCHAR(255), year INT, tourism_type VARCHAR(255), expenditure DECIMAL(10, 2)); INSERT INTO tourism_stats (country, year, tourism_type, expenditure) VALUES ('Costa Rica', 2020, 'Eco-tourism', 500000), ('Costa Rica', 2020, 'Eco-tourism', 600000), ('Belize', 2020, 'Eco-tourism', 400000), ('Belize', 2020, 'Eco-tourism', 450000);", "sql": "SELECT SUM(expenditure) AS total_expenditure FROM tourism_stats WHERE country IN ('Costa Rica', 'Belize') AND tourism_type = 'Eco-tourism' AND year = 2020;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 155, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: On what date was the game in Week 9 played?", "schema": "CREATE TABLE table_name_70 (date VARCHAR, week VARCHAR)", "sql": "SELECT date FROM table_name_70 WHERE week = 9;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 46, "num_statements": 1} {"question": "Write the SQL definition from PostgreSQL contrib extension 'contrib' (item 78).", "schema": null, "sql": "CREATE FUNCTION ltree_picksplit(internal, internal)\nRETURNS internal as 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;", "explanation": "SQL definition from the 'contrib' PostgreSQL contrib extension.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 132, "num_statements": 1} {"question": "PostgreSQL Rules: show example 61.", "schema": null, "sql": "CREATE VIEW phone_number AS SELECT person, phone FROM phone_data WHERE phone NOT LIKE '412%';", "explanation": "Example from PostgreSQL documentation on Rules.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "ddl_view", "is_postgresql_specific": false, "sql_length": 93, "num_statements": 1} {"question": "List all customers with fraudulent transactions in Q2 2022.", "schema": "CREATE TABLE customers (customer_id INT, customer_name TEXT); CREATE TABLE transactions (transaction_id INT, customer_id INT, transaction_date DATE, is_fraudulent BOOLEAN);", "sql": "SELECT c.customer_id, c.customer_name FROM customers c JOIN transactions t ON c.customer_id = t.customer_id WHERE t.transaction_date BETWEEN '2022-04-01' AND '2022-06-30' AND t.is_fraudulent = TRUE;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 198, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the location of the Havaianas Beachley Classic, held in Australia?", "schema": "CREATE TABLE table_name_99 (location VARCHAR, country VARCHAR, event VARCHAR)", "sql": "SELECT location FROM table_name_99 WHERE country = 'australia' AND event = 'havaianas beachley classic';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 104, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the sum of the draws with less than 2284 against, 8 losses, and more than 10 wins?", "schema": "CREATE TABLE table_name_61 (draws INTEGER, wins VARCHAR, against VARCHAR, losses VARCHAR)", "sql": "SELECT SUM(draws) FROM table_name_61 WHERE against < 2284 AND losses = 8 AND wins > 10;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 87, "num_statements": 1} {"question": "What's the total donation amount given to the Orphan Support program in Afghanistan and the Refugee Assistance program in Pakistan?", "schema": "CREATE TABLE donations (id INT, donor_id INT, program_id INT, amount INT); CREATE TABLE donors (id INT, name TEXT, age INT); CREATE TABLE programs (id INT, name TEXT, location TEXT); INSERT INTO donations VALUES (1, 1, 1, 500), (2, 2, 2, 300), (3, 3, 1, 700), (4, 4, 2, 600); INSERT INTO donors VALUES (1, 'Abdul Rauf', 35), (2, 'Siddique Khan', 40), (3, 'Mohammad Hashim', 45), (4, 'Hamid Karzai', 50); INSERT INTO programs VALUES (1, 'Orphan Support', 'Afghanistan'), (2, 'Refugee Assistance', 'Pakistan');", "sql": "SELECT SUM(d.amount) FROM donations d INNER JOIN programs p ON d.program_id = p.id WHERE p.name IN ('Orphan Support', 'Refugee Assistance') AND p.location IN ('Afghanistan', 'Pakistan');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 186, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which Draws have Losses smaller than 16, and a Team of university, and Wins larger than 0?", "schema": "CREATE TABLE table_name_38 (draws INTEGER, wins VARCHAR, losses VARCHAR, team VARCHAR)", "sql": "SELECT AVG(draws) FROM table_name_38 WHERE losses < 16 AND team = 'university' AND wins > 0;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 92, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'numeric_big' (example 231).", "schema": null, "sql": "INSERT INTO num_exp_sub VALUES (5,2,'994877526002806872754341495993610.60896951623817756834461124123286284017021118170033801249797242818270444792350668237291391010826978126604392715751281366489250793073354867755345743514510156309395711933053460228041067059994425117350974491367099004404995846913641329458537237789584653041949090121498951516476399288513593944575192159570458664608461677113504914551578443229008454218964701550932948083369656042643364608405637360180021322967144409944099438498649645368196191999692949583952927486593144959284443545794934667002661774373364219852712996869245745722896071593910890197478196462961042627387162830776094709087748993678069776845437889735782063');", "explanation": "DML from PostgreSQL core regression test for Numeric Big.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 685, "num_statements": 1} {"question": "List all startups founded by African entrepreneurs in the 'Fintech' industry", "schema": "CREATE TABLE startups(id INT, name TEXT, founded_year INT, founder_ethnicity TEXT, industry TEXT); INSERT INTO startups (id, name, founded_year, founder_ethnicity, industry) VALUES (1, 'Delta Enterprises', 2020, 'Latinx', 'Retail'); INSERT INTO startups (id, name, founded_year, founder_ethnicity, industry) VALUES (2, 'Epsilon Co', 2018, 'Asian', 'Education'); INSERT INTO startups (id, name, founded_year, founder_ethnicity, industry) VALUES (3, 'Theta Startup', 2019, 'African', 'Fintech');", "sql": "SELECT * FROM startups WHERE founder_ethnicity = 'African' AND industry = 'Fintech';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 84, "num_statements": 1} {"question": "What is the total investment in social impact bonds in the Caribbean?", "schema": "CREATE TABLE social_impact_bonds (id INT, region VARCHAR(50), investment FLOAT); INSERT INTO social_impact_bonds (id, region, investment) VALUES (1, 'Caribbean', 250000); INSERT INTO social_impact_bonds (id, region, investment) VALUES (2, 'Caribbean', 300000);", "sql": "SELECT SUM(investment) FROM social_impact_bonds WHERE region = 'Caribbean';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 75, "num_statements": 1} {"question": "What is the total number of patients who received therapy in South Africa?", "schema": "CREATE TABLE patients (id INT, country VARCHAR(255), therapy_received BOOLEAN); INSERT INTO patients (id, country, therapy_received) VALUES (1, 'South Africa', true), (2, 'South Africa', false);", "sql": "SELECT COUNT(*) FROM patients WHERE country = 'South Africa' AND therapy_received = true;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 89, "num_statements": 1} {"question": "Delete a network usage record from the network_usage table", "schema": "CREATE TABLE network_usage (usage_id INT, subscriber_id INT, usage_date DATE, usage_type VARCHAR(50), usage_duration INT);", "sql": "DELETE FROM network_usage WHERE usage_id = 3001;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_delete", "is_postgresql_specific": false, "sql_length": 48, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the report for 10 october?", "schema": "CREATE TABLE table_name_13 (report VARCHAR, date VARCHAR)", "sql": "SELECT report FROM table_name_13 WHERE date = '10 october';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 59, "num_statements": 1} {"question": "What is the total number of properties in the city of Seoul, South Korea that have green roofs?", "schema": "CREATE TABLE seoul_real_estate(id INT, city VARCHAR(50), green_roof BOOLEAN); INSERT INTO seoul_real_estate VALUES (1, 'Seoul', true);", "sql": "SELECT COUNT(*) FROM seoul_real_estate WHERE city = 'Seoul' AND green_roof = true;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 82, "num_statements": 1} {"question": "PostgreSQL regression test 'int4': Write the SELECT query (example 31).", "schema": null, "sql": "SELECT i.f1, i.f1 * int4 '2' AS x FROM INT4_TBL i\nWHERE abs(f1) < 1073741824;", "explanation": "Regression test for Int4 in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT i.f1, i.f1 * int4 '2' AS x FROM INT4_TBL i\nWHERE abs(f1) < 1073741824) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 77, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who did the team play on january 9?", "schema": "CREATE TABLE table_27733258_8 (team VARCHAR, date VARCHAR)", "sql": "SELECT team FROM table_27733258_8 WHERE date = 'January 9';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 59, "num_statements": 1} {"question": "Display the percentage of patients who have experienced cultural competency in healthcare services, by their ethnicity, in descending order.", "schema": "CREATE TABLE patient (patient_id INT, ethnicity VARCHAR(255), experienced_cultural_competency BOOLEAN); INSERT INTO patient (patient_id, ethnicity, experienced_cultural_competency) VALUES (1, 'Hispanic', TRUE), (2, 'Asian', FALSE), (3, 'White', TRUE), (4, 'Black', TRUE);", "sql": "SELECT ethnicity, 100.0 * SUM(experienced_cultural_competency) / COUNT(*) as percentage FROM patient GROUP BY ethnicity ORDER BY percentage DESC;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 145, "num_statements": 1} {"question": "What is the number of job applications received, by job title, in the last month?", "schema": "CREATE TABLE job_applications (application_id INT, name TEXT, job_title TEXT, application_date DATE); INSERT INTO job_applications (application_id, name, job_title, application_date) VALUES (1, 'Alice', 'HR Manager', '2022-01-01'), (2, 'Bob', 'Software Engineer', '2022-01-05'), (3, 'Charlie', 'Software Engineer', '2022-01-10'), (4, 'Dave', 'Sales Manager', '2022-01-15'), (5, 'Eve', 'Software Engineer', '2022-01-20');", "sql": "SELECT job_title, COUNT(*) AS num_applications FROM job_applications WHERE application_date >= DATEADD(month, -1, GETDATE()) GROUP BY job_title;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 144, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: For Pigeon Creek what is the per capita income?", "schema": "CREATE TABLE table_1840495_2 (per_capita_income VARCHAR, place VARCHAR)", "sql": "SELECT per_capita_income FROM table_1840495_2 WHERE place = 'Pigeon Creek';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 75, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the score on April 5?", "schema": "CREATE TABLE table_name_64 (score VARCHAR, date VARCHAR)", "sql": "SELECT score FROM table_name_64 WHERE date = 'april 5';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 55, "num_statements": 1} {"question": "pgTAP test for Inheritance (assertion 71).", "schema": null, "sql": "SELECT * FROM check_test(\n isnt_ancestor_of( 'hide', 'h_parent', 'hide', 'nope', 2 ),\n true,\n 'isnt_ancestor_of(psch, ptab, csch, nope, 2)',\n 'Table hide.h_parent should not be ancestor 2 for hide.nope',\n ''\n);", "explanation": "SQL assertion from pgTAP test suite for Inheritance.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 225, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which player's runs are 270?", "schema": "CREATE TABLE table_name_43 (player VARCHAR, runs VARCHAR)", "sql": "SELECT player FROM table_name_43 WHERE runs = '270';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 52, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who tied with 8 losses and 50% wins?", "schema": "CREATE TABLE table_name_19 (tied VARCHAR, losses VARCHAR, win__percentage VARCHAR)", "sql": "SELECT tied FROM table_name_19 WHERE losses = '8' AND win__percentage = '50%';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 78, "num_statements": 1} {"question": "What is the average number of autonomous taxis in the taxis table for each city?", "schema": "CREATE TABLE taxis (id INT, city TEXT, vehicle_type TEXT, fuel_type TEXT, total_taxis INT); INSERT INTO taxis (id, city, vehicle_type, fuel_type, total_taxis) VALUES (1, 'San Francisco', 'Taxi', 'Autonomous', 100), (2, 'New York', 'Taxi', 'Gasoline', 800), (3, 'Los Angeles', 'Taxi', 'Autonomous', 150);", "sql": "SELECT city, AVG(total_taxis) as avg_autonomous_taxis FROM taxis WHERE vehicle_type = 'Taxi' AND fuel_type = 'Autonomous' GROUP BY city;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 136, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'numeric' (example 35).", "schema": null, "sql": "INSERT INTO num_exp_div VALUES (0,5,'0');", "explanation": "DML from PostgreSQL core regression test for Numeric.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 41, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What candidate is in Louisiana 6?", "schema": "CREATE TABLE table_1342331_18 (candidates VARCHAR, district VARCHAR)", "sql": "SELECT candidates FROM table_1342331_18 WHERE district = 'Louisiana 6';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 71, "num_statements": 1} {"question": "Find players who have the same email address in 'esports_players' table.", "schema": "CREATE TABLE esports_players (PlayerID INT, Name VARCHAR(20), Email VARCHAR(30)); INSERT INTO esports_players (PlayerID, Name, Email) VALUES (1, 'John Doe', 'johndoe@example.com'); INSERT INTO esports_players (PlayerID, Name, Email) VALUES (2, 'Jane Doe', 'johndoe@example.com');", "sql": "SELECT PlayerID, Email FROM esports_players WHERE Email IN (SELECT Email FROM esports_players GROUP BY Email HAVING COUNT(*) > 1);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 130, "num_statements": 1} {"question": "List all students who have never taken a mental health assessment, along with their IDs, from the 'student_mental_health' table.", "schema": "CREATE TABLE student_mental_health (student_id INT, assessment_date DATE, assessment_score INT);", "sql": "SELECT student_id FROM student_mental_health WHERE student_id NOT IN (SELECT student_id FROM student_mental_health WHERE assessment_date IS NOT NULL);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 150, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the maximum population size in the town of Stanthorpe?", "schema": "CREATE TABLE table_12584173_1 (population__stanthorpe_ INTEGER)", "sql": "SELECT MAX(population__stanthorpe_) FROM table_12584173_1;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 58, "num_statements": 1} {"question": "Identify menu items with a price above the 75th percentile of their category's prices.", "schema": "CREATE TABLE Menu (MenuID INT, MenuItem VARCHAR(50), Category VARCHAR(50), Price DECIMAL(5,2));", "sql": "SELECT MenuItem, Category, Price FROM (SELECT MenuItem, Category, Price, NTILE(4) OVER (PARTITION BY Category ORDER BY Price DESC) AS PriceQuantile FROM Menu) AS Subquery WHERE PriceQuantile = 1;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "query_window_function", "is_postgresql_specific": true, "sql_length": 195, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What position did the player have who was from the college of california?", "schema": "CREATE TABLE table_name_55 (position VARCHAR, college VARCHAR)", "sql": "SELECT position FROM table_name_55 WHERE college = 'california';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 64, "num_statements": 1} {"question": "How many articles were published in Spanish by the 'El País' newspaper in 2021?", "schema": "CREATE TABLE Articles (id INT, publication_date DATE, language VARCHAR(255), newspaper VARCHAR(255), word_count INT); INSERT INTO Articles (id, publication_date, language, newspaper, word_count) VALUES (1, '2021-01-01', 'Spanish', 'El País', 800), (2, '2021-02-02', 'English', 'The New York Times', 500), (3, '2021-03-03', 'Spanish', 'El País', 600), (4, '2021-04-04', 'French', 'Le Monde', 700);", "sql": "SELECT COUNT(*) FROM Articles WHERE language = 'Spanish' AND newspaper = 'El País' AND YEAR(publication_date) = 2021;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 117, "num_statements": 1} {"question": "pgTAP test for Pktap (assertion 11).", "schema": null, "sql": "SELECT * FROM check_test(\n has_pk( 'hide', 'hidesometab'::name ),\n true,\n 'has_pk( hideschema, hidetable )',\n 'Table hide.hidesometab should have a primary key',\n ''\n);", "explanation": "SQL assertion from pgTAP test suite for Pktap.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 183, "num_statements": 1} {"question": "PostgreSQL regression test 'create_function_sql': Write the SELECT query (example 65).", "schema": null, "sql": "SELECT * FROM functest_sri1();", "explanation": "Regression test for Create Function Sql in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT * FROM functest_sri1()) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 30, "num_statements": 1} {"question": "Delete the music track 'Eternal Love' by 'Sarah Brightman'.", "schema": "CREATE TABLE music_track (track_id INT, title VARCHAR(100), artist VARCHAR(100)); INSERT INTO music_track (track_id, title, artist) VALUES (1, 'Eternal Love', 'Sarah Brightman');", "sql": "DELETE FROM music_track WHERE title = 'Eternal Love' AND artist = 'Sarah Brightman';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_delete", "is_postgresql_specific": false, "sql_length": 84, "num_statements": 1} {"question": "What is the total waste generated by each department?", "schema": "CREATE TABLE factories (factory_id INT, department VARCHAR(20), waste_generated_kg INT); INSERT INTO factories VALUES (1, 'textiles', 500), (2, 'metalwork', 300), (3, 'textiles', 700), (4, 'electronics', 400), (5, 'textiles', 600);", "sql": "SELECT department, SUM(waste_generated_kg) FROM factories GROUP BY department;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 78, "num_statements": 1} {"question": "Provide a list of intelligence operations that took place in Europe in the year 2019", "schema": "CREATE TABLE intelligence_operations (id INT, name TEXT, location TEXT, year INT); INSERT INTO intelligence_operations (id, name, location, year) VALUES (1, 'Operation Red Falcon', 'Germany', 2019), (2, 'Operation Night Hawk', 'France', 2018), (3, 'Operation Black Swan', 'UK', 2019);", "sql": "SELECT name FROM intelligence_operations WHERE location IN ('Germany', 'France', 'UK') AND year = 2019;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 103, "num_statements": 1} {"question": "Insert a new record in the athlete table with the following data: id=10, name='Alex Rodriguez', team='New York Yankees'.", "schema": "CREATE TABLE athlete (id INT, name VARCHAR(50), team VARCHAR(50));", "sql": "INSERT INTO athlete (id, name, team) VALUES (10, 'Alex Rodriguez', 'New York Yankees');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 87, "num_statements": 1} {"question": "What are the total expenses for SpaceX and Blue Origin for their launch vehicles?", "schema": "CREATE TABLE spacex_expenses (company VARCHAR(20), launch_vehicle VARCHAR(30), expenses INT); INSERT INTO spacex_expenses (company, launch_vehicle, expenses) VALUES ('SpaceX', 'Falcon 1', 50000000), ('SpaceX', 'Falcon 9', 60000000), ('SpaceX', 'Falcon Heavy', 90000000); CREATE TABLE blue_origin_expenses (company VARCHAR(20), launch_vehicle VARCHAR(30), expenses INT); INSERT INTO blue_origin_expenses (company, launch_vehicle, expenses) VALUES ('Blue Origin', 'New Shepard', 100000000), ('Blue Origin', 'New Glenn', 150000000);", "sql": "SELECT SUM(expenses) FROM spacex_expenses JOIN blue_origin_expenses ON FALSE;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 77, "num_statements": 1} {"question": "What is the minimum age for health insurance policyholders in Florida?", "schema": "CREATE TABLE policyholders (id INT, state VARCHAR(2), policy_type VARCHAR(20), age INT); INSERT INTO policyholders (id, state, policy_type, age) VALUES (1, 'FL', 'Life', 25), (2, 'FL', 'Health', 30), (3, 'FL', 'Health', 40);", "sql": "SELECT MIN(age) FROM policyholders WHERE state = 'FL' AND policy_type = 'Health';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 81, "num_statements": 1} {"question": "What is the average age of players who have played Virtual Reality games?", "schema": "CREATE TABLE players (id INT, age INT, country VARCHAR(50), vrgames BOOLEAN); INSERT INTO players (id, age, country, vrgames) VALUES (1, 25, 'Canada', true), (2, 30, 'USA', false);", "sql": "SELECT AVG(age) FROM players WHERE vrgames = true;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 50, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the HDTV when documentaries are the content?", "schema": "CREATE TABLE table_name_84 (hdtv VARCHAR, content VARCHAR)", "sql": "SELECT hdtv FROM table_name_84 WHERE content = 'documentaries';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 63, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who won group III when group 1 was of the racing club?", "schema": "CREATE TABLE table_name_99 (group_iII VARCHAR, group_i VARCHAR)", "sql": "SELECT group_iII FROM table_name_99 WHERE group_i = 'racing club';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the total crowd size for the him team footscray?", "schema": "CREATE TABLE table_name_95 (crowd VARCHAR, home_team VARCHAR)", "sql": "SELECT COUNT(crowd) FROM table_name_95 WHERE home_team = 'footscray';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 69, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which stage number did son bou arrive at?", "schema": "CREATE TABLE table_name_22 (stage VARCHAR, arrival VARCHAR)", "sql": "SELECT COUNT(stage) FROM table_name_22 WHERE arrival = 'son bou';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 65, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the attendance record for Leicester City?", "schema": "CREATE TABLE table_name_17 (attendance VARCHAR, home_team VARCHAR)", "sql": "SELECT attendance FROM table_name_17 WHERE home_team = 'leicester city';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 72, "num_statements": 1} {"question": "What is the total investment and investment rank for companies in the 'USA'?", "schema": "CREATE TABLE company (id INT, name VARCHAR(50), founding_year INT, industry VARCHAR(50), country VARCHAR(50)); INSERT INTO company (id, name, founding_year, industry, country) VALUES (1, 'Acme Inc', 2010, 'Tech', 'USA'); CREATE TABLE investment (id INT, company_id INT, investment_amount INT, investment_round VARCHAR(50), investment_date DATE); INSERT INTO investment (id, company_id, investment_amount, investment_round, investment_date) VALUES (1, 1, 5000000, 'Series A', '2012-04-01');", "sql": "SELECT company_id, SUM(investment_amount) as total_investment, RANK() OVER (PARTITION BY company_id ORDER BY SUM(investment_amount) DESC) as investment_rank FROM investment WHERE (SELECT country FROM company WHERE id = company_id) = 'USA' GROUP BY company_id;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "query_window_function", "is_postgresql_specific": true, "sql_length": 259, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the average week number of all the matches where less than 22,604 people attended?", "schema": "CREATE TABLE table_name_75 (week INTEGER, attendance INTEGER)", "sql": "SELECT AVG(week) FROM table_name_75 WHERE attendance < 22 OFFSET 604;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 69, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which Placings have a Nation of west germany, and Points larger than 303.72?", "schema": "CREATE TABLE table_name_31 (placings INTEGER, nation VARCHAR, points VARCHAR)", "sql": "SELECT SUM(placings) FROM table_name_31 WHERE nation = 'west germany' AND points > 303.72;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 90, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What NFL team did the Tight End position belong to?", "schema": "CREATE TABLE table_name_62 (nfl_team VARCHAR, position VARCHAR)", "sql": "SELECT nfl_team FROM table_name_62 WHERE position = 'tight end';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 64, "num_statements": 1} {"question": "Write the SQL definition from PostgreSQL contrib extension 'contrib' (item 26).", "schema": null, "sql": "CREATE OPERATOR - (\n\tLEFTARG = hstore,\n\tRIGHTARG = text[],\n\tPROCEDURE = delete\n);", "explanation": "SQL definition from the 'contrib' PostgreSQL contrib extension.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "ddl_advanced", "is_postgresql_specific": true, "sql_length": 81, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What team was in 1st plate in a year later than 1956?", "schema": "CREATE TABLE table_name_84 (year INTEGER)", "sql": "SELECT 1 AS st_place_team FROM table_name_84 WHERE year > 1956;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 63, "num_statements": 1} {"question": "List all unique waste types from the 'waste_types' table", "schema": "CREATE TABLE waste_types (waste_type_id INT, waste_type_name VARCHAR(50));", "sql": "SELECT DISTINCT waste_type_name FROM waste_types;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 49, "num_statements": 1} {"question": "What is the total data usage for mobile customers in the city of London in the past week?", "schema": "CREATE TABLE mobile_customers (customer_id INT, city VARCHAR(20), data_usage FLOAT); INSERT INTO mobile_customers (customer_id, city, data_usage) VALUES (1, 'London', 3000), (2, 'Paris', 4000), (3, 'London', 2500);", "sql": "SELECT SUM(data_usage) FROM mobile_customers WHERE city = 'London' AND last_usage_date >= DATEADD(week, -1, CURRENT_DATE);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 122, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the attendance on August 23?", "schema": "CREATE TABLE table_name_77 (attendance VARCHAR, date VARCHAR)", "sql": "SELECT COUNT(attendance) FROM table_name_77 WHERE date = 'august 23';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 69, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the awardee for tingya", "schema": "CREATE TABLE table_24446718_3 (awardee_s_ VARCHAR, name_of_film VARCHAR)", "sql": "SELECT awardee_s_ FROM table_24446718_3 WHERE name_of_film = 'Tingya';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 70, "num_statements": 1} {"question": "List all suppliers from the United States that provide materials to companies with ethical manufacturing practices.", "schema": "CREATE TABLE suppliers (id INT, name TEXT, country TEXT, ethical_practices BOOLEAN); INSERT INTO suppliers (id, name, country, ethical_practices) VALUES (1, 'XYZ Supplies', 'USA', TRUE), (2, 'LMN Supplies', 'Canada', FALSE), (3, 'OPQ Supplies', 'USA', TRUE); CREATE TABLE purchases (id INT, supplier_id INT, company_id INT, ethical_manufacturing BOOLEAN); INSERT INTO purchases (id, supplier_id, company_id, ethical_manufacturing) VALUES (1, 1, 1, TRUE), (2, 2, 1, FALSE), (3, 3, 1, TRUE);", "sql": "SELECT s.name FROM suppliers s JOIN purchases p ON s.id = p.supplier_id WHERE s.country = 'USA' AND p.ethical_manufacturing = TRUE;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 131, "num_statements": 1} {"question": "PostgreSQL regression test 'timestamp': Write the SELECT query (example 152).", "schema": null, "sql": "SELECT to_char(d, 'FF1 FF2 FF3 FF4 FF5 FF6 ff1 ff2 ff3 ff4 ff5 ff6 MS US')\n FROM (VALUES\n ('2018-11-02 12:34:56'::timestamp),\n ('2018-11-02 12:34:56.78'),\n ('2018-11-02 12:34:56.78901'),\n ('2018-11-02 12:34:56.78901234')\n ) d(d);", "explanation": "Regression test for Timestamp in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT to_char(d, 'FF1 FF2 FF3 FF4 FF5 FF6 ff1 ff2 ff3 ff4 ff5 ff6 MS US')\n FROM (VALUES\n ('2018-11-02 12:34:56'::timestamp),\n ('2018-11-02 12:34:56.78'),\n ('2018-11-02 12:34:56.78901'),\n ('2018-11-02 12:34:56.78901234')\n ) d(d)) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 259, "num_statements": 1} {"question": "Find the dispensaries with a total sales amount greater than $100,000 for hybrid strains in Washington state.", "schema": "CREATE TABLE DispensarySales(id INT, dispensary VARCHAR(255), state VARCHAR(255), strain_type VARCHAR(255), sales_amount DECIMAL(10,2));", "sql": "SELECT dispensary FROM DispensarySales WHERE state = 'Washington' AND strain_type = 'Hybrid' GROUP BY dispensary HAVING SUM(sales_amount) > 100000;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 147, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is Record, when Score is 104-99?", "schema": "CREATE TABLE table_name_55 (record VARCHAR, score VARCHAR)", "sql": "SELECT record FROM table_name_55 WHERE score = '104-99';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 56, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which Captain 2 has a Result of final?", "schema": "CREATE TABLE table_name_50 (captain_2 VARCHAR, result VARCHAR)", "sql": "SELECT captain_2 FROM table_name_50 WHERE result = 'final';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 59, "num_statements": 1} {"question": "What is the maximum crop yield for farmers in the Quechua community?", "schema": "CREATE TABLE farmers (name VARCHAR(255), tribe VARCHAR(255), crop_yield INT); INSERT INTO farmers (name, tribe, crop_yield) VALUES ('Juan Lopez', 'Quechua', 1200), ('Maria Rodriguez', 'Quechua', 1500), ('Pedro Gutierrez', 'Quechua', 1000), ('Ana Martinez', 'Quechua', 1300);", "sql": "SELECT MAX(crop_yield) FROM farmers WHERE tribe = 'Quechua';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 60, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which species has a Voges-Proskauer reading of negative and an indole reading of negative?", "schema": "CREATE TABLE table_name_43 (species VARCHAR, voges_proskauer VARCHAR, indole VARCHAR)", "sql": "SELECT species FROM table_name_43 WHERE voges_proskauer = 'negative' AND indole = 'negative';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 93, "num_statements": 1} {"question": "Update the 'publication_date' of an article with 'article_id' 1 in the 'articles' table", "schema": "CREATE TABLE articles (article_id INT PRIMARY KEY, title VARCHAR(255), content TEXT, publication_date DATE);", "sql": "UPDATE articles SET publication_date = '2022-01-15' WHERE article_id = 1;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 73, "num_statements": 1} {"question": "What is the average salary for each department, along with the number of employees in that department?", "schema": "CREATE TABLE Employees (EmployeeID INT, FirstName VARCHAR(50), LastName VARCHAR(50), Department VARCHAR(50), Salary DECIMAL(10,2)); INSERT INTO Employees (EmployeeID, FirstName, LastName, Department, Salary) VALUES (1, 'John', 'Doe', 'Mining', 75000.00); INSERT INTO Employees (EmployeeID, FirstName, LastName, Department, Salary) VALUES (2, 'Jane', 'Doe', 'Environment', 70000.00); INSERT INTO Employees (EmployeeID, FirstName, LastName, Department, Salary) VALUES (3, 'Mike', 'Smith', 'Mining', 80000.00);", "sql": "SELECT Department, AVG(Salary) as 'AvgSalary', COUNT(*) as 'EmployeeCount' FROM Employees GROUP BY Department;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 110, "num_statements": 1} {"question": "PostgreSQL regression test 'tstypes': Write the SELECT query (example 124).", "schema": null, "sql": "SELECT ts_rank(' a:1 sa:2C d g'::tsvector, 'a | sa:*');", "explanation": "Regression test for Tstypes in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT ts_rank(' a:1 sa:2C d g'::tsvector, 'a | sa:*')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "advanced", "category": "query_select", "is_postgresql_specific": true, "sql_length": 55, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who was recruited from Calgary?", "schema": "CREATE TABLE table_name_34 (player VARCHAR, college VARCHAR)", "sql": "SELECT player FROM table_name_34 WHERE college = 'calgary';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 59, "num_statements": 1} {"question": "Update the salary of employees in the IT department by a 5% increase.", "schema": "CREATE TABLE SalaryData (EmployeeID INT, Department VARCHAR(50), Salary DECIMAL(10, 2)); INSERT INTO SalaryData VALUES (1, 'IT', 50000); INSERT INTO SalaryData VALUES (2, 'HR', 55000); INSERT INTO SalaryData VALUES (3, 'Finance', 60000);", "sql": "UPDATE SalaryData SET Salary = Salary * 1.05 WHERE Department = 'IT';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 69, "num_statements": 1} {"question": "shifted_safety_ratings", "schema": "CREATE TABLE workplaces (id INT, name TEXT, safety_rating INT, union_sector TEXT); INSERT INTO workplaces (id, name, safety_rating, union_sector) VALUES (1, 'ABC Company', 88, 'Manufacturing'); INSERT INTO workplaces (id, name, safety_rating, union_sector) VALUES (2, 'XYZ Corporation', 92, 'Transportation');", "sql": "SELECT *, LAG(safety_rating) OVER (PARTITION BY union_sector ORDER BY id) as previous_rating FROM workplaces;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "query_window_function", "is_postgresql_specific": true, "sql_length": 109, "num_statements": 1} {"question": "What is the average speed of spacecraft launched by NASA?", "schema": "CREATE TABLE spacecraft (id INT PRIMARY KEY, name VARCHAR(255), country VARCHAR(255), launch_date DATE, max_speed FLOAT);", "sql": "SELECT AVG(max_speed) FROM spacecraft WHERE country = 'United States';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 70, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the distance from Jaffa of Jerusalem station?", "schema": "CREATE TABLE table_name_1 (distance_from_jaffa VARCHAR, name_location VARCHAR)", "sql": "SELECT distance_from_jaffa FROM table_name_1 WHERE name_location = 'jerusalem';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 79, "num_statements": 1} {"question": "What is the average playtime for players who play games in AR, by country?", "schema": "CREATE TABLE Players (PlayerID INT, Country VARCHAR(20), AR boolean, Playtime INT); INSERT INTO Players (PlayerID, Country, AR, Playtime) VALUES (1, 'Japan', true, 80), (2, 'South Korea', false, 60), (3, 'China', true, 90);", "sql": "SELECT Country, AVG(Playtime) FROM Players WHERE AR = true GROUP BY Country;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 76, "num_statements": 1} {"question": "What is the total number of vulnerabilities found in the 'NetworkDevices' table, grouped by severity?", "schema": "CREATE TABLE NetworkDevices (id INT, device_name VARCHAR(50), severity VARCHAR(10), discovered_date DATE); INSERT INTO NetworkDevices (id, device_name, severity, discovered_date) VALUES (1, 'Router1', 'High', '2021-08-01'), (2, 'Switch1', 'Medium', '2021-07-15'), (3, 'Firewall1', 'Low', '2021-06-01');", "sql": "SELECT severity, COUNT(*) as total_vulnerabilities FROM NetworkDevices GROUP BY severity;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 89, "num_statements": 1} {"question": "What is the transaction amount for the most recent transaction of each client?", "schema": "CREATE TABLE transactions (transaction_id INT, client_id INT, transaction_amount DECIMAL(10,2), transaction_date DATE); INSERT INTO transactions VALUES (1, 1, 50000.00, '2022-01-01'), (2, 2, 70000.00, '2022-02-10'), (3, 3, 30000.00, '2022-03-05'), (4, 1, 100000.00, '2022-04-15'), (5, 4, 60000.00, '2022-04-20');", "sql": "SELECT client_id, MAX(transaction_amount) as max_transaction_amount FROM transactions GROUP BY client_id;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 105, "num_statements": 1} {"question": "What are the average innovation scores for each product category, grouped by month?", "schema": "CREATE TABLE Product(Id INT, Category VARCHAR(50), ManufacturerId INT); CREATE TABLE InnovationScore(Id INT, Score INT, ProductId INT, ScoreDate DATE);", "sql": "SELECT p.Category, DATE_FORMAT(i.ScoreDate, '%Y-%m') AS Month, AVG(i.Score) AS AverageScore FROM InnovationScore i JOIN Product p ON i.ProductId = p.Id GROUP BY p.Category, Month;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 179, "num_statements": 1} {"question": "PostgreSQL regression test 'polymorphism': Write the SELECT query (example 157).", "schema": null, "sql": "select f3, myaggn05a(f1) from t group by f3 order by f3;", "explanation": "Regression test for Polymorphism in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select f3, myaggn05a(f1) from t group by f3 order by f3) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 56, "num_statements": 1} {"question": "What is the average ESG score for each sector in 2022?", "schema": "CREATE TABLE companies (company_id INT, company_name VARCHAR(255), sector VARCHAR(255), esg_score INT, year INT); INSERT INTO companies (company_id, company_name, sector, esg_score, year) VALUES (1, 'GreenTech Inc', 'Technology', 85, 2022), (2, 'EcoFarms Ltd', 'Agriculture', 75, 2022), (3, 'CleanEnergy Co', 'Energy', 90, 2022);", "sql": "SELECT sector, AVG(esg_score) as avg_esg_score FROM companies WHERE year = 2022 GROUP BY sector;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 96, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the earliest Round for Central Florida?", "schema": "CREATE TABLE table_name_32 (round INTEGER, team VARCHAR)", "sql": "SELECT MIN(round) FROM table_name_32 WHERE team = 'central florida';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 68, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What were the home team scores at york park?", "schema": "CREATE TABLE table_16388478_2 (home_team VARCHAR, ground VARCHAR)", "sql": "SELECT home_team AS score FROM table_16388478_2 WHERE ground = 'York Park';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 75, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'numeric' (example 52).", "schema": null, "sql": "INSERT INTO num_exp_add VALUES (1,0,'0');", "explanation": "DML from PostgreSQL core regression test for Numeric.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 41, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: what are the endings of examples achilles, appendices, fæces", "schema": "CREATE TABLE table_17798093_20 (ending VARCHAR, examples VARCHAR)", "sql": "SELECT ending FROM table_17798093_20 WHERE examples = 'Achilles, appendices, fæces';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 84, "num_statements": 1} {"question": "What is the total number of high severity incidents in the 'IncidentReports' table?", "schema": "CREATE TABLE IncidentReports (id INT, incident_name VARCHAR(50), severity VARCHAR(10), incident_type VARCHAR(50)); INSERT INTO IncidentReports (id, incident_name, severity, incident_type) VALUES (1, 'Incident1', 'High', 'Malware'), (2, 'Incident2', 'Medium', 'Phishing'), (3, 'Incident3', 'Low', 'Unpatched Software'), (4, 'Incident4', 'High', 'SQL Injection'), (5, 'Incident5', 'Low', 'Denial of Service');", "sql": "SELECT COUNT(*) as total_high_severity_incidents FROM IncidentReports WHERE severity = 'High';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 94, "num_statements": 1} {"question": "What are the total construction costs for each type of building permit in the city of Seattle for the year 2020?", "schema": "CREATE TABLE building_permits (permit_type TEXT, city TEXT, cost INTEGER, year INTEGER);INSERT INTO building_permits (permit_type, city, cost, year) VALUES ('Residential', 'Seattle', 200000, 2020), ('Commercial', 'Seattle', 500000, 2020), ('Industrial', 'Seattle', 300000, 2020);", "sql": "SELECT permit_type, SUM(cost) FROM building_permits WHERE city = 'Seattle' AND year = 2020 GROUP BY permit_type;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 112, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which organization administered the poll that showed a lead margin of 5.5?", "schema": "CREATE TABLE table_name_88 (poll_source VARCHAR, lead_margin VARCHAR)", "sql": "SELECT poll_source FROM table_name_88 WHERE lead_margin = 5.5;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 62, "num_statements": 1} {"question": "PostgreSQL regression test 'cluster': Write the SELECT query (example 48).", "schema": null, "sql": "SELECT a,b,c,substring(d for 30), length(d) from clstr_tst ORDER BY a;", "explanation": "Regression test for Cluster in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT a,b,c,substring(d for 30), length(d) from clstr_tst ORDER BY a) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 70, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many stages were won by Robbie McEwen?", "schema": "CREATE TABLE table_17672470_19 (stage VARCHAR, winner VARCHAR)", "sql": "SELECT COUNT(stage) FROM table_17672470_19 WHERE winner = 'Robbie McEwen';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 74, "num_statements": 1} {"question": "What are the average recycling rates in percentage for each district in Southeast Asia in 2020?", "schema": "CREATE TABLE recycling_rates_southeast_asia (district VARCHAR(50), year INT, recycling_rate FLOAT); INSERT INTO recycling_rates_southeast_asia (district, year, recycling_rate) VALUES ('Bangkok', 2020, 30.0), ('Ho_Chi_Minh_City', 2020, 25.0), ('Jakarta', 2020, 22.0), ('Kuala_Lumpur', 2020, 35.0), ('Singapore', 2020, 45.0);", "sql": "SELECT district, AVG(recycling_rate) FROM recycling_rates_southeast_asia WHERE year = 2020 GROUP BY district;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 109, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many outcomes occur for score in the final of 6–3, 6–4, 7–6 (13–11)?", "schema": "CREATE TABLE table_26202940_6 (outcome VARCHAR, score_in_the_final VARCHAR)", "sql": "SELECT COUNT(outcome) FROM table_26202940_6 WHERE score_in_the_final = '6–3, 6–4, 7–6 (13–11)';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 95, "num_statements": 1} {"question": "Show a SQL definition from the timescaledb project (vacuum, item 23).", "schema": null, "sql": "--Similar to normal vacuum tests, but PG11 introduced ability to vacuum multiple tables at once, we make sure that works for hypertables as well.\nCREATE TABLE vacuum_test(time timestamp, temp float);", "explanation": "SQL definition from the open-source timescaledb PostgreSQL project.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_table", "is_postgresql_specific": true, "sql_length": 199, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: who got the first position when Ehime Shimanami got the third position?", "schema": "CREATE TABLE table_21632864_1 (champions VARCHAR, third_place VARCHAR)", "sql": "SELECT champions FROM table_21632864_1 WHERE third_place = 'Ehime Shimanami';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 77, "num_statements": 1} {"question": "What is the maximum age of a mental health professional in Georgia?", "schema": "CREATE TABLE MentalHealthProfessional (ProfessionalID INT, Age INT, Specialty VARCHAR(50), State VARCHAR(20)); INSERT INTO MentalHealthProfessional (ProfessionalID, Age, Specialty, State) VALUES (1, 50, 'Psychologist', 'Georgia'); INSERT INTO MentalHealthProfessional (ProfessionalID, Age, Specialty, State) VALUES (2, 45, 'Social Worker', 'Georgia'); INSERT INTO MentalHealthProfessional (ProfessionalID, Age, Specialty, State) VALUES (3, 55, 'Counselor', 'Georgia');", "sql": "SELECT MAX(Age) FROM MentalHealthProfessional WHERE State = 'Georgia';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 70, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the table position for the team whose outgoing manager was Brian Laws?", "schema": "CREATE TABLE table_26593762_3 (position_in_table VARCHAR, outgoing_manager VARCHAR)", "sql": "SELECT position_in_table FROM table_26593762_3 WHERE outgoing_manager = 'Brian Laws';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 85, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the tries with tries against of 38", "schema": "CREATE TABLE table_name_35 (tries_for VARCHAR, tries_against VARCHAR)", "sql": "SELECT tries_for FROM table_name_35 WHERE tries_against = '38';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 63, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'numeric' (example 234).", "schema": null, "sql": "INSERT INTO num_exp_mul VALUES (5,5,'268862871.275335557081');", "explanation": "DML from PostgreSQL core regression test for Numeric.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 62, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the event for vinny magalhães?", "schema": "CREATE TABLE table_name_74 (event VARCHAR, opponent VARCHAR)", "sql": "SELECT event FROM table_name_74 WHERE opponent = 'vinny magalhães';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 67, "num_statements": 1} {"question": "PL/pgSQL test: Plperl Plperlu (example 5).", "schema": null, "sql": "create or replace function foo(text) returns text language plperl as 'shift';", "explanation": "PL/pgSQL example from PostgreSQL source test for Plperl Plperlu.", "validation_query": null, "source": "plpgsql_source", "difficulty": "advanced", "category": "plpgsql_function", "is_postgresql_specific": true, "sql_length": 78, "num_statements": 1} {"question": "What is the percentage of mobile subscribers who have experienced a network issue in the last month, grouped by device model?", "schema": "CREATE TABLE mobile_subscribers_network_issues (subscriber_id INT, name VARCHAR(255), device_model VARCHAR(255), last_network_issue_date DATE); INSERT INTO mobile_subscribers_network_issues (subscriber_id, name, device_model, last_network_issue_date) VALUES (1, 'John Doe', 'iPhone 12', '2022-01-15'), (2, 'Jane Doe', 'iPhone 12', '2022-02-01'), (3, 'Maria Garcia', 'Samsung Galaxy S21', NULL);", "sql": "SELECT device_model, 100.0 * COUNT(CASE WHEN last_network_issue_date IS NOT NULL AND last_network_issue_date >= DATE_SUB(CURDATE(), INTERVAL 1 MONTH) THEN 1 END) / COUNT(*) AS percentage FROM mobile_subscribers_network_issues GROUP BY device_model;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 248, "num_statements": 1} {"question": "What is the maximum container count in a single cargo transaction for port 'HOU'?", "schema": "CREATE TABLE ports (port_id INT, port_name VARCHAR(20)); INSERT INTO ports (port_id, port_name) VALUES (1, 'LA'), (2, 'LB'), (3, 'HOU'); CREATE TABLE cargo (cargo_id INT, port_id INT, container_count INT); INSERT INTO cargo (cargo_id, port_id, container_count) VALUES (1, 1, 5000), (2, 1, 3000), (3, 2, 4000), (4, 3, 6000), (5, 3, 7000), (6, 3, 8000);", "sql": "SELECT MAX(container_count) FROM cargo WHERE port_id = (SELECT port_id FROM ports WHERE port_name = 'HOU');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 107, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which royal house has the name huai?", "schema": "CREATE TABLE table_name_88 (royal_house VARCHAR, name VARCHAR)", "sql": "SELECT royal_house FROM table_name_88 WHERE name = 'huai';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 58, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: In what category was character Jacqui Mcqueen nominated before 2011?", "schema": "CREATE TABLE table_name_67 (category VARCHAR, character VARCHAR, year VARCHAR)", "sql": "SELECT category FROM table_name_67 WHERE character = 'jacqui mcqueen' AND year < 2011;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 86, "num_statements": 1} {"question": "Write the PL/pgSQL object from PostgreSQL regression test 'update' (example 194).", "schema": null, "sql": "DROP TRIGGER d1_update_trig ON part_d_1_15;", "explanation": "PL/pgSQL object from PostgreSQL core test for Update.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 43, "num_statements": 1} {"question": "What is the total cargo weight handled by each port during the second half of 2022?", "schema": "CREATE TABLE ports (id INT, name TEXT, country TEXT); CREATE TABLE cargo_movements (id INT, port_id INT, cargo_type TEXT, weight INT, date DATE); INSERT INTO ports (id, name, country) VALUES (1, 'Port of Singapore', 'Singapore'), (2, 'Port of Shanghai', 'China'), (3, 'Port of Busan', 'South Korea'); INSERT INTO cargo_movements (id, port_id, cargo_type, weight, date) VALUES (1, 1, 'Bulk', 10000, '2022-07-01'), (2, 1, 'Breakbulk', 8000, '2022-07-02'), (3, 2, 'Bulk', 12000, '2022-07-01'), (4, 2, 'Breakbulk', 9000, '2022-07-02'), (5, 3, 'Bulk', 15000, '2022-07-01'), (6, 3, 'Breakbulk', 11000, '2022-07-02');", "sql": "SELECT p.name, SUM(cm.weight) FROM ports p JOIN cargo_movements cm ON p.id = cm.port_id WHERE MONTH(cm.date) > 6 GROUP BY p.name;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 129, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many silver when the team is northwest territories and gold is less than 34?", "schema": "CREATE TABLE table_name_14 (silver VARCHAR, team VARCHAR, gold VARCHAR)", "sql": "SELECT COUNT(silver) FROM table_name_14 WHERE team = 'northwest territories' AND gold < 34;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 91, "num_statements": 1} {"question": "What is the average manufacturing cost of aircrafts produced in the US?", "schema": "CREATE TABLE AircraftManufacturing (id INT, manufacturer VARCHAR(255), country VARCHAR(255), cost FLOAT); INSERT INTO AircraftManufacturing VALUES (1, 'Boeing', 'USA', 120000000), (2, 'Airbus', 'Europe', 150000000), (3, 'Lockheed Martin', 'USA', 200000000);", "sql": "SELECT AVG(cost) FROM AircraftManufacturing WHERE country = 'USA';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the average Worst score for Mario Lopez as the Best dancer and Tango as the Dance?", "schema": "CREATE TABLE table_name_81 (worst_score INTEGER, best_dancer VARCHAR, dance VARCHAR)", "sql": "SELECT AVG(worst_score) FROM table_name_81 WHERE best_dancer = 'mario lopez' AND dance = 'tango';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 97, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What tournament has 1r as the 2011?", "schema": "CREATE TABLE table_name_12 (tournament VARCHAR)", "sql": "SELECT tournament FROM table_name_12 WHERE 2011 = '1r';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 55, "num_statements": 1} {"question": "Determine the change in the number of community health workers by race over time?", "schema": "CREATE TABLE community_health_workers (id INT, name VARCHAR(50), age INT, race VARCHAR(50), hire_date DATE); INSERT INTO community_health_workers (id, name, age, race, hire_date) VALUES (1, 'John Doe', 35, 'White', '2020-01-01'), (2, 'Jane Smith', 40, 'Black', '2019-01-01'), (3, 'Jose Rodriguez', 30, 'Hispanic', '2021-01-01');", "sql": "SELECT race, COUNT(*) as current_count, LAG(COUNT(*)) OVER (PARTITION BY race ORDER BY hire_date) as previous_count FROM community_health_workers GROUP BY race, hire_date;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "query_window_function", "is_postgresql_specific": true, "sql_length": 171, "num_statements": 1} {"question": "What is the average orbital velocity of the International Space Station?", "schema": "CREATE TABLE space_objects (name TEXT, orbital_velocity_km_s INTEGER); INSERT INTO space_objects (name, orbital_velocity_km_s) VALUES ('ISS', 27600), ('Hubble Space Telescope', 28600), ('Moon', 1680);", "sql": "SELECT AVG(orbital_velocity_km_s) FROM space_objects WHERE name = 'ISS';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 72, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is Year is Walt Disney Pictures Imagemovers Digital?", "schema": "CREATE TABLE table_name_59 (year INTEGER, studio_s_ VARCHAR)", "sql": "SELECT SUM(year) FROM table_name_59 WHERE studio_s_ = 'walt disney pictures imagemovers digital';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 97, "num_statements": 1} {"question": "How many professional development workshops were held in 2021?", "schema": "CREATE TABLE workshops (workshop_id INT, year INT, PRIMARY KEY (workshop_id, year)); INSERT INTO workshops (workshop_id, year) VALUES (1, 2021), (2, 2021), (3, 2021);", "sql": "SELECT COUNT(*) FROM workshops WHERE year = 2021;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 49, "num_statements": 1} {"question": "List all investments made by customers in Sydney with an investment amount greater than 5000.", "schema": "CREATE TABLE investment (id INT, customer_id INT, investment_date DATE, amount DECIMAL(10, 2)); INSERT INTO investment (id, customer_id, investment_date, amount) VALUES (1, 1, '2022-01-01', 6000.00), (2, 2, '2022-01-02', 7000.00); CREATE TABLE customer (id INT, name VARCHAR(255), address VARCHAR(255)); INSERT INTO customer (id, name, address) VALUES (1, 'John Smith', 'Sydney'), (2, 'Jane Doe', 'Melbourne');", "sql": "SELECT * FROM investment i JOIN customer c ON i.customer_id = c.id WHERE c.address = 'Sydney' AND i.amount > 5000;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 114, "num_statements": 1} {"question": "Find the number of unique educational programs in 'community_education' table", "schema": "CREATE TABLE community_education (id INT, program VARCHAR(50), location VARCHAR(50), date DATE);", "sql": "SELECT COUNT(DISTINCT program) FROM community_education;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 56, "num_statements": 1} {"question": "How many students received accommodations in \"Latin America\" region in 2020?", "schema": "CREATE TABLE Accommodations (student_id INT, region VARCHAR(20), accommodation_date DATE); INSERT INTO Accommodations (student_id, region, accommodation_date) VALUES (1, 'West Coast', '2020-01-01'), (2, 'East Coast', '2019-12-31'), (3, 'West Coast', '2020-02-01'), (4, 'Southwest', '2019-01-01'), (5, 'Latin America', '2020-03-01');", "sql": "SELECT COUNT(*) FROM Accommodations WHERE region = 'Latin America' AND EXTRACT(YEAR FROM accommodation_date) = 2020;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 116, "num_statements": 1} {"question": "Update the price of all vegan nail polishes to $12.99", "schema": "CREATE TABLE nail_polish_sales(product_id INT, sale_price FLOAT); INSERT INTO nail_polish_sales(product_id, sale_price) VALUES (1, 9.99); INSERT INTO nail_polish_sales(product_id, sale_price) VALUES (2, 10.99); CREATE TABLE product_info(product_id INT, is_vegan BOOLEAN); INSERT INTO product_info(product_id, is_vegan) VALUES (1, TRUE); INSERT INTO product_info(product_id, is_vegan) VALUES (2, FALSE); CREATE TABLE product_categories(product_id INT, category_name VARCHAR(50)); INSERT INTO product_categories(product_id, category_name) VALUES (1, 'Nail Polish'); INSERT INTO product_categories(product_id, category_name) VALUES (2, 'Makeup');", "sql": "UPDATE nail_polish_sales SET sale_price = 12.99 FROM nail_polish_sales INNER JOIN product_info ON nail_polish_sales.product_id = product_info.product_id INNER JOIN product_categories ON nail_polish_sales.product_id = product_categories.product_id WHERE product_info.is_vegan = TRUE AND product_categories.category_name = 'Nail Polish';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 335, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is KK -1 if KK -3 is 1,100?", "schema": "CREATE TABLE table_name_79 (kk___1 VARCHAR, kk___3 VARCHAR)", "sql": "SELECT kk___1 FROM table_name_79 WHERE kk___3 = '1,100';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 56, "num_statements": 1} {"question": "Add a new union named 'United Workers Union' with a membership count of 5000 to the union_members table", "schema": "CREATE TABLE union_members (union_id SERIAL PRIMARY KEY, union_name TEXT, membership_count INT);", "sql": "INSERT INTO union_members (union_name, membership_count) VALUES ('United Workers Union', 5000);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "dml_insert", "is_postgresql_specific": true, "sql_length": 95, "num_statements": 1} {"question": "What is the total number of hours played for the game 'League of Legends'?", "schema": "CREATE TABLE PlayerActivity (PlayerID INT, Game VARCHAR(100), HoursPlayed INT); INSERT INTO PlayerActivity (PlayerID, Game, HoursPlayed) VALUES (1, 'Overwatch', 500); INSERT INTO PlayerActivity (PlayerID, Game, HoursPlayed) VALUES (2, 'League of Legends', 1000); INSERT INTO PlayerActivity (PlayerID, Game, HoursPlayed) VALUES (3, 'Fortnite', 750);", "sql": "SELECT SUM(HoursPlayed) FROM PlayerActivity WHERE Game = 'League of Legends';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 77, "num_statements": 1} {"question": "Show the average popularity score for each region in 'trends_by_region' table", "schema": "CREATE TABLE trends_by_region (id INT PRIMARY KEY, region VARCHAR(255), trend_name VARCHAR(255), popularity_score INT);", "sql": "SELECT region, AVG(popularity_score) FROM trends_by_region GROUP BY region;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 75, "num_statements": 1} {"question": "Delete records from the 'digital_assets' table where the 'asset_type' is 'CBDC' and the 'circulating_supply' is less than 1000000", "schema": "CREATE TABLE digital_assets (asset_id INT PRIMARY KEY, asset_name VARCHAR(100), asset_type VARCHAR(50), circulating_supply INT);", "sql": "DELETE FROM digital_assets WHERE asset_type = 'CBDC' AND circulating_supply < 1000000;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_delete", "is_postgresql_specific": false, "sql_length": 86, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the date of the game with a goal of 7?", "schema": "CREATE TABLE table_name_27 (date VARCHAR, goal VARCHAR)", "sql": "SELECT date FROM table_name_27 WHERE goal = 7;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 46, "num_statements": 1} {"question": "What is the total revenue generated from ticket sales for each sport?", "schema": "CREATE TABLE sports (sport VARCHAR(255)); CREATE TABLE ticket_prices (sport VARCHAR(255), price DECIMAL(5,2)); CREATE TABLE ticket_sales (sport VARCHAR(255), tickets INT); INSERT INTO sports VALUES ('Basketball'), ('Football'), ('Hockey'), ('Soccer'); INSERT INTO ticket_prices VALUES ('Basketball', 80.50), ('Basketball', 75.20), ('Football', 120.00), ('Football', 110.50), ('Hockey', 65.00), ('Hockey', 70.00), ('Soccer', 40.00), ('Soccer', 45.00); INSERT INTO ticket_sales VALUES ('Basketball', 2500), ('Basketball', 3000), ('Football', 5000), ('Football', 6000), ('Hockey', 2000), ('Hockey', 2500), ('Soccer', 1000), ('Soccer', 1500);", "sql": "SELECT sport, SUM(price * tickets) as total_revenue FROM ticket_prices INNER JOIN ticket_sales ON ticket_prices.sport = ticket_sales.sport GROUP BY sport;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 154, "num_statements": 1} {"question": "What is the total number of tourists who visited cultural heritage sites in India last year?", "schema": "CREATE TABLE countries (country_id INT, country TEXT); INSERT INTO countries (country_id, country) VALUES (1, 'India'); CREATE TABLE years (year_id INT, year TEXT); INSERT INTO years (year_id, year) VALUES (1, '2022'); CREATE TABLE tourism (tourist_id INT, country_id INT, year_id INT, site_type TEXT); INSERT INTO tourism (tourist_id, country_id, year_id, site_type) VALUES (1, 1, 1, 'cultural_heritage'), (2, 1, 1, 'beach'), (3, 1, 1, 'cultural_heritage');", "sql": "SELECT COUNT(*) FROM tourism WHERE country_id = (SELECT country_id FROM countries WHERE country = 'India') AND year_id = (SELECT year_id FROM years WHERE year = '2022') AND site_type = 'cultural_heritage';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 205, "num_statements": 1} {"question": "What is the change in online travel agency bookings per month for hotels in the 'africa_hotels' view?", "schema": "CREATE VIEW africa_hotels AS SELECT * FROM hotels WHERE continent = 'Africa'; CREATE VIEW online_travel_agency_bookings AS SELECT hotel_id, COUNT(*) as bookings, DATE_TRUNC('month', booking_date) as month FROM online_travel_agency GROUP BY hotel_id, month;", "sql": "SELECT l.name, LAG(o.bookings) OVER (PARTITION BY l.id ORDER BY o.month) as previous_month_bookings, o.bookings as current_month_bookings FROM africa_hotels l JOIN online_travel_agency_bookings o ON l.id = o.hotel_id ORDER BY o.month;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "query_window_function", "is_postgresql_specific": true, "sql_length": 234, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What event ended in 3:02 of round 1?", "schema": "CREATE TABLE table_name_92 (event VARCHAR, round VARCHAR, time VARCHAR)", "sql": "SELECT event FROM table_name_92 WHERE round = 1 AND time = '3:02';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: what is the minimum of 60 -64?", "schema": "CREATE TABLE table_16457934_4 (Id VARCHAR)", "sql": "SELECT MIN(60 AS _to_64) FROM table_16457934_4;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 47, "num_statements": 1} {"question": "Which state has the lowest average salary for workers in the 'service' industry?", "schema": "CREATE TABLE service_workers (id INT, name VARCHAR(255), state VARCHAR(255), industry VARCHAR(255), salary DECIMAL(10,2)); INSERT INTO service_workers (id, name, state, industry, salary) VALUES (1, 'James White', 'Texas', 'service', 30000.00), (2, 'Emily Green', 'Florida', 'service', 32000.00);", "sql": "SELECT state, AVG(salary) FROM service_workers WHERE industry = 'service' GROUP BY state ORDER BY AVG(salary) ASC LIMIT 1;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 122, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'dependency' (example 31).", "schema": null, "sql": "CREATE TABLE deptest1 (f1 int unique);", "explanation": "DDL from PostgreSQL core regression test for Dependency.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_table", "is_postgresql_specific": false, "sql_length": 38, "num_statements": 1} {"question": "PostgreSQL regression test 'rangetypes': Write the SELECT query (example 54).", "schema": null, "sql": "SELECT * FROM numrange_test WHERE range_contained_by(numrange(-1e7,-10000.1), nr);", "explanation": "Regression test for Rangetypes in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT * FROM numrange_test WHERE range_contained_by(numrange(-1e7,-10000.1), nr)) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 82, "num_statements": 1} {"question": "What is the number of police officers in Colorado and the number of crimes reported in the state?", "schema": "CREATE TABLE states (id INT, name VARCHAR(255)); INSERT INTO states (id, name) VALUES (1, 'Colorado'); CREATE TABLE police_departments (id INT, state_id INT, officers INT); INSERT INTO police_departments (id, state_id, officers) VALUES (1, 1, 5000), (2, 1, 6000), (3, 1, 7000), (4, 1, 8000); CREATE TABLE crimes (id INT, state_id INT, reported INT); INSERT INTO crimes (id, state_id, reported) VALUES (1, 1, 50000), (2, 1, 60000), (3, 1, 70000), (4, 1, 80000);", "sql": "SELECT SUM(police_departments.officers) AS total_officers, SUM(crimes.reported) AS total_crimes FROM police_departments INNER JOIN states ON police_departments.state_id = states.id INNER JOIN crimes ON states.id = crimes.state_id WHERE states.name = 'Colorado';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 261, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the Type of castle from the 14th Century which Condition is preserved?", "schema": "CREATE TABLE table_name_72 (type VARCHAR, date VARCHAR, condition VARCHAR)", "sql": "SELECT type FROM table_name_72 WHERE date = '14th century' AND condition = 'preserved';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 87, "num_statements": 1} {"question": "What is the maximum installed capacity (in MW) of renewable energy projects in the 'europe' region, partitioned by country?", "schema": "CREATE TABLE renewable_energy_projects (id INT, country VARCHAR(50), region VARCHAR(50), capacity FLOAT); INSERT INTO renewable_energy_projects (id, country, region, capacity) VALUES (1, 'Germany', 'europe', 3000.00), (2, 'France', 'europe', 2500.00), (3, 'Spain', 'europe', 3500.00);", "sql": "SELECT region, country, MAX(capacity) as max_capacity FROM renewable_energy_projects WHERE region = 'europe' GROUP BY country, region;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 134, "num_statements": 1} {"question": "Identify teachers who have not attended any professional development in the last 6 months.", "schema": "CREATE TABLE Teachers (id INT, name VARCHAR(20)); INSERT INTO Teachers (id, name) VALUES (1, 'Jane Doe'), (2, 'Robert Smith'), (3, 'Alice Johnson'); CREATE TABLE ProfessionalDevelopment (teacher_id INT, attended_date DATE); INSERT INTO ProfessionalDevelopment (teacher_id, attended_date) VALUES (1, '2022-01-01'), (2, '2022-02-15'), (3, '2021-06-20'), (4, '2022-06-01');", "sql": "SELECT t.name FROM Teachers t LEFT JOIN ProfessionalDevelopment pd ON t.id = pd.teacher_id WHERE pd.teacher_id IS NULL OR pd.attended_date < DATE_SUB(CURRENT_DATE, INTERVAL 6 MONTH);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 182, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What district re-elected a Republican?", "schema": "CREATE TABLE table_name_21 (district VARCHAR, party VARCHAR, results VARCHAR)", "sql": "SELECT district FROM table_name_21 WHERE party = 'republican' AND results = 're-elected';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 89, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the school of the player from the College of Michigan?", "schema": "CREATE TABLE table_name_35 (school VARCHAR, college VARCHAR)", "sql": "SELECT school FROM table_name_35 WHERE college = 'michigan';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 60, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'partition_join' (example 580).", "schema": null, "sql": "CREATE TABLE beta_neg_p2 PARTITION OF beta_neg FOR VALUES FROM (200) TO (300);", "explanation": "DDL from PostgreSQL core regression test for Partition Join.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_table", "is_postgresql_specific": false, "sql_length": 78, "num_statements": 1} {"question": "Identify the number of maintenance requests for each vehicle type and year in the 'maintenance' table", "schema": "CREATE SCHEMA IF NOT EXISTS public_transport;CREATE TABLE IF NOT EXISTS public_transport.maintenance (maintenance_id SERIAL PRIMARY KEY, vehicle_type TEXT, request_date DATE);INSERT INTO public_transport.maintenance (vehicle_type, request_date) VALUES ('Bus', '2022-02-01'), ('Tram', '2022-02-02'), ('Bus', '2022-03-03'), ('Tram', '2022-03-04');", "sql": "SELECT EXTRACT(YEAR FROM request_date) AS year, vehicle_type, COUNT(*) FROM public_transport.maintenance GROUP BY EXTRACT(YEAR FROM request_date), vehicle_type;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": true, "sql_length": 160, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What number was the person eliminated at 22:50?", "schema": "CREATE TABLE table_29692554_2 (eliminated VARCHAR, time VARCHAR)", "sql": "SELECT eliminated FROM table_29692554_2 WHERE time = '22:50';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 61, "num_statements": 1} {"question": "What are the types and unit costs of equipment that have been shipped to the Middle East?", "schema": "CREATE TABLE Equipment (id INT, equipment_type VARCHAR(255), manufacturer VARCHAR(255), production_year INT, unit_cost DECIMAL(10,2)); CREATE TABLE Shipments (id INT, equipment_type VARCHAR(255), quantity INT, destination VARCHAR(255), delivery_date DATE, equipment_id INT); INSERT INTO Equipment (id, equipment_type, manufacturer, production_year, unit_cost) VALUES (5, 'Missile System', 'Raytheon', 2019, 8000000); INSERT INTO Shipments (id, equipment_type, quantity, destination, delivery_date, equipment_id) VALUES (5, 'Missile System', 5, 'Middle East', '2023-02-20', 5);", "sql": "SELECT Equipment.equipment_type, Equipment.unit_cost FROM Equipment INNER JOIN Shipments ON Equipment.id = Shipments.equipment_id WHERE Shipments.destination = 'Middle East';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 174, "num_statements": 1} {"question": "What is the average claim amount for policies in the 'personal_auto' table?", "schema": "CREATE TABLE personal_auto (policy_id INT, claim_amount DECIMAL(10,2)); INSERT INTO personal_auto (policy_id, claim_amount) VALUES (1, 250.50), (2, 400.75), (3, 120.00);", "sql": "SELECT AVG(claim_amount) FROM personal_auto;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 44, "num_statements": 1} {"question": "Which vehicles were showcased at the last auto show in Detroit?", "schema": "CREATE TABLE AutoShowInfo (Show VARCHAR(50), City VARCHAR(50), Year INT, Vehicle VARCHAR(50)); INSERT INTO AutoShowInfo (Show, City, Year, Vehicle) VALUES ('North American International Auto Show', 'Detroit', 2020, 'Tesla Model Y'), ('North American International Auto Show', 'Detroit', 2020, 'Ford Mustang Mach-E'), ('North American International Auto Show', 'Detroit', 2020, 'Chevrolet Corvette Stingray'), ('Geneva International Motor Show', 'Geneva', 2020, 'Porsche Taycan'), ('Geneva International Motor Show', 'Geneva', 2020, 'Aston Martin DBX');", "sql": "SELECT Vehicle FROM AutoShowInfo WHERE Show = 'North American International Auto Show' AND City = 'Detroit' AND Year = 2020;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 124, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Where was the GTE Suncoast Classic tournament held?", "schema": "CREATE TABLE table_11622562_1 (location VARCHAR, tournament VARCHAR)", "sql": "SELECT location FROM table_11622562_1 WHERE tournament = 'GTE Suncoast Classic';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 80, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the Player which has a To par of –6?", "schema": "CREATE TABLE table_name_99 (player VARCHAR, to_par VARCHAR)", "sql": "SELECT player FROM table_name_99 WHERE to_par = '–6';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 53, "num_statements": 1} {"question": "What is the total maintenance cost for military equipment in the European region in the first half of 2021?", "schema": "CREATE TABLE military_equipment (equipment_id INT, region VARCHAR(10), maintenance_cost DECIMAL(10,2), maintenance_date DATE); INSERT INTO military_equipment VALUES (1, 'Europe', 5000.00, '2021-03-01'), (2, 'Asia', 6000.00, '2021-04-01'), (3, 'Europe', 4000.00, '2021-06-01');", "sql": "SELECT SUM(maintenance_cost) FROM military_equipment WHERE region = 'Europe' AND maintenance_date >= DATE '2021-01-01' AND maintenance_date < DATE '2021-07-01';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 160, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the Decision listed when the Home was Colorado?", "schema": "CREATE TABLE table_name_28 (decision VARCHAR, home VARCHAR)", "sql": "SELECT decision FROM table_name_28 WHERE home = 'colorado';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 59, "num_statements": 1} {"question": "Calculate the percentage of women visiting community health centers in New York", "schema": "CREATE TABLE community_health_centers (id INT, name TEXT, state TEXT, gender TEXT, count INT); INSERT INTO community_health_centers (id, name, state, gender, count) VALUES (1, 'Center A', 'New York', 'Female', 100), (2, 'Center B', 'New York', 'Male', 75), (3, 'Center C', 'Texas', 'Female', 150);", "sql": "SELECT state, (SUM(CASE WHEN gender = 'Female' THEN count ELSE 0 END) * 100.0 / SUM(count)) AS percentage FROM community_health_centers WHERE state = 'New York' GROUP BY state;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 176, "num_statements": 1} {"question": "How many landfills were operational in 2018, excluding data from North America and Europe?", "schema": "CREATE TABLE LandfillCapacity (year INT, region VARCHAR(50), landfill VARCHAR(50), capacity FLOAT, filled_volume FLOAT); INSERT INTO LandfillCapacity (year, region, landfill, capacity, filled_volume) VALUES (2018, 'North America', 'Landfill A', 100000, 95000), (2018, 'Europe', 'Landfill B', 120000, 110000), (2018, 'Asia', 'Landfill C', 150000, 145000), (2018, 'South America', 'Landfill D', 80000, 75000), (2018, 'Africa', 'Landfill E', 70000, 65000);", "sql": "SELECT COUNT(*) FROM LandfillCapacity WHERE year = 2018 AND region NOT IN ('North America', 'Europe');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 102, "num_statements": 1} {"question": "Delete athlete records from the \"athletes\" table for athletes who are not part of any team", "schema": "CREATE TABLE athletes (id INT PRIMARY KEY, name VARCHAR(50), age INT, team VARCHAR(50));", "sql": "DELETE FROM athletes WHERE team IS NULL;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_delete", "is_postgresql_specific": false, "sql_length": 40, "num_statements": 1} {"question": "Show the number of Carbon Offset Programs in the United States", "schema": "CREATE TABLE carbon_offset_programs (id INT, name VARCHAR(100), country VARCHAR(50)); INSERT INTO carbon_offset_programs (id, name, country) VALUES (1, 'Program 1', 'USA'), (2, 'Program 2', 'USA'), (3, 'Program 3', 'Canada');", "sql": "SELECT COUNT(*) FROM carbon_offset_programs WHERE country = 'USA';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'sepgsql' (example 39).", "schema": null, "sql": "INSERT INTO t4 VALUES (1,'mmm'), (2,'nnn'), (3,'ooo');", "explanation": "Example query from the 'sepgsql' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 54, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the sum of the weight that all weightlifters managed to clean and jerk, among weightlifters who had a snatch of more than 95 and a Total of more than 200?", "schema": "CREATE TABLE table_name_8 (_jerk VARCHAR, clean_ VARCHAR, total__kg_ VARCHAR, snatch VARCHAR)", "sql": "SELECT COUNT(clean_) & _jerk FROM table_name_8 WHERE total__kg_ = 200 AND snatch > 95;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 86, "num_statements": 1} {"question": "PostgreSQL regression test 'jsonb_jsonpath': Write the SELECT query (example 246).", "schema": null, "sql": "select jsonb_path_query('[0, 1, -2, -3.4, 5.6]', '$[*].abs()');", "explanation": "Regression test for Jsonb Jsonpath in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select jsonb_path_query('[0, 1, -2, -3.4, 5.6]', '$[*].abs()')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "advanced", "category": "query_select", "is_postgresql_specific": false, "sql_length": 63, "num_statements": 1} {"question": "How many infrastructure projects were completed in 2021 and 2022?", "schema": "CREATE TABLE infrastructure_projects (id INT, project_name VARCHAR(50), location VARCHAR(50), completion_year INT); INSERT INTO infrastructure_projects (id, project_name, location, completion_year) VALUES (1, 'Highway 101 Expansion', 'California', 2022), (2, 'Bridge Replacement', 'New York', 2021), (3, 'Transit System Upgrade', 'Texas', 2023);", "sql": "SELECT completion_year, COUNT(*) as num_projects FROM infrastructure_projects WHERE completion_year IN (2021, 2022) GROUP BY completion_year;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 141, "num_statements": 1} {"question": "What is the maximum temperature recorded in the Antarctic Ocean?", "schema": "CREATE TABLE antarctic_ocean_temperature (location TEXT, temperature REAL); INSERT INTO antarctic_ocean_temperature (location, temperature) VALUES ('Antarctic Ocean', 2.8), ('Weddell Sea', 1.8), ('Ross Sea', 0.8);", "sql": "SELECT MAX(temperature) FROM antarctic_ocean_temperature;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 57, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What school did player number 21 play for?", "schema": "CREATE TABLE table_10015132_11 (school_club_team VARCHAR, no VARCHAR)", "sql": "SELECT school_club_team FROM table_10015132_11 WHERE no = '21';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 63, "num_statements": 1} {"question": "What is the maximum speed of vessels with 'YANG' prefix that carried dangerous goods in the Pacific Ocean in 2017?", "schema": "CREATE TABLE Vessels (ID INT, Name TEXT, Speed FLOAT, Dangerous_Goods BOOLEAN, Prefix TEXT, Year INT);CREATE VIEW Pacific_Ocean_Vessels AS SELECT * FROM Vessels WHERE Region = 'Pacific Ocean';", "sql": "SELECT MAX(Speed) FROM Pacific_Ocean_Vessels WHERE Prefix = 'YANG' AND Dangerous_Goods = 1 AND Year = 2017;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 107, "num_statements": 1} {"question": "Calculate the average energy storage capacity for each energy type", "schema": "CREATE TABLE energy_storage (energy_type VARCHAR(50), capacity FLOAT); INSERT INTO energy_storage (energy_type, capacity) VALUES ('Batteries', 45.6), ('Flywheels', 32.7), ('Batteries', 54.3);", "sql": "SELECT energy_type, AVG(capacity) FROM energy_storage GROUP BY energy_type;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 75, "num_statements": 1} {"question": "Delete records in circular_economy_initiatives where initiative_type is 'Composting'", "schema": "CREATE TABLE circular_economy_initiatives (id INT, initiative_type VARCHAR(20), start_year INT, end_year INT);", "sql": "WITH data_to_delete AS (DELETE FROM circular_economy_initiatives WHERE initiative_type = 'Composting' RETURNING *) DELETE FROM circular_economy_initiatives WHERE id IN (SELECT id FROM data_to_delete);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "dml_delete", "is_postgresql_specific": true, "sql_length": 200, "num_statements": 1} {"question": "PostgreSQL regression test 'stats': Write the SELECT query (example 184).", "schema": null, "sql": "SELECT stats_reset IS NOT NULL AS has_stats_reset\n FROM pg_stat_all_tables WHERE relid = 'test_last_scan'::regclass;", "explanation": "Regression test for Stats in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT stats_reset IS NOT NULL AS has_stats_reset\n FROM pg_stat_all_tables WHERE relid = 'test_last_scan'::regclass) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 117, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Find the weight of the youngest dog.", "schema": "CREATE TABLE pets (weight VARCHAR, pet_age VARCHAR)", "sql": "SELECT weight FROM pets ORDER BY pet_age LIMIT 1;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 49, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the record at the neutral site for when the overall record is ui, 27-16?", "schema": "CREATE TABLE table_16201038_4 (at_neutral_site VARCHAR, overall_record VARCHAR)", "sql": "SELECT at_neutral_site FROM table_16201038_4 WHERE overall_record = 'UI, 27-16';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 80, "num_statements": 1} {"question": "List all heritage sites and their respective conservation status, along with the number of artifacts in each site.", "schema": "CREATE TABLE heritage_sites (id INT, name VARCHAR(50), location VARCHAR(30), status VARCHAR(20), artifacts INT); INSERT INTO heritage_sites (id, name, location, status, artifacts) VALUES (1, 'Site1', 'NYC', 'Good', 50), (2, 'Site2', 'LA', 'Fair', 75), (3, 'Site3', 'Sydney', 'Poor', 60);", "sql": "SELECT h.status, h.name, h.location, h.artifacts FROM heritage_sites h ORDER BY h.status;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 89, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who had the fastest lap in round 1?", "schema": "CREATE TABLE table_29162856_1 (fastest_lap VARCHAR, round VARCHAR)", "sql": "SELECT fastest_lap FROM table_29162856_1 WHERE round = 1;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 57, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many numbers correspond to the score in the final of 6–3, 6–4, 7–6 (13–11)?", "schema": "CREATE TABLE table_26202940_6 (no VARCHAR, score_in_the_final VARCHAR)", "sql": "SELECT COUNT(no) FROM table_26202940_6 WHERE score_in_the_final = '6–3, 6–4, 7–6 (13–11)';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 90, "num_statements": 1} {"question": "How many unique organizations provided support in Kenya?", "schema": "CREATE TABLE organizations (id INT, name TEXT, location TEXT); INSERT INTO organizations (id, name, location) VALUES (1, 'WFP', 'Kenya'), (2, 'UNHCR', 'Tanzania'), (3, 'Save the Children', 'Kenya');", "sql": "SELECT COUNT(DISTINCT name) FROM organizations WHERE location = 'Kenya';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 72, "num_statements": 1} {"question": "What is the average sustainability score for each textile material?", "schema": "CREATE TABLE TextileSources (SourceID INT, Country VARCHAR(255), Material VARCHAR(255), SustainabilityScore INT); INSERT INTO TextileSources (SourceID, Country, Material, SustainabilityScore) VALUES (1, 'India', 'Cotton', 85), (2, 'Brazil', 'Rayon', 70);", "sql": "SELECT Material, AVG(SustainabilityScore) AS AvgSustainabilityScore FROM TextileSources GROUP BY Material;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 106, "num_statements": 1} {"question": "What is the minimum fare for 'Ferry' mode of transport?", "schema": "CREATE TABLE Fares(fare INT, journey_date DATE, mode_of_transport VARCHAR(20)); INSERT INTO Fares(fare, journey_date, mode_of_transport) VALUES (15, '2022-01-01', 'Ferry'), (20, '2022-01-02', 'Ferry'), (25, '2022-01-03', 'Ferry');", "sql": "SELECT MIN(fare) FROM Fares WHERE mode_of_transport = 'Ferry';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 62, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the top rank with a time of 2:19.86?", "schema": "CREATE TABLE table_name_25 (rank INTEGER, time VARCHAR)", "sql": "SELECT MAX(rank) FROM table_name_25 WHERE time = '2:19.86';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 59, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'rowsecurity' (example 1016).", "schema": null, "sql": "CREATE POLICY p1 ON dob_t1 TO regress_rls_dob_role1 USING (true);", "explanation": "DDL from PostgreSQL core regression test for Rowsecurity.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 65, "num_statements": 1} {"question": "Calculate the average length and diameter of pipeline segments with a start location in Valdez.", "schema": "CREATE TABLE PipelineSegments (SegmentID INT, SegmentName VARCHAR(50), Length DECIMAL(10,2), Diameter DECIMAL(10,2), StartLocation VARCHAR(50), EndLocation VARCHAR(50)); INSERT INTO PipelineSegments (SegmentID, SegmentName, Length, Diameter, StartLocation, EndLocation) VALUES (1, 'Alaska Pipeline Segment 1', 12.34, 34.56, 'Prudhoe Bay', 'Valdez'); INSERT INTO PipelineSegments (SegmentID, SegmentName, Length, Diameter, StartLocation, EndLocation) VALUES (2, 'Alaska Pipeline Segment 2', 15.67, 45.67, 'Valdez', 'Anchorage');", "sql": "SELECT StartLocation, AVG(Length) AS Avg_Length, AVG(Diameter) AS Avg_Diameter FROM PipelineSegments WHERE StartLocation = 'Valdez' GROUP BY StartLocation;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 155, "num_statements": 1} {"question": "PostgreSQL regression test 'rangefuncs': Write the SELECT query (example 148).", "schema": null, "sql": "select * from vw_rngfunc;", "explanation": "Regression test for Rangefuncs in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select * from vw_rngfunc) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 25, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: List all the scientists' names, their projects' names, and the hours worked by that scientist on each project, in alphabetical order of project name, and then scientist name.", "schema": "CREATE TABLE AssignedTo (Scientist VARCHAR, Project VARCHAR); CREATE TABLE Projects (Name VARCHAR, Hours VARCHAR, Code VARCHAR); CREATE TABLE Scientists (Name VARCHAR, SSN VARCHAR)", "sql": "SELECT T1.Name, T3.Name, T3.Hours FROM Scientists AS T1 JOIN AssignedTo AS T2 ON T1.SSN = T2.Scientist JOIN Projects AS T3 ON T2.Project = T3.Code ORDER BY T3.Name, T1.Name;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 173, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What city of license is associated with call sign w244bk?", "schema": "CREATE TABLE table_name_44 (city_of_license VARCHAR, call_sign VARCHAR)", "sql": "SELECT city_of_license FROM table_name_44 WHERE call_sign = 'w244bk';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 69, "num_statements": 1} {"question": "PostgreSQL regression test 'int8': Write the SELECT query (example 114).", "schema": null, "sql": "select '100'::int2 / '0'::int8;", "explanation": "Regression test for Int8 in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select '100'::int2 / '0'::int8) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 31, "num_statements": 1} {"question": "pgTAP test for Pgtap--Unpackaged--0.91.0 (assertion 162).", "schema": null, "sql": "ALTER EXTENSION pgtap ADD FUNCTION _keys ( NAME, NAME, CHAR );", "explanation": "SQL assertion from pgTAP test suite for Pgtap--Unpackaged--0.91.0.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 62, "num_statements": 1} {"question": "Which attorneys have handled cases with a total billing amount greater than $5000 for clients from underrepresented communities?", "schema": "CREATE TABLE Attorneys (id INT, name VARCHAR(50), underrepresented_client BOOLEAN); CREATE TABLE Cases (id INT, attorney_id INT, billing_amount DECIMAL(5,2), underrepresented_client BOOLEAN); INSERT INTO Attorneys (id, name, underrepresented_client) VALUES (1, 'Attorney1', TRUE), (2, 'Attorney2', FALSE), (3, 'Attorney3', TRUE); INSERT INTO Cases (id, attorney_id, billing_amount, underrepresented_client) VALUES (1, 1, 2000.00, TRUE), (2, 1, 1500.00, TRUE), (3, 2, 3000.00, FALSE), (4, 3, 800.00, TRUE);", "sql": "SELECT Attorneys.name FROM Attorneys INNER JOIN (SELECT attorney_id, SUM(billing_amount) AS total_billing FROM Cases WHERE underrepresented_client = TRUE GROUP BY attorney_id) AS subquery ON Attorneys.id = subquery.attorney_id WHERE total_billing > 5000;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 254, "num_statements": 1} {"question": "PostgreSQL regression test 'date': Write the SELECT query (example 192).", "schema": null, "sql": "SELECT EXTRACT(ISODOW FROM DATE '2020-08-11');", "explanation": "Regression test for Date in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT EXTRACT(ISODOW FROM DATE '2020-08-11')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 53, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which season has the Celtic club?", "schema": "CREATE TABLE table_name_53 (season VARCHAR, club VARCHAR)", "sql": "SELECT season FROM table_name_53 WHERE club = 'celtic';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 55, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many IRST figures for the show that premiered on Canale 5 (2006)?", "schema": "CREATE TABLE table_28803803_1 (irst VARCHAR, original_channel VARCHAR)", "sql": "SELECT COUNT(irst) FROM table_28803803_1 WHERE original_channel = 'Canale 5 (2006)';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 84, "num_statements": 1} {"question": "What is the total size of protected habitats for each animal type?", "schema": "CREATE TABLE Protected_Habitats (id INT, animal_type VARCHAR(50), size INT);", "sql": "SELECT animal_type, SUM(size) FROM Protected_Habitats GROUP BY animal_type;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 75, "num_statements": 1} {"question": "Who are the customers with a transaction amount higher than the average transaction amount?", "schema": "CREATE TABLE customer_transactions_2 (customer_id INT, customer_name VARCHAR(20), transaction_id INT, transaction_amount DECIMAL(10,2)); INSERT INTO customer_transactions_2 (customer_id, customer_name, transaction_id, transaction_amount) VALUES (1, 'Ravi Patel', 1, 1000.00), (2, 'Sophia Lee', 2, 500.00), (3, 'Hugo Chen', 3, 2000.00);", "sql": "SELECT customer_name FROM customer_transactions_2 WHERE transaction_amount > (SELECT AVG(transaction_amount) FROM customer_transactions_2);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 139, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What's the method for a record of 4-2 and round smaller than 3?", "schema": "CREATE TABLE table_name_78 (method VARCHAR, round VARCHAR, record VARCHAR)", "sql": "SELECT method FROM table_name_78 WHERE round < 3 AND record = '4-2';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 68, "num_statements": 1} {"question": "Which excavation sites have more than 3 tool artifacts?", "schema": "CREATE TABLE site_a (artifact_id INT, artifact_type VARCHAR(255)); INSERT INTO site_a (artifact_id, artifact_type) VALUES (1, 'Pottery'), (2, 'Tools'), (3, 'Tools'), (4, 'Tools'); CREATE TABLE site_b (artifact_id INT, artifact_type VARCHAR(255)); INSERT INTO site_b (artifact_id, artifact_type) VALUES (5, 'Pottery'), (6, 'Tools'), (7, 'Tools');", "sql": "SELECT context FROM (SELECT 'site_a' AS context, COUNT(*) as count FROM site_a WHERE artifact_type = 'Tools' UNION ALL SELECT 'site_b' AS context, COUNT(*) as count FROM site_b WHERE artifact_type = 'Tools') AS subquery WHERE count > 3;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 236, "num_statements": 1} {"question": "How many space missions have been led by female astronauts?", "schema": "CREATE TABLE SpaceMissions (MissionID INT, Name VARCHAR(50), LaunchDate DATE, Destination VARCHAR(50), Commander VARCHAR(50)); INSERT INTO SpaceMissions VALUES (1, 'Apollo 18', '1975-12-02', 'Moon', 'Jane Smith'), (2, 'Apollo 19', '1976-09-15', 'Moon', 'John Doe'), (3, 'Apollo 20', '1977-02-03', 'Moon', 'John Doe'), (4, 'Apollo 21', '1978-12-12', 'Moon', 'Jane Smith'), (5, 'Apollo 22', '1980-07-04', 'Mars', 'John Doe'), (6, 'Apollo 23', '1981-05-05', 'Mars', 'John Doe'), (7, 'Apollo 24', '1983-06-18', 'Mars', 'Jane Smith'), (8, 'Apollo 25', '1985-03-06', 'Mars', 'John Doe'), (9, 'Apollo 26', '1987-11-20', 'Mars', 'Jane Smith'), (10, 'Apollo 27', '1990-09-25', 'Ceres', 'John Doe');", "sql": "SELECT COUNT(*) FROM SpaceMissions WHERE Commander IN ('Jane Smith', 'Emily Johnson', 'Sophia Garcia');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 103, "num_statements": 1} {"question": "What is the percentage of security incidents caused by each attack vector for a specific country in the last year?", "schema": "CREATE TABLE security_incidents (id INT, timestamp TIMESTAMP, country VARCHAR(255), attack_vector VARCHAR(255), incident_type VARCHAR(255)); INSERT INTO security_incidents (id, timestamp, country, attack_vector, incident_type) VALUES (1, '2021-01-01 12:00:00', 'Germany', 'Network', 'Ransomware'), (2, '2021-02-05 10:30:00', 'France', 'Email', 'Phishing');", "sql": "SELECT attack_vector, 100.0 * COUNT(*) / (SELECT COUNT(*) FROM security_incidents WHERE timestamp >= NOW() - INTERVAL 1 YEAR AND country = 'Germany') as percentage FROM security_incidents WHERE timestamp >= NOW() - INTERVAL 1 YEAR AND country = 'Germany' GROUP BY attack_vector;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 278, "num_statements": 1} {"question": "Display the causes and total donation amounts for donors with the last name 'Smith', joining the donors and donations tables.", "schema": "CREATE TABLE donors (id INT, first_name VARCHAR(255), last_name VARCHAR(255)); INSERT INTO donors (id, first_name, last_name) VALUES (1, 'John', 'Doe'), (2, 'Jane', 'Smith'); CREATE TABLE donations (id INT, donor_id INT, amount DECIMAL(10, 2)); INSERT INTO donations (id, donor_id, amount) VALUES (1, 1, 500), (2, 1, 250), (3, 2, 750);", "sql": "SELECT c.name, SUM(donations.amount) as total_donation FROM donors d JOIN donations ON d.id = donations.donor_id JOIN causes c ON donations.cause_id = c.id WHERE d.last_name = 'Smith' GROUP BY c.name;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 200, "num_statements": 1} {"question": "Populate the virtual_reality table", "schema": "CREATE TABLE virtual_reality (vr_id INT PRIMARY KEY, name VARCHAR(50), release_date DATE, manufacturer VARCHAR(50));", "sql": "INSERT INTO virtual_reality (vr_id, name, release_date, manufacturer) VALUES (1, 'Oculus Quest 2', '2020-10-13', 'Facebook Technologies'), (2, 'HTC Vive Pro 2', '2021-06-01', 'HTC Corporation'), (3, 'Valve Index', '2019-06-28', 'Valve Corporation');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 249, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the GP winner for the Race winners valentin giraud / nicolas musset, and a place genk?", "schema": "CREATE TABLE table_name_2 (gp_winner VARCHAR, race_winners VARCHAR, place VARCHAR)", "sql": "SELECT gp_winner FROM table_name_2 WHERE race_winners = 'valentin giraud / nicolas musset' AND place = 'genk';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 110, "num_statements": 1} {"question": "What is the total number of mobile and broadband subscribers for each network type?", "schema": "CREATE TABLE subscriber_data (subscriber_type VARCHAR(20), network_type VARCHAR(20), subscriber_count INT); INSERT INTO subscriber_data (subscriber_type, network_type, subscriber_count) VALUES ('Mobile', '4G', 5000), ('Broadband', 'Fiber', 3000), ('Mobile', '5G', 7000), ('Broadband', 'Cable', 4000);", "sql": "SELECT network_type, SUM(subscriber_count) FROM subscriber_data GROUP BY network_type;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 86, "num_statements": 1} {"question": "pgTAP test for Privs (assertion 83).", "schema": null, "sql": "-- Try another user.\nREVOKE CREATE ON TABLESPACE pg_default FROM public;", "explanation": "SQL assertion from pgTAP test suite for Privs.", "validation_query": null, "source": "pgtap_tests", "difficulty": "intermediate", "category": "dcl_security", "is_postgresql_specific": true, "sql_length": 72, "num_statements": 1} {"question": "Which threat actors have targeted systems with a CVE score greater than 7 in the last year?", "schema": "CREATE TABLE threat_actors (threat_actor_id INT, threat_actor_name VARCHAR(255));CREATE TABLE targeted_systems (system_id INT, system_name VARCHAR(255), sector VARCHAR(255), threat_actor_id INT);CREATE TABLE cve_scores (system_id INT, score INT, scan_date DATE);CREATE TABLE scan_dates (scan_date DATE, system_id INT);", "sql": "SELECT ta.threat_actor_name FROM threat_actors ta INNER JOIN targeted_systems ts ON ta.threat_actor_id = ts.threat_actor_id INNER JOIN cve_scores c ON ts.system_id = c.system_id INNER JOIN scan_dates sd ON ts.system_id = sd.system_id WHERE c.score > 7 AND sd.scan_date >= DATE_SUB(CURRENT_DATE, INTERVAL 1 YEAR);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 312, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the team name for Chungju?", "schema": "CREATE TABLE table_name_11 (team VARCHAR, city VARCHAR)", "sql": "SELECT team FROM table_name_11 WHERE city = 'chungju';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 54, "num_statements": 1} {"question": "What is the percentage of the global terbium production in 2020 that comes from African countries?", "schema": "CREATE TABLE mines (id INT, name TEXT, location TEXT, production_quantity INT, year INT, element TEXT); INSERT INTO mines (id, name, location, production_quantity, year, element) VALUES (1, 'Kentor Gold', 'South Africa', 300, 2020, 'terbium'), (2, 'Thor Mining', 'Botswana', 200, 2020, 'terbium'); CREATE TABLE global_production (element TEXT, year INT, production_quantity INT); INSERT INTO global_production (element, year, production_quantity) VALUES ('terbium', 2020, 5000);", "sql": "SELECT (SUM(CASE WHEN location = 'Africa' THEN production_quantity ELSE 0 END) / (SELECT SUM(production_quantity) FROM global_production WHERE element = 'terbium' AND year = 2020)) * 100 FROM mines WHERE year = 2020 AND element = 'terbium';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 240, "num_statements": 1} {"question": "List all policyholders with home insurance policies in Texas who are over 60 years old", "schema": "CREATE TABLE policyholders (policy_id INT, policyholder_age INT, policy_type VARCHAR(20)); CREATE TABLE policies (policy_id INT, policy_state VARCHAR(2));", "sql": "SELECT DISTINCT policyholder_age, policy_type FROM policyholders JOIN policies ON policyholders.policy_id = policies.policy_id WHERE policies.policy_state = 'TX' AND policyholder_age > 60;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 188, "num_statements": 1} {"question": "What's the average ESG score for nonprofits in the 'Healthcare' field?", "schema": "CREATE TABLE if not exists nonprofits (id INT PRIMARY KEY, name TEXT, field TEXT, location TEXT, annual_budget DECIMAL(10,2)); INSERT INTO nonprofits (id, name, field, location, annual_budget) VALUES (1, 'Health Hope', 'Healthcare', 'Chicago', 125000.00); CREATE TABLE if not exists esg_factors (id INT PRIMARY KEY, nonprofit_id INT, environmental_score DECIMAL(3,2), social_score DECIMAL(3,2), governance_score DECIMAL(3,2)); INSERT INTO esg_factors (id, nonprofit_id, environmental_score, social_score, governance_score) VALUES (1, 1, 4.2, 4.5, 4.8);", "sql": "SELECT AVG(environmental_score) AS avg_environmental_score, AVG(social_score) AS avg_social_score, AVG(governance_score) AS avg_governance_score FROM esg_factors WHERE nonprofit_id IN (SELECT id FROM nonprofits WHERE field = 'Healthcare');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 239, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which Result has a Venue of A, and an Opponent of manchester united?", "schema": "CREATE TABLE table_name_96 (result VARCHAR, venue VARCHAR, opponent VARCHAR)", "sql": "SELECT result FROM table_name_96 WHERE venue = 'a' AND opponent = 'manchester united';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 86, "num_statements": 1} {"question": "What is the average construction cost per project type in the transportation domain?", "schema": "CREATE TABLE transportation_projects (id INT, project_type VARCHAR(255), construction_cost FLOAT); INSERT INTO transportation_projects (id, project_type, construction_cost) VALUES (1, 'Bridge', 5000000), (2, 'Road', 2000000), (3, 'Highway', 15000000);", "sql": "SELECT project_type, AVG(construction_cost) FROM transportation_projects GROUP BY project_type;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 95, "num_statements": 1} {"question": "What is the maximum fare for each route type in the 'route' table?", "schema": "CREATE TABLE route (id INT, name TEXT, type TEXT, fare FLOAT); INSERT INTO route (id, name, type, fare) VALUES (1, 'Central Line', 'Underground', 3.5), (2, 'Circle Line', 'Underground', 4.2), (3, 'Jubilee Line', 'Underground', 5.0), (4, 'Bus Route 123', 'Bus', 2.5), (5, 'Bus Route 456', 'Bus', 3.0);", "sql": "SELECT type, MAX(fare) as max_fare FROM route GROUP BY type;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 60, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the customer first, last name and id with least number of accounts.", "schema": "CREATE TABLE Accounts (customer_id VARCHAR); CREATE TABLE Customers (customer_first_name VARCHAR, customer_last_name VARCHAR, customer_id VARCHAR)", "sql": "SELECT T2.customer_first_name, T2.customer_last_name, T1.customer_id FROM Accounts AS T1 JOIN Customers AS T2 ON T1.customer_id = T2.customer_id GROUP BY T1.customer_id ORDER BY COUNT(*) LIMIT 1;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 195, "num_statements": 1} {"question": "Determine the difference in yield between the highest and lowest yield crops in the Atlantic region in 2022.", "schema": "CREATE TABLE crops (id INT, name VARCHAR(50), yield INT, acrate DECIMAL(5,2), region VARCHAR(50), year INT); INSERT INTO crops (id, name, yield, acrate, region, year) VALUES (1, 'Corn', 200, 2.3, 'Atlantic', 2022), (2, 'Soybeans', 120, 2.2, 'Atlantic', 2022), (3, 'Wheat', 180, 2.5, 'Atlantic', 2022);", "sql": "SELECT MAX(yield) - MIN(yield) FROM crops WHERE region = 'Atlantic' AND year = 2022;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 84, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What position does Bobby Micho, who was picked later than 186 others, play on the Broncos team?", "schema": "CREATE TABLE table_name_30 (position VARCHAR, pick__number VARCHAR, player VARCHAR)", "sql": "SELECT position FROM table_name_30 WHERE pick__number > 186 AND player = 'bobby micho';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 87, "num_statements": 1} {"question": "Insert a new 'education' record into the 'education_programs' table", "schema": "CREATE TABLE education_programs (id INT, name VARCHAR(50), description TEXT, target_audience VARCHAR(50), duration INT);", "sql": "INSERT INTO education_programs (id, name, description, target_audience, duration) VALUES (1, 'Wildlife Warriors', 'A program to educate children about wildlife conservation.', 'Children (Ages 8-12)', 12);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 204, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: When was Laurent-Marie-Joseph Imbert / St. Imbert, who was beatified after 1909 and canonised after 1984, martyred?", "schema": "CREATE TABLE table_name_65 (martyred INTEGER, canonised VARCHAR, beatified VARCHAR, name VARCHAR)", "sql": "SELECT MAX(martyred) FROM table_name_65 WHERE beatified > 1909 AND name = 'laurent-marie-joseph imbert / st. imbert' AND canonised > 1984;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 138, "num_statements": 1} {"question": "What is the total amount of research grants awarded to each department in the past 3 years, and what is the maximum grant amount awarded to a single student during this time period?", "schema": "CREATE TABLE research_grants (grant_id INT, student_id INT, dept_name VARCHAR(50), grant_amount INT, grant_date DATE);", "sql": "SELECT rg.dept_name, SUM(rg.grant_amount) as total_grant_amount, MAX(rg.grant_amount) as max_grant_amount FROM research_grants rg WHERE rg.grant_date >= DATE_SUB(CURRENT_DATE(), INTERVAL 3 YEAR) GROUP BY rg.dept_name;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 217, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many entries are shown for viewers when the airdate was 26 november 2009?", "schema": "CREATE TABLE table_24399615_3 (viewers VARCHAR, airdate VARCHAR)", "sql": "SELECT COUNT(viewers) FROM table_24399615_3 WHERE airdate = '26 November 2009';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 79, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What's the court ranking of 5th son of tadayori and has revenues of 10,000 koku?", "schema": "CREATE TABLE table_name_32 (court_rank VARCHAR, revenues VARCHAR, lineage VARCHAR)", "sql": "SELECT court_rank FROM table_name_32 WHERE revenues = '10,000 koku' AND lineage = '5th son of tadayori';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 104, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which Series has a Home of 2–0, and an Opponent of panathinaikos?", "schema": "CREATE TABLE table_name_96 (series VARCHAR, home VARCHAR, opponent VARCHAR)", "sql": "SELECT series FROM table_name_96 WHERE home = '2–0' AND opponent = 'panathinaikos';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 83, "num_statements": 1} {"question": "What is the total number of accommodations provided for students in the 'Science' department?", "schema": "CREATE TABLE Students (student_id INT, department VARCHAR(255)); CREATE TABLE Accommodations (accommodation_id INT, student_id INT, accommodation_type VARCHAR(255));", "sql": "SELECT COUNT(*) as total_accommodations FROM Accommodations WHERE student_id IN ( SELECT student_id FROM Students WHERE department = 'Science' );", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 145, "num_statements": 1} {"question": "What is the minimum data usage for postpaid mobile customers in the city of Los Angeles?", "schema": "CREATE TABLE mobile_customers (customer_id INT, data_usage FLOAT, city VARCHAR(20), plan_type VARCHAR(10)); INSERT INTO mobile_customers (customer_id, data_usage, city, plan_type) VALUES (1, 3.5, 'Los Angeles', 'postpaid'), (2, 4.2, 'New York', 'postpaid'), (3, 3.8, 'Los Angeles', 'prepaid');", "sql": "SELECT MIN(data_usage) FROM mobile_customers WHERE city = 'Los Angeles' AND plan_type = 'postpaid';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 99, "num_statements": 1} {"question": "Calculate the total revenue of sativa-based products sold in Oregon dispensaries in Q1 2022.", "schema": "CREATE TABLE products (type VARCHAR(10), category VARCHAR(10), price DECIMAL(5,2), quantity INT); INSERT INTO products (type, category, price, quantity) VALUES ('oil', 'sativa', 70, 50), ('flower', 'sativa', 100, 75), ('edible', 'sativa', 60, 40); CREATE TABLE dispensaries (state VARCHAR(20), sales INT); INSERT INTO dispensaries (state, sales) VALUES ('Oregon', 1800), ('Oregon', 2000); CREATE TABLE time_periods (quarter INT); INSERT INTO time_periods (quarter) VALUES (1), (2);", "sql": "SELECT SUM(products.price * products.quantity) FROM products JOIN dispensaries ON TRUE WHERE products.category = 'sativa' AND dispensaries.state = 'Oregon' AND time_periods.quarter = 1;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 185, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the margin of victory over Steve Rintoul?", "schema": "CREATE TABLE table_name_61 (margin_of_victory VARCHAR, runner_s__up VARCHAR)", "sql": "SELECT margin_of_victory FROM table_name_61 WHERE runner_s__up = 'steve rintoul';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 81, "num_statements": 1} {"question": "What is the total number of military bases in the country, grouped by their base type?", "schema": "CREATE TABLE military_bases (id INT, name VARCHAR(255), base_type VARCHAR(255), country VARCHAR(255));INSERT INTO military_bases (id, name, base_type, country) VALUES (1, 'Fort Bragg', 'Army', 'USA'), (2, 'Camp Pendleton', 'Marine', 'USA'), (3, 'CFB Petawawa', 'Army', 'Canada');", "sql": "SELECT base_type, COUNT(*) as total_bases FROM military_bases GROUP BY base_type;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 81, "num_statements": 1} {"question": "List all legal technology grants awarded by a specific organization", "schema": "CREATE TABLE grants (id INT PRIMARY KEY, organization VARCHAR(255), category VARCHAR(255), amount DECIMAL(10,2), date_awarded DATE);", "sql": "SELECT * FROM grants WHERE organization = 'Legal Code Foundation';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What number of rank has more than 2 medals in total with less than 4 bronze?", "schema": "CREATE TABLE table_name_26 (rank VARCHAR, total VARCHAR, bronze VARCHAR)", "sql": "SELECT COUNT(rank) FROM table_name_26 WHERE total > 2 AND bronze < 4;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 69, "num_statements": 1} {"question": "List dispensaries that have sold more than 100 units of a specific strain, 'Blue Dream', in the last month.", "schema": "CREATE TABLE Dispensaries (DispensaryID INT, DispensaryName VARCHAR(50)); CREATE TABLE Strains (StrainID INT, StrainName VARCHAR(50)); CREATE TABLE Sales (SaleID INT, DispensaryID INT, StrainID INT, QuantitySold INT, SaleDate DATE);", "sql": "SELECT D.DispensaryID, D.DispensaryName FROM Dispensaries D JOIN Sales S ON D.DispensaryID = S.DispensaryID JOIN Strains ST ON S.StrainID = ST.StrainID WHERE StrainName = 'Blue Dream' AND S.SaleDate >= DATEADD(month, -1, GETDATE()) GROUP BY D.DispensaryID, D.DispensaryName HAVING SUM(QuantitySold) > 100;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 305, "num_statements": 1} {"question": "How many wind energy projects were completed in Canada in 2021?", "schema": "CREATE TABLE renewable_energy (id INT, project TEXT, location TEXT, year INT, type TEXT, status TEXT); INSERT INTO renewable_energy (id, project, location, year, type, status) VALUES (1, 'Alberta Wind Energy', 'Canada', 2021, 'wind', 'completed'), (2, 'British Columbia Solar Energy', 'Canada', 2021, 'solar', 'in progress');", "sql": "SELECT COUNT(*) FROM renewable_energy WHERE location = 'Canada' AND year = 2021 AND type = 'wind' AND status = 'completed';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 123, "num_statements": 1} {"question": "What is the average sale price of military equipment by type for defense contractor XYZ?", "schema": "CREATE TABLE EquipmentTypes (equipment_type VARCHAR(50), manufacturer VARCHAR(50), sale_price DECIMAL(10, 2)); INSERT INTO EquipmentTypes (equipment_type, manufacturer, sale_price) VALUES ('Tank', 'XYZ', 5000000.00); INSERT INTO EquipmentTypes (equipment_type, manufacturer, sale_price) VALUES ('Fighter Jet', 'XYZ', 80000000.00);", "sql": "SELECT equipment_type, AVG(sale_price) as avg_sale_price FROM EquipmentTypes WHERE manufacturer = 'XYZ' GROUP BY equipment_type;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 128, "num_statements": 1} {"question": "How many public libraries were there in 'CityA' as of 2020?", "schema": "CREATE TABLE Libraries(city VARCHAR(20), year INT, num_libraries INT); INSERT INTO Libraries VALUES ('CityA', 2018, 5), ('CityA', 2019, 6), ('CityA', 2020, 7), ('CityC', 2018, 4), ('CityC', 2019, 5), ('CityC', 2020, 6);", "sql": "SELECT num_libraries FROM Libraries WHERE city = 'CityA' AND year = 2020;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 73, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who was the runner(s)-up for the Feb 18, 1996 tournament?", "schema": "CREATE TABLE table_name_39 (runner_s__up VARCHAR, date VARCHAR)", "sql": "SELECT runner_s__up FROM table_name_39 WHERE date = 'feb 18, 1996';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 67, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the incident type description for the incident type with code \"VIOLENCE\"?", "schema": "CREATE TABLE Ref_Incident_Type (incident_type_description VARCHAR, incident_type_code VARCHAR)", "sql": "SELECT incident_type_description FROM Ref_Incident_Type WHERE incident_type_code = 'VIOLENCE';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 94, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many phone hardware models are produced by the company named \"Nokia Corporation\"?", "schema": "CREATE TABLE phone (Company_name VARCHAR)", "sql": "SELECT COUNT(*) FROM phone WHERE Company_name = 'Nokia Corporation';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 68, "num_statements": 1} {"question": "What is the most common diagnosis in rural areas of Wyoming?", "schema": "CREATE TABLE diagnoses (diagnosis_id INT, diagnosis VARCHAR(50), patient_id INT, visit_date DATE, location VARCHAR(20));", "sql": "SELECT diagnosis, COUNT(*) FROM diagnoses WHERE location = 'Rural Wyoming' GROUP BY diagnosis ORDER BY COUNT(*) DESC LIMIT 1;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 125, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Where was the 1966 final played?", "schema": "CREATE TABLE table_name_12 (finals_venue__surface_ VARCHAR, year VARCHAR)", "sql": "SELECT finals_venue__surface_ FROM table_name_12 WHERE year = 1966;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 67, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What away team has a Tie no of 5?", "schema": "CREATE TABLE table_name_97 (away_team VARCHAR, tie_no VARCHAR)", "sql": "SELECT away_team FROM table_name_97 WHERE tie_no = '5';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 55, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What series has dan povenmire as the director, with museum scream as the title?", "schema": "CREATE TABLE table_name_46 (series VARCHAR, director VARCHAR, title VARCHAR)", "sql": "SELECT series FROM table_name_46 WHERE director = 'dan povenmire' AND title = 'museum scream';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 94, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who as the home team for game on 5 october 2011?", "schema": "CREATE TABLE table_24949975_1 (home_team VARCHAR, date VARCHAR)", "sql": "SELECT home_team FROM table_24949975_1 WHERE date = '5 October 2011';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 69, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the first episode with a netflix episode code of s02e20?", "schema": "CREATE TABLE table_15187735_4 (episode INTEGER, netflix VARCHAR)", "sql": "SELECT MIN(episode) FROM table_15187735_4 WHERE netflix = 'S02E20';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 67, "num_statements": 1} {"question": "What are the names and countries of all AI researchers who are over the age of 40 and have published at least one research paper?", "schema": "CREATE TABLE Researchers (id INT, name VARCHAR(255), age INT, country VARCHAR(255), paper VARCHAR(255));", "sql": "SELECT name, country FROM Researchers WHERE age > 40 AND paper IS NOT NULL;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 75, "num_statements": 1} {"question": "What is the total number of permits issued for residential and commercial buildings in the City of Angels?", "schema": "CREATE TABLE building_permit (permit_id INT, building_type VARCHAR(10), location VARCHAR(20));INSERT INTO building_permit (permit_id, building_type, location) VALUES (1, 'Residential', 'City of Angels');INSERT INTO building_permit (permit_id, building_type, location) VALUES (2, 'Commercial', 'City of Angels');", "sql": "SELECT SUM(permit_id) FROM building_permit WHERE location = 'City of Angels';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 77, "num_statements": 1} {"question": "List the types of fish and their quantities that are farmed in each country using sustainable methods, excluding fish from Canada.", "schema": "CREATE TABLE FarmI (species VARCHAR(20), country VARCHAR(20), quantity INT, farming_method VARCHAR(20)); INSERT INTO FarmI (species, country, quantity, farming_method) VALUES ('Salmon', 'Canada', 7000, 'Sustainable'); INSERT INTO FarmI (species, country, quantity, farming_method) VALUES ('Trout', 'Canada', 4000, 'Sustainable'); INSERT INTO FarmI (species, country, quantity, farming_method) VALUES ('Salmon', 'Norway', 6000, 'Sustainable'); INSERT INTO FarmI (species, country, quantity, farming_method) VALUES ('Trout', 'Norway', 3000, 'Sustainable'); INSERT INTO FarmI (species, country, quantity, farming_method) VALUES ('Herring', 'Scotland', 2500, 'Sustainable');", "sql": "SELECT country, species, SUM(quantity) FROM FarmI WHERE farming_method = 'Sustainable' AND country != 'Canada' GROUP BY country, species;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 137, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the home team of the game on February 4?", "schema": "CREATE TABLE table_name_74 (home VARCHAR, date VARCHAR)", "sql": "SELECT home FROM table_name_74 WHERE date = 'february 4';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 57, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the event in a year before 1989?", "schema": "CREATE TABLE table_name_37 (event VARCHAR, year INTEGER)", "sql": "SELECT event FROM table_name_37 WHERE year < 1989;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 50, "num_statements": 1} {"question": "What is the maximum budget for biotech startups founded in 2020?", "schema": "CREATE TABLE startup_funding (name VARCHAR(255), year INT, budget FLOAT); INSERT INTO startup_funding (name, year, budget) VALUES ('StartupA', 2020, 5000000), ('StartupB', 2020, 7000000), ('StartupC', 2019, 6000000);", "sql": "SELECT MAX(budget) FROM startup_funding WHERE year = 2020;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 58, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the Region with a Catalog that is vlmx 1087-3?", "schema": "CREATE TABLE table_name_27 (region VARCHAR, catalog VARCHAR)", "sql": "SELECT region FROM table_name_27 WHERE catalog = 'vlmx 1087-3';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 63, "num_statements": 1} {"question": "What is the number of members who have been members for more than 1 year?", "schema": "CREATE TABLE gym_memberships (id INT, member_name VARCHAR(50), start_date DATE, end_date DATE, membership_type VARCHAR(50), price DECIMAL(5,2));", "sql": "SELECT COUNT(DISTINCT member_name) AS long_term_members FROM gym_memberships WHERE DATEDIFF(CURDATE(), start_date) > 365;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 121, "num_statements": 1} {"question": "Who are the top three employees with the highest salaries in the Engineering department?", "schema": "CREATE TABLE Employees (EmployeeID INT, FirstName VARCHAR(50), LastName VARCHAR(50), Department VARCHAR(50), Salary DECIMAL(10,2)); INSERT INTO Employees (EmployeeID, FirstName, LastName, Department, Salary) VALUES (1, 'John', 'Doe', 'Engineering', 75000.00), (2, 'Jane', 'Doe', 'Engineering', 80000.00), (3, 'Mike', 'Smith', 'Marketing', 60000.00), (4, 'Samantha', 'Johnson', 'Engineering', 85000.00), (5, 'David', 'Brown', 'Engineering', 82000.00);", "sql": "SELECT EmployeeID, FirstName, LastName, Department, Salary, RANK() OVER (PARTITION BY Department ORDER BY Salary DESC) AS Rank FROM Employees WHERE Department = 'Engineering';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "query_window_function", "is_postgresql_specific": true, "sql_length": 175, "num_statements": 1} {"question": "Delete the supplier from 'USA' with id 4", "schema": "CREATE TABLE suppliers (id INT PRIMARY KEY, name VARCHAR(100), address VARCHAR(255), city VARCHAR(100), country VARCHAR(100));", "sql": "DELETE FROM suppliers WHERE id = 4 AND country = 'USA';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_delete", "is_postgresql_specific": false, "sql_length": 55, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What player placed t10?", "schema": "CREATE TABLE table_name_17 (player VARCHAR, place VARCHAR)", "sql": "SELECT player FROM table_name_17 WHERE place = 't10';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 53, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'test_decoding' (example 3).", "schema": null, "sql": "SELECT 'init' FROM pg_create_logical_replication_slot('regression_slot_t', 'test_decoding', true);", "explanation": "Example query from the 'test_decoding' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 98, "num_statements": 1} {"question": "Show a SQL definition from the timescaledb project (util_time, item 11).", "schema": null, "sql": "CREATE OR REPLACE FUNCTION _timescaledb_functions.cagg_watermark(hypertable_id INTEGER)\nRETURNS INT8 AS '@MODULE_PATHNAME@', 'ts_continuous_agg_watermark' LANGUAGE C STABLE STRICT PARALLEL RESTRICTED;", "explanation": "SQL definition from the open-source timescaledb PostgreSQL project.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "plpgsql_function", "is_postgresql_specific": true, "sql_length": 200, "num_statements": 1} {"question": "Find the number of juvenile cases that were resolved through community supervision, broken down by race/ethnicity, for the past year.", "schema": "CREATE TABLE JuvenileCases (Id INT, Race VARCHAR(50), Program VARCHAR(50), ResolutionDate DATE); INSERT INTO JuvenileCases (Id, Race, Program, ResolutionDate) VALUES (1, 'Hispanic', 'Community Supervision', '2021-03-21'), (2, 'Black', 'Probation', '2020-12-12'), (3, 'Asian', 'Community Supervision', '2021-06-15');", "sql": "SELECT Race, COUNT(*) as NumCases FROM JuvenileCases WHERE Program = 'Community Supervision' AND YEAR(ResolutionDate) = 2021 GROUP BY Race;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 139, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the value in 2008-09 for the Autumn Gold event?", "schema": "CREATE TABLE table_name_9 (event VARCHAR)", "sql": "SELECT 2008 AS _09 FROM table_name_9 WHERE event = 'autumn gold';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 65, "num_statements": 1} {"question": "Delete all wind energy projects in Germany before 2010.", "schema": "CREATE TABLE energy_projects (name TEXT, country TEXT, technology TEXT, capacity_mw INTEGER, year INTEGER); INSERT INTO energy_projects (name, country, technology, capacity_mw, year) VALUES ('Project A', 'Germany', 'Wind', 10, 2005), ('Project B', 'Germany', 'Wind', 15, 2012);", "sql": "DELETE FROM energy_projects WHERE country = 'Germany' AND technology = 'Wind' AND year < 2010;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_delete", "is_postgresql_specific": false, "sql_length": 94, "num_statements": 1} {"question": "How many crimes were committed in the last 30 days in each district in Los Angeles?", "schema": "CREATE TABLE la_districts (id INT, district_name VARCHAR(255));CREATE TABLE crimes (id INT, district_id INT, crime_date DATE);INSERT INTO la_districts (id, district_name) VALUES (1, 'Hollywood'), (2, 'Downtown'), (3, 'Venice');INSERT INTO crimes (id, district_id, crime_date) VALUES (1, 1, '2022-03-01'), (2, 1, '2022-03-15'), (3, 2, '2022-03-05'), (4, 2, '2022-03-20'), (5, 3, '2022-03-10'), (6, 3, '2022-03-25');", "sql": "SELECT d.district_name, COUNT(c.id) crimes_in_last_30_days FROM la_districts d JOIN crimes c ON d.id = c.district_id WHERE c.crime_date >= CURDATE() - INTERVAL 30 DAY GROUP BY d.id;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 181, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which Club has a Founded smaller than 1882, and a League of fa premier league?", "schema": "CREATE TABLE table_name_76 (club VARCHAR, founded VARCHAR, league VARCHAR)", "sql": "SELECT club FROM table_name_76 WHERE founded < 1882 AND league = 'fa premier league';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 85, "num_statements": 1} {"question": "What is the total number of volunteers and total hours donated by them in '2018'?", "schema": "CREATE TABLE Volunteers (VolunteerID int, VolunteerName varchar(50), HoursDonated int, VolunteerYear int); INSERT INTO Volunteers (VolunteerID, VolunteerName, HoursDonated, VolunteerYear) VALUES (1, 'Samantha Green', 30, 2018), (2, 'Taylor White', 20, 2018), (3, 'Uma Black', 15, 2018), (4, 'Victor Red', 25, 2018);", "sql": "SELECT COUNT(VolunteerName) as TotalVolunteers, SUM(HoursDonated) as TotalHours FROM Volunteers WHERE VolunteerYear = 2018;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 123, "num_statements": 1} {"question": "Create a table named 'space_missions' with columns mission_name, start_date, end_date, mission_type", "schema": "CREATE TABLE space_missions (mission_name VARCHAR(100) PRIMARY KEY, start_date DATE, end_date DATE, mission_type VARCHAR(50));", "sql": "CREATE TABLE space_missions (mission_name VARCHAR(100) PRIMARY KEY, start_date DATE, end_date DATE, mission_type VARCHAR(50));", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_table", "is_postgresql_specific": false, "sql_length": 126, "num_statements": 1} {"question": "Display the number of sustainable tourism certifications obtained by each country in Asia.", "schema": "CREATE TABLE countries (country_id INT, name TEXT, region TEXT, num_certifications INT);", "sql": "SELECT name, num_certifications FROM countries WHERE region = 'Asia';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 69, "num_statements": 1} {"question": "Who are the diplomats with the highest salaries?", "schema": "CREATE TABLE diplomacy_staff (staff_id INT, name VARCHAR(255), position VARCHAR(255), salary INT); INSERT INTO diplomacy_staff (staff_id, name, position, salary) VALUES (1, 'John Doe', 'Ambassador', 75000), (2, 'Jane Smith', 'Consul', 50000), (3, 'Michael Johnson', 'Diplomatic Attaché', 60000), (4, 'Sarah Brown', 'Ambassador', 90000), (5, 'David Williams', 'Consul', 80000);", "sql": "SELECT name, position, salary FROM diplomacy_staff ORDER BY salary DESC;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 72, "num_statements": 1} {"question": "What is the average safety rating of AI models developed for education applications, and how many of these models have been developed by organizations based in Asia?", "schema": "CREATE TABLE AIModels (id INT, model_name VARCHAR(50), organization VARCHAR(50), application_type VARCHAR(50), safety_rating INT), AIOrganizations (id INT, organization VARCHAR(50), region VARCHAR(50)); INSERT INTO AIModels (id, model_name, organization, application_type, safety_rating) VALUES (1, 'AI4Education', 'Microsoft', 'Education', 85), (2, 'AI4Learning', 'Google', 'Education', 90), (3, 'AI4Teaching', 'IBM', 'Education', 88), (4, 'AI4Students', 'Alibaba', 'Education', 92), (5, 'AI4Classroom', 'Tencent', 'Education', 80); INSERT INTO AIOrganizations (id, organization, region) VALUES (1, 'Microsoft', 'North America'), (2, 'Google', 'North America'), (3, 'IBM', 'North America'), (4, 'Alibaba', 'Asia'), (5, 'Tencent', 'Asia');", "sql": "SELECT AVG(safety_rating) as avg_safety_rating FROM AIModels WHERE application_type = 'Education'; SELECT COUNT(*) as asian_org_count FROM AIOrganizations WHERE region = 'Asia' AND organization IN (SELECT organization FROM AIModels WHERE application_type = 'Education');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 270, "num_statements": 2} {"question": "PostgreSQL regression test 'pg_dependencies': Write the SELECT query (example 38).", "schema": null, "sql": "SELECT '[{\"attributes\" : [2,3], \"dependency\" : [1,null], \"degree\": 1.000}]'::pg_dependencies;", "explanation": "Regression test for Pg Dependencies in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT '[{\"attributes\" : [2,3], \"dependency\" : [1,null], \"degree\": 1.000}]'::pg_dependencies) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 93, "num_statements": 1} {"question": "How many sustainable suppliers are there in total for each country?", "schema": "CREATE TABLE suppliers (id INT, name VARCHAR(255), country VARCHAR(255), sustainable BOOLEAN); CREATE VIEW sustainable_suppliers AS SELECT name FROM suppliers WHERE sustainable = TRUE;", "sql": "SELECT country, COUNT(*) as total_sustainable_suppliers FROM suppliers WHERE sustainable = TRUE GROUP BY country;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 113, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'box' (example 11).", "schema": null, "sql": "INSERT INTO BOX_TBL (f1) VALUES ('asdfasdf(ad');", "explanation": "DML from PostgreSQL core regression test for Box.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 48, "num_statements": 1} {"question": "What is the total amount of donations received by each community center in New York?", "schema": "CREATE TABLE community_centers (id INT, name TEXT, city TEXT); INSERT INTO community_centers (id, name, city) VALUES (1, 'Manhattan Community Center', 'New York'), (2, 'Bronx Community Center', 'New York'); CREATE TABLE donations (id INT, center_id INT, amount DECIMAL); INSERT INTO donations (id, center_id, amount) VALUES (1, 1, 5000.00), (2, 1, 7000.00), (3, 2, 3000.00);", "sql": "SELECT c.name, SUM(d.amount) as total_donations FROM community_centers c INNER JOIN donations d ON c.id = d.center_id WHERE c.city = 'New York' GROUP BY c.name;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 160, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Show first name and id for all customers with at least 2 accounts.", "schema": "CREATE TABLE Customers (customer_first_name VARCHAR, customer_id VARCHAR); CREATE TABLE Accounts (customer_id VARCHAR)", "sql": "SELECT T2.customer_first_name, T1.customer_id FROM Accounts AS T1 JOIN Customers AS T2 ON T1.customer_id = T2.customer_id GROUP BY T1.customer_id HAVING COUNT(*) >= 2;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 167, "num_statements": 1} {"question": "Insert a new record into the dispensaries table for a new recreational dispensary with a name of 'Dispensary E' and a total revenue of 80000.00 in Q1 2022.", "schema": "CREATE TABLE dispensaries (id INT, name VARCHAR(255), type VARCHAR(255), total_revenue DECIMAL(10,2));", "sql": "INSERT INTO dispensaries (id, name, type, total_revenue) VALUES (5, 'Dispensary E', 'recreational', 80000.00);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 110, "num_statements": 1} {"question": "What is the average water temperature in the Indian Ocean Monitoring Station for each month?", "schema": "CREATE TABLE indian_ocean_monitoring_station (date DATE, temperature FLOAT);", "sql": "SELECT EXTRACT(MONTH FROM date) AS month, AVG(temperature) AS avg_temperature FROM indian_ocean_monitoring_station GROUP BY month;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 130, "num_statements": 1} {"question": "What is the minimum water consumption for each day in the month of September for the water treatment plant with ID 4 in the state of Florida in 2020?", "schema": "CREATE TABLE water_treatment_plant (plant_id INT, state VARCHAR(50), year INT, month INT, day INT, water_consumption FLOAT); INSERT INTO water_treatment_plant (plant_id, state, year, month, day, water_consumption) VALUES (4, 'Florida', 2020, 9, 1, 12345.6), (4, 'Florida', 2020, 9, 2, 23456.7), (4, 'Florida', 2020, 9, 3, 34567.8);", "sql": "SELECT day, MIN(water_consumption) as min_water_consumption FROM water_treatment_plant WHERE plant_id = 4 AND state = 'Florida' AND year = 2020 AND month = 9 GROUP BY day;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 171, "num_statements": 1} {"question": "List the number of veterans hired per state in the 'VeteranEmployment' table", "schema": "CREATE TABLE VeteranEmployment (id INT, state VARCHAR(255), veteran_hired INT);", "sql": "SELECT state, COUNT(*) as veterans_hired_count FROM VeteranEmployment GROUP BY state;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 85, "num_statements": 1} {"question": "Find the total number of electric and hybrid buses in the 'public_transportation' table.", "schema": "CREATE TABLE public_transportation (id INT, type VARCHAR(20), make VARCHAR(20), model VARCHAR(20), year INT, fuel_type VARCHAR(20));", "sql": "SELECT SUM(fuel_type = 'electric' OR fuel_type = 'hybrid') FROM public_transportation;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 86, "num_statements": 1} {"question": "What is the minimum number of containers handled in a single day by cranes in the Port of Rotterdam in April 2021?", "schema": "CREATE TABLE Port_Rotterdam_Crane_Stats (crane_name TEXT, handling_date DATE, containers_handled INTEGER); INSERT INTO Port_Rotterdam_Crane_Stats (crane_name, handling_date, containers_handled) VALUES ('CraneI', '2021-04-01', 70), ('CraneJ', '2021-04-02', 60), ('CraneK', '2021-04-03', 50), ('CraneL', '2021-04-04', 80);", "sql": "SELECT MIN(containers_handled) FROM Port_Rotterdam_Crane_Stats WHERE handling_date >= '2021-04-01' AND handling_date <= '2021-04-30';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 133, "num_statements": 1} {"question": "PL/pgSQL test: Plpython Types (example 104).", "schema": null, "sql": "select pyreturnmultidemfloat4(6,5,3,2);", "explanation": "PL/pgSQL example from PostgreSQL source test for Plpython Types.", "validation_query": null, "source": "plpgsql_source", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 39, "num_statements": 1} {"question": "How many military equipment maintenance requests were submitted in Q1 2022 for the Navy?", "schema": "CREATE TABLE Maintenance_Requests (Id INT, Request_Date DATE, Equipment_Type VARCHAR(50), Agency VARCHAR(50)); INSERT INTO Maintenance_Requests (Id, Request_Date, Equipment_Type, Agency) VALUES (1, '2022-01-01', 'Ship', 'Navy'); INSERT INTO Maintenance_Requests (Id, Request_Date, Equipment_Type, Agency) VALUES (2, '2022-04-01', 'Submarine', 'Navy');", "sql": "SELECT COUNT(*) FROM Maintenance_Requests WHERE Request_Date BETWEEN '2022-01-01' AND '2022-03-31' AND Agency = 'Navy';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 119, "num_statements": 1} {"question": "How many students with visual impairments have received accommodations in the last year, broken down by country?", "schema": "CREATE TABLE student_accommodations (student_id INT, disability_type VARCHAR(255), country VARCHAR(255), date DATE); INSERT INTO student_accommodations (student_id, disability_type, country, date) VALUES (1, 'Visual Impairment', 'USA', '2021-03-22'); INSERT INTO student_accommodations (student_id, disability_type, country, date) VALUES (2, 'Visual Impairment', 'Canada', '2021-04-01');", "sql": "SELECT country, COUNT(*) as num_students FROM student_accommodations WHERE disability_type = 'Visual Impairment' AND date BETWEEN DATE_SUB(NOW(), INTERVAL 1 YEAR) AND NOW() GROUP BY country;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 190, "num_statements": 1} {"question": "Calculate the total production for each month of the year", "schema": "CREATE TABLE monthly_production (id INT, field_name VARCHAR(50), production_date DATE, qty FLOAT); INSERT INTO monthly_production (id, field_name, production_date, qty) VALUES (1, 'Galkynysh', '2019-01-01', 50000); INSERT INTO monthly_production (id, field_name, production_date, qty) VALUES (2, 'Samotlor', '2019-02-12', 60000);", "sql": "SELECT MONTH(production_date) as month, SUM(qty) as total_qty FROM monthly_production GROUP BY month;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 101, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'partition_join' (example 235).", "schema": null, "sql": "CREATE TABLE prt4_n_p2 PARTITION OF prt4_n FOR VALUES FROM (300) TO (500);", "explanation": "DDL from PostgreSQL core regression test for Partition Join.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_table", "is_postgresql_specific": false, "sql_length": 74, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many positive votes for Tennessee in 2006?", "schema": "CREATE TABLE table_name_73 (yes_vote VARCHAR, date VARCHAR, state VARCHAR)", "sql": "SELECT yes_vote FROM table_name_73 WHERE date = '2006' AND state = 'tennessee';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 79, "num_statements": 1} {"question": "Find the 2nd most visited destination for each year in the 'visits' table.", "schema": "CREATE TABLE visits (visit_id INT, destination TEXT, visit_date DATE); INSERT INTO visits (visit_id, destination, visit_date) VALUES (1, 'New York', '2022-01-01'), (2, 'New York', '2022-02-01'), (3, 'Vancouver', '2022-03-01');", "sql": "SELECT destination, EXTRACT(YEAR FROM visit_date) AS visit_year, RANK() OVER (PARTITION BY EXTRACT(YEAR FROM visit_date) ORDER BY COUNT(*) DESC) AS visit_rank FROM visits GROUP BY destination, EXTRACT(YEAR FROM visit_date) HAVING visit_rank = 2;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "query_window_function", "is_postgresql_specific": true, "sql_length": 245, "num_statements": 1} {"question": "pgTAP test for Pgtap--Unpackaged--0.91.0 (assertion 502).", "schema": null, "sql": "ALTER EXTENSION pgtap ADD FUNCTION function_lang_is( NAME, NAME[], NAME, TEXT );", "explanation": "SQL assertion from pgTAP test suite for Pgtap--Unpackaged--0.91.0.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 80, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the Score of the game against Philadelphia 76ers?", "schema": "CREATE TABLE table_name_2 (score VARCHAR, opponent VARCHAR)", "sql": "SELECT score FROM table_name_2 WHERE opponent = 'philadelphia 76ers';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 69, "num_statements": 1} {"question": "List all support programs and their respective coordinators, ordered alphabetically by support program name.", "schema": "CREATE TABLE support_programs (id INT, name TEXT, coordinator TEXT);", "sql": "SELECT * FROM support_programs ORDER BY name;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 45, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What position does the player from texas a&m play?", "schema": "CREATE TABLE table_name_32 (position VARCHAR, school VARCHAR)", "sql": "SELECT position FROM table_name_32 WHERE school = 'texas a&m';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 62, "num_statements": 1} {"question": "Insert a new record for a player from 'Brazil' who uses Oculus and prefers PC gaming.", "schema": "CREATE TABLE Players (PlayerID INT, Country VARCHAR(20), VRPlatform VARCHAR(10), PrefersPC BOOLEAN);", "sql": "INSERT INTO Players (PlayerID, Country, VRPlatform, PrefersPC) VALUES (3, 'Brazil', 'Oculus', TRUE);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 100, "num_statements": 1} {"question": "Find the difference in stream counts between consecutive rows for each country.", "schema": "CREATE TABLE users (id INT, country VARCHAR(50), stream_count INT); INSERT INTO users (id, country, stream_count) VALUES (1, 'USA', 100), (2, 'Canada', 120), (3, 'USA', 150), (4, 'Mexico', 80);", "sql": "SELECT country, stream_count - LAG(stream_count) OVER(PARTITION BY country ORDER BY id) AS stream_count_diff FROM users;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "query_window_function", "is_postgresql_specific": true, "sql_length": 120, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which Local authority has Services of greater anglia?", "schema": "CREATE TABLE table_name_33 (local_authority VARCHAR, services VARCHAR)", "sql": "SELECT local_authority FROM table_name_33 WHERE services = 'greater anglia';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 76, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the lowest Total, when Second is \"55\"?", "schema": "CREATE TABLE table_name_2 (total INTEGER, second VARCHAR)", "sql": "SELECT MIN(total) FROM table_name_2 WHERE second = 55;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 54, "num_statements": 1} {"question": "PL/pgSQL test: Plpgsql Transaction (example 96).", "schema": null, "sql": "BEGIN\n FOR id IN SELECT 1/(x-1000) FROM generate_series(1,1000) x LOOP\n INSERT INTO test1 VALUES(id);", "explanation": "PL/pgSQL example from PostgreSQL source test for Plpgsql Transaction.", "validation_query": null, "source": "plpgsql_source", "difficulty": "basic", "category": "plpgsql", "is_postgresql_specific": false, "sql_length": 111, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the model with a engine type with b5244 s2?", "schema": "CREATE TABLE table_name_97 (model VARCHAR, engine_type VARCHAR)", "sql": "SELECT model FROM table_name_97 WHERE engine_type = 'b5244 s2';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 63, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: The swimmer in lane 7 has what as the smallest rank?", "schema": "CREATE TABLE table_name_81 (rank INTEGER, lane VARCHAR)", "sql": "SELECT MIN(rank) FROM table_name_81 WHERE lane = 7;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 51, "num_statements": 1} {"question": "What is the total CO2 emissions of manufacturing processes for all silk garments, in the 'manufacturing' table?", "schema": "CREATE TABLE manufacturing (id INT, garment_id INT, garment_material VARCHAR(50), co2_emissions DECIMAL(10, 2), process_date DATE);", "sql": "SELECT SUM(co2_emissions) AS total_co2_emissions FROM manufacturing WHERE garment_material = 'silk';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 100, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many games or records were played on the Miami Orange Bowl?", "schema": "CREATE TABLE table_17386066_2 (record VARCHAR, stadium VARCHAR)", "sql": "SELECT COUNT(record) FROM table_17386066_2 WHERE stadium = 'Miami Orange Bowl';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 79, "num_statements": 1} {"question": "pgTAP test for Pgtap--Unpackaged--0.91.0 (assertion 291).", "schema": null, "sql": "ALTER EXTENSION pgtap ADD FUNCTION has_trigger ( NAME, NAME, NAME, TEXT );", "explanation": "SQL assertion from pgTAP test suite for Pgtap--Unpackaged--0.91.0.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 74, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the 1952 rate when the 1954 is more than 4.2?", "schema": "CREATE TABLE table_name_37 (Id VARCHAR)", "sql": "SELECT SUM(1952) FROM table_name_37 WHERE 1954 > 4.2;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 53, "num_statements": 1} {"question": "What is the maximum length of a dam in the 'dams' table?", "schema": "CREATE TABLE dams (dam_id INT, dam_name VARCHAR(50), location VARCHAR(50), length DECIMAL(10,2), reservoir_capacity INT);", "sql": "SELECT MAX(length) FROM dams;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 29, "num_statements": 1} {"question": "What is the percentage of games won by each team in the 2022 NBA playoffs?", "schema": "CREATE TABLE nba_teams (team_id INT, team_name VARCHAR(255)); INSERT INTO nba_teams VALUES (1, 'TeamA'), (2, 'TeamB'), (3, 'TeamC'); CREATE TABLE nba_games (game_id INT, home_team_id INT, away_team_id INT, home_score INT, away_score INT, playoff_round VARCHAR(255)); INSERT INTO nba_games VALUES (1, 1, 2, 90, 85, 'First Round'), (2, 1, 3, 80, 85, 'First Round'), (3, 2, 1, 95, 90, 'First Round'), (4, 2, 3, 88, 82, 'First Round');", "sql": "SELECT t.team_name, (SUM(CASE WHEN g.home_team_id = t.team_id THEN 1 ELSE 0 END) + SUM(CASE WHEN g.away_team_id = t.team_id THEN 1 ELSE 0 END) - SUM(CASE WHEN (g.home_team_id = t.team_id AND g.home_score < g.away_score) OR (g.away_team_id = t.team_id AND g.home_score > g.away_score) THEN 1 ELSE 0 END)) * 100.0 / COUNT(*) AS win_percentage FROM nba_teams t JOIN nba_games g ON t.team_id IN (g.home_team_id, g.away_team_id) WHERE g.playoff_round = 'First Round' GROUP BY t.team_name;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 483, "num_statements": 1} {"question": "What is the total number of volunteers from the country of Australia?", "schema": "CREATE TABLE volunteers (id INT, country TEXT); INSERT INTO volunteers (id, country) VALUES (1, 'Australia'), (2, 'Mexico'), (3, 'Australia');", "sql": "SELECT COUNT(*) FROM volunteers WHERE country = 'Australia';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 60, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: List all document type codes and document type names.", "schema": "CREATE TABLE Ref_document_types (document_type_code VARCHAR, document_type_name VARCHAR)", "sql": "SELECT document_type_code, document_type_name FROM Ref_document_types;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 70, "num_statements": 1} {"question": "What is the total amount of climate finance committed to renewable energy projects in Africa?", "schema": "CREATE TABLE renewable_energy_projects (project_id INT, project_name VARCHAR(255), location VARCHAR(255), funded_by VARCHAR(255), amount DECIMAL(10,2)); INSERT INTO renewable_energy_projects (project_id, project_name, location, funded_by, amount) VALUES (1, 'Solar Farm in Kenya', 'Kenya', 'Global Green Fund', 5000000.00), (2, 'Wind Power in South Africa', 'South Africa', 'Climate Action Bank', 7000000.00);", "sql": "SELECT SUM(amount) FROM renewable_energy_projects WHERE location = 'Africa' AND funded_by IS NOT NULL;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 102, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many gold medals on average were earned by Italy when they held the rank of 4?", "schema": "CREATE TABLE table_name_5 (gold INTEGER, nation VARCHAR, rank VARCHAR)", "sql": "SELECT AVG(gold) FROM table_name_5 WHERE nation = 'italy' AND rank > 4;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 71, "num_statements": 1} {"question": "PostgreSQL regression test 'partition_info': Write the SELECT query (example 2).", "schema": null, "sql": "SELECT * FROM pg_partition_tree(0);", "explanation": "Regression test for Partition Info in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT * FROM pg_partition_tree(0)) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 35, "num_statements": 1} {"question": "pgTAP test for Pgtap--Unpackaged--0.91.0 (assertion 123).", "schema": null, "sql": "ALTER EXTENSION pgtap ADD FUNCTION _get_col_type ( NAME, NAME );", "explanation": "SQL assertion from pgTAP test suite for Pgtap--Unpackaged--0.91.0.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 64, "num_statements": 1} {"question": "What is the number of cases and win rate for female attorneys in the West region?", "schema": "CREATE TABLE Cases (CaseID INT, AttorneyID INT, CaseOutcome VARCHAR(50)); INSERT INTO Cases (CaseID, AttorneyID, CaseOutcome) VALUES (1, 1, 'Won'), (2, 1, 'Lost'), (3, 2, 'Won'), (4, 2, 'Won'), (5, 3, 'Lost'), (6, 3, 'Lost'), (7, 4, 'Won'), (8, 4, 'Lost'), (9, 5, 'Lost'), (10, 5, 'Won'); CREATE TABLE Attorneys (AttorneyID INT, AttorneyName VARCHAR(50), Gender VARCHAR(50)); INSERT INTO Attorneys (AttorneyID, AttorneyName, Gender) VALUES (1, 'Jane Doe', 'Female'), (2, 'John Smith', 'Male'), (3, 'Sara Connor', 'Female'), (4, 'David Kim', 'Male'), (5, 'Emily Johnson', 'Female');", "sql": "SELECT a.AttorneyName, a.Gender, COUNT(c.CaseID) AS TotalCases, COUNT(c.CaseID) * 100.0 / SUM(COUNT(c.CaseID)) OVER (PARTITION BY a.Gender) AS WinRate FROM Attorneys a JOIN Cases c ON a.AttorneyID = c.AttorneyID WHERE a.Gender = 'Female' AND a.Region = 'West' GROUP BY a.AttorneyName, a.Gender;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "query_window_function", "is_postgresql_specific": true, "sql_length": 294, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What's the fewest number of pages for the title al-jiniral fi matahatihi?", "schema": "CREATE TABLE table_name_39 (pages INTEGER, title VARCHAR)", "sql": "SELECT MIN(pages) FROM table_name_39 WHERE title = 'al-jiniral fi matahatihi';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 78, "num_statements": 1} {"question": "What is the total revenue for each region where garments were sold with a price greater than '250'?", "schema": "CREATE TABLE sales (id INT, salesperson_id INT, garment_id INT, region TEXT, price INT); INSERT INTO sales (id, salesperson_id, garment_id, region, price) VALUES (1, 1, 1, 'Paris', 250), (2, 1, 2, 'London', 120), (3, 2, 3, 'Paris', 180), (4, 2, 4, 'London', 220), (5, 3, 5, 'Berlin', 200), (6, 3, 6, 'Berlin', 160);", "sql": "SELECT region, SUM(price) AS total_revenue FROM sales WHERE price > 250 GROUP BY region;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 88, "num_statements": 1} {"question": "Update the donation amount for 'The Life You Can Save' to $35,000 in the 'philanthropic_trends' table.", "schema": "CREATE TABLE philanthropic_trends (organization_name TEXT, donation_amount INTEGER); INSERT INTO philanthropic_trends (organization_name, donation_amount) VALUES ('Effctive Altruism Funds', 50000), ('GiveWell', 40000), ('The Life You Can Save', 30000);", "sql": "UPDATE philanthropic_trends SET donation_amount = 35000 WHERE organization_name = 'The Life You Can Save';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 106, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which mideast regional state contains the city of Seattle?", "schema": "CREATE TABLE table_name_53 (state VARCHAR, region VARCHAR, city VARCHAR)", "sql": "SELECT state FROM table_name_53 WHERE region = 'mideast' AND city = 'seattle';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 78, "num_statements": 1} {"question": "Determine the average number of students enrolled in courses offered by 'Columbia U' each season.", "schema": "CREATE TABLE course_enrollment (course_id INT, university VARCHAR(20), num_students INT); INSERT INTO course_enrollment (course_id, university, num_students) VALUES (1, 'Columbia U', 30), (2, 'Yale U', 20), (3, 'Columbia U', 40);", "sql": "SELECT AVG(num_students) FROM course_enrollment WHERE university = 'Columbia U';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 80, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What spacecraft had an EVA that ended at 17:28?", "schema": "CREATE TABLE table_name_66 (spacecraft VARCHAR, end_time VARCHAR)", "sql": "SELECT spacecraft FROM table_name_66 WHERE end_time = '17:28';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 62, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the To par of the New Zealand Player?", "schema": "CREATE TABLE table_name_79 (to_par VARCHAR, country VARCHAR)", "sql": "SELECT to_par FROM table_name_79 WHERE country = 'new zealand';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 63, "num_statements": 1} {"question": "PostgreSQL regression test 'amutils': Write the SELECT query (example 10).", "schema": null, "sql": "select col, prop, pg_index_column_has_property(o, col, prop)\n from (values ('foocover'::regclass)) v1(o),\n (values (1,'orderable'),(2,'asc'),(3,'desc'),\n (4,'nulls_first'),(5,'nulls_last'),\n (6,'distance_orderable'),(7,'returnable'),\n (8, 'bogus')) v2(idx,prop),\n generate_series(1,3) col\n order by col, idx;", "explanation": "Regression test for Amutils in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select col, prop, pg_index_column_has_property(o, col, prop)\n from (values ('foocover'::regclass)) v1(o),\n (values (1,'orderable'),(2,'asc'),(3,'desc'),\n (4,'nulls_first'),(5,'nulls_last'),\n (6,'distance_orderable'),(7,'returnable'),\n (8, 'bogus')) v2(idx,prop),\n generate_series(1,3) col\n order by col, idx) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 363, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the stadium of the NK Zagreb?", "schema": "CREATE TABLE table_name_70 (stadium VARCHAR, team VARCHAR)", "sql": "SELECT stadium FROM table_name_70 WHERE team = 'nk zagreb';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 59, "num_statements": 1} {"question": "List all climate mitigation projects in Asia that were started after 2015 and their respective budgets.", "schema": "CREATE TABLE climate_projects (project_name VARCHAR(50), location VARCHAR(50), start_year INT, budget INT, sector VARCHAR(50)); INSERT INTO climate_projects (project_name, location, start_year, budget, sector) VALUES ('Solar Farm A', 'China', 2017, 1000000, 'Solar'), ('Wind Farm B', 'India', 2018, 1500000, 'Wind');", "sql": "SELECT project_name, location, budget FROM climate_projects WHERE location IN ('Asia') AND start_year > 2015 AND sector = 'Climate Mitigation';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 143, "num_statements": 1} {"question": "What is the average budget per program?", "schema": "CREATE TABLE Programs (id INT, program TEXT, budget DECIMAL(10,2)); INSERT INTO Programs (id, program, budget) VALUES (1, 'Feeding the Hungry', 5000.00), (2, 'Clothing Drive', 3000.00), (3, 'Education', 7000.00);", "sql": "SELECT program, AVG(budget) FROM Programs GROUP BY program;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 59, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'pageinspect' (example 15).", "schema": null, "sql": "SELECT * FROM bt_multi_page_stats('test2_col1_idx', 3, 2);", "explanation": "Example query from the 'pageinspect' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 58, "num_statements": 1} {"question": "pgTAP test for Ownership (assertion 55).", "schema": null, "sql": "SELECT * FROM check_test(\n table_owner_is('apart', current_user, 'mumble'),\n\ttrue,\n 'table_owner_is(part, user, desc)',\n 'mumble',\n ''\n);", "explanation": "SQL assertion from pgTAP test suite for Ownership.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 149, "num_statements": 1} {"question": "Update the launch date of digital asset with id = 2 to '2021-03-16'.", "schema": "CREATE TABLE digital_assets (id INT, name VARCHAR(255), company VARCHAR(255), launch_date DATE, developer VARCHAR(255)); INSERT INTO digital_assets (id, name, company, launch_date, developer) VALUES (1, 'Asset 1', 'Company A', '2021-01-01', 'Alice Johnson'), (2, 'Asset 2', 'Company B', '2021-02-15', 'Bruno Alvarez');", "sql": "UPDATE digital_assets SET launch_date = '2021-03-16' WHERE id = 2;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "What is the average length of hospital stays for patients with eating disorders?", "schema": "CREATE TABLE stays (id INT, patient_id INT, length INT, condition TEXT); CREATE TABLE conditions (id INT, name TEXT); INSERT INTO conditions (id, name) VALUES (1, 'Eating Disorder');", "sql": "SELECT AVG(length) FROM stays WHERE condition = 'Eating Disorder';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: During the game at Candlestick Park, who was the visiting team?", "schema": "CREATE TABLE table_name_99 (visiting_team VARCHAR, stadium VARCHAR)", "sql": "SELECT visiting_team FROM table_name_99 WHERE stadium = 'candlestick park';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 75, "num_statements": 1} {"question": "Which cruelty-free cosmetic products have the highest sales in the UK?", "schema": "CREATE TABLE products (product_id INT, product_name TEXT, is_cruelty_free BOOLEAN, sales INT, country TEXT); INSERT INTO products (product_id, product_name, is_cruelty_free, sales, country) VALUES (1, 'Lipstick', true, 550, 'UK'), (2, 'Foundation', false, 600, 'UK'), (3, 'Mascara', true, 700, 'UK');", "sql": "SELECT product_name, SUM(sales) as total_sales FROM products WHERE is_cruelty_free = true AND country = 'UK' GROUP BY product_name ORDER BY total_sales DESC;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 157, "num_statements": 1} {"question": "What is the total revenue by platform?", "schema": "CREATE TABLE ad_data (platform VARCHAR(20), revenue NUMERIC(10,2));INSERT INTO ad_data VALUES ('FB',1000),('IG',2000),('TW',3000),('SN',4000),('LI',5000);", "sql": "SELECT platform, SUM(revenue) FROM ad_data GROUP BY platform;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 61, "num_statements": 1} {"question": "What is the minimum GPA of graduate students in the Physics department?", "schema": "CREATE TABLE graduate_students (student_id INT, name TEXT, gpa DECIMAL(3,2), department TEXT);", "sql": "SELECT MIN(gs.gpa) FROM graduate_students gs WHERE gs.department = 'Physics';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 77, "num_statements": 1} {"question": "What is the average temperature trend in the Arctic Ocean, grouped by month, from January 2021 to December 2021?", "schema": "CREATE TABLE OceanTemperature (Date DATE, Location VARCHAR(255), Temperature FLOAT); INSERT INTO OceanTemperature (Date, Location, Temperature) VALUES ('2021-01-01', 'Arctic Ocean', -1.5), ('2021-02-01', 'Arctic Ocean', -2.0);", "sql": "SELECT DATE_FORMAT(Date, '%Y-%m') AS Month, AVG(Temperature) FROM OceanTemperature WHERE Date BETWEEN '2021-01-01' AND '2021-12-31' GROUP BY Month;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 147, "num_statements": 1} {"question": "What was the average exhibition duration for paintings, partitioned by artist?", "schema": "CREATE TABLE GalleryG (gallery_name VARCHAR(20), artist VARCHAR(20), artwork_type VARCHAR(20), exhibition_duration INT); INSERT INTO GalleryG (gallery_name, artist, artwork_type, exhibition_duration) VALUES ('GalleryG', 'Picasso', 'Painting', 120), ('GalleryG', 'Picasso', 'Painting', 90), ('GalleryG', 'Dali', 'Painting', 60);", "sql": "SELECT artist, AVG(exhibition_duration) as avg_duration FROM (SELECT gallery_name, artist, artwork_type, exhibition_duration, ROW_NUMBER() OVER (PARTITION BY gallery_name, artist ORDER BY exhibition_duration) as rn FROM GalleryG) tmp WHERE rn = 1 GROUP BY artist;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "query_window_function", "is_postgresql_specific": true, "sql_length": 263, "num_statements": 1} {"question": "List transactions from the last 30 days", "schema": "CREATE TABLE transactions (id INT PRIMARY KEY, customer_id INT, amount DECIMAL(10,2), transaction_date DATE); INSERT INTO transactions (id, customer_id, amount, transaction_date) VALUES (1, 1, 500.00, '2022-01-01'); INSERT INTO transactions (id, customer_id, amount, transaction_date) VALUES (2, 2, 750.00, '2022-01-02');", "sql": "SELECT * FROM transactions t WHERE t.transaction_date >= CURDATE() - INTERVAL 30 DAY;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 85, "num_statements": 1} {"question": "What is the total number of luxury hotels in Japan and Germany?", "schema": "CREATE TABLE Hotels (id INT, country VARCHAR(50), category VARCHAR(50), PRIMARY KEY(id)); INSERT INTO Hotels (id, country, category) VALUES (1, 'Japan', 'Luxury'), (2, 'Germany', 'Luxury'), (3, 'Japan', 'Budget');", "sql": "SELECT SUM(CASE WHEN country IN ('Japan', 'Germany') AND category = 'Luxury' THEN 1 ELSE 0 END) FROM Hotels;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 108, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the transfer window for Christian Nerlinger?", "schema": "CREATE TABLE table_name_23 (transfer_window VARCHAR, name VARCHAR)", "sql": "SELECT transfer_window FROM table_name_23 WHERE name = 'christian nerlinger';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 77, "num_statements": 1} {"question": "What is the maximum hectares of any forest in Brazil?", "schema": "CREATE TABLE Forests (id INT, name VARCHAR(50), hectares FLOAT, country VARCHAR(50)); INSERT INTO Forests (id, name, hectares, country) VALUES (1, 'Amazon Rainforest', 55000000.0, 'Brazil'); CREATE TABLE Forest_Hectares (id INT, forest_id INT, hectares FLOAT); INSERT INTO Forest_Hectares (id, forest_id, hectares) VALUES (1, 1, 55000000.0);", "sql": "SELECT MAX(hectares) FROM Forest_Hectares WHERE forest_id IN (SELECT id FROM Forests WHERE country = 'Brazil');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 111, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is High Assists, when High Points is \"Kobe Bryant (27)\", and when High Rebounds is \"Pau Gasol (11)\"?", "schema": "CREATE TABLE table_name_13 (high_assists VARCHAR, high_points VARCHAR, high_rebounds VARCHAR)", "sql": "SELECT high_assists FROM table_name_13 WHERE high_points = 'kobe bryant (27)' AND high_rebounds = 'pau gasol (11)';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 115, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'rangetypes' (example 251).", "schema": null, "sql": "insert into test_range_spgist select 'empty'::int4range from generate_series(1,500) g;", "explanation": "DML from PostgreSQL core regression test for Rangetypes.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 86, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: With Laps greater than 78, what is the lowest Grid?", "schema": "CREATE TABLE table_name_30 (grid INTEGER, laps INTEGER)", "sql": "SELECT MIN(grid) FROM table_name_30 WHERE laps > 78;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 52, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the result for the date of august 23?", "schema": "CREATE TABLE table_name_83 (result VARCHAR, date VARCHAR)", "sql": "SELECT result FROM table_name_83 WHERE date = 'august 23';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 58, "num_statements": 1} {"question": "What are the health equity metrics in Colorado?", "schema": "CREATE TABLE HealthEquity (id INT, metric_name TEXT, state TEXT); INSERT INTO HealthEquity (id, metric_name, state) VALUES (1, 'Access Index 2020', 'Colorado'); INSERT INTO HealthEquity (id, metric_name, state) VALUES (2, 'Quality Index 2018', 'Colorado');", "sql": "SELECT * FROM HealthEquity WHERE state = 'Colorado';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 52, "num_statements": 1} {"question": "What is the minimum distance traveled by an electric vehicle in 'tokyo'?", "schema": "CREATE TABLE vehicles (id INT, city VARCHAR(20), type VARCHAR(20), is_electric BOOLEAN, daily_distance INT); INSERT INTO vehicles VALUES (1, 'tokyo', 'sedan', true, 30); INSERT INTO vehicles VALUES (2, 'tokyo', 'suv', true, 40); INSERT INTO vehicles VALUES (3, 'osaka', 'truck', false, 50);", "sql": "SELECT MIN(daily_distance) FROM vehicles WHERE city = 'tokyo' AND is_electric = true;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 85, "num_statements": 1} {"question": "What is the total quantity of chicken dishes sold in the month of January 2022?", "schema": "CREATE TABLE menu (menu_id INT, name VARCHAR(50), category VARCHAR(50), price DECIMAL(5,2)); INSERT INTO menu (menu_id, name, category, price) VALUES (1, 'Chicken Alfredo', 'Pasta', 12.99), (2, 'Chicken Parmesan', 'Entree', 16.99); CREATE TABLE orders (order_id INT, order_date DATE, menu_id INT, quantity INT); INSERT INTO orders (order_id, order_date, menu_id, quantity) VALUES (1, '2022-01-03', 1, 3), (2, '2022-01-03', 2, 2), (3, '2022-01-05', 1, 1);", "sql": "SELECT SUM(quantity) FROM orders JOIN menu ON orders.menu_id = menu.menu_id WHERE menu.category = 'Pasta' AND orders.order_date BETWEEN '2022-01-01' AND '2022-01-31';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 166, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: When @ new orleans is the team who has the highest amount of rebounds?", "schema": "CREATE TABLE table_17288825_6 (high_rebounds VARCHAR, team VARCHAR)", "sql": "SELECT high_rebounds FROM table_17288825_6 WHERE team = '@ New Orleans';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 72, "num_statements": 1} {"question": "What is the total number of hours of professional development completed by teachers in the \"Urban Education\" district?", "schema": "CREATE TABLE Teachers (TeacherID INT PRIMARY KEY, District VARCHAR(50), Hours INT); INSERT INTO Teachers (TeacherID, District, Hours) VALUES (1, 'Urban Education', 20);", "sql": "SELECT SUM(Hours) FROM Teachers WHERE District = 'Urban Education';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 67, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Performer 2 of compilation 2 is what performer 1?", "schema": "CREATE TABLE table_name_91 (performer_1 VARCHAR, performer_2 VARCHAR)", "sql": "SELECT performer_1 FROM table_name_91 WHERE performer_2 = 'compilation 2';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "advanced", "category": "plpgsql", "is_postgresql_specific": false, "sql_length": 74, "num_statements": 1} {"question": "List all journalists who have published articles on a specific topic", "schema": "CREATE TABLE journalists (id INT PRIMARY KEY, name TEXT NOT NULL); CREATE TABLE articles (id INT PRIMARY KEY, title TEXT NOT NULL, author_id INT, published_at DATE); CREATE TABLE articles_topics (article_id INT, topic_id INT); CREATE TABLE topics (id INT PRIMARY KEY, name TEXT NOT NULL);", "sql": "SELECT journalists.name FROM journalists INNER JOIN articles ON journalists.id = articles.author_id INNER JOIN articles_topics ON articles.id = articles_topics.article_id INNER JOIN topics ON articles_topics.topic_id = topics.id WHERE topics.name = 'specific_topic';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 266, "num_statements": 1} {"question": "Display the names and construction dates of all bridges in Washington", "schema": "CREATE TABLE bridges (id INT, name TEXT, construction_date DATE, location TEXT); INSERT INTO bridges (id, name, construction_date, location) VALUES (1, 'Bridge A', '1975-01-01', 'Washington'), (2, 'Bridge B', '1982-06-30', 'Texas');", "sql": "SELECT name, construction_date FROM bridges WHERE location = 'Washington';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 74, "num_statements": 1} {"question": "What is the total number of building permits issued in each state for sustainable building practices?", "schema": "CREATE TABLE permit_data (state VARCHAR(255), permit INT, sustainable BOOLEAN); INSERT INTO permit_data (state, permit, sustainable) VALUES ('California', 500, TRUE), ('Texas', 400, FALSE), ('New York', 600, TRUE);", "sql": "SELECT state, SUM(permit) FROM permit_data WHERE sustainable = TRUE GROUP BY state;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 83, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'dict_int' (example 16).", "schema": null, "sql": "select ts_lexize('intdict', '1279');", "explanation": "Example query from the 'dict_int' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 36, "num_statements": 1} {"question": "PostgreSQL regression test 'aggregates': Write the SELECT query (example 44).", "schema": null, "sql": "SELECT stddev_pop('inf'::numeric), stddev_samp('inf'::numeric);", "explanation": "Regression test for Aggregates in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT stddev_pop('inf'::numeric), stddev_samp('inf'::numeric)) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 63, "num_statements": 1} {"question": "How many donors from each country donated in 2023?", "schema": "CREATE TABLE donors (donor_id INT, donation_date DATE, donation_amount DECIMAL(10, 2), country VARCHAR(50)); INSERT INTO donors VALUES (17, '2023-01-01', 50.00, 'Brazil'), (18, '2023-01-15', 100.00, 'India'), (19, '2023-03-05', 200.00, 'China');", "sql": "SELECT country, COUNT(DISTINCT donor_id) FROM donors WHERE donation_date BETWEEN '2023-01-01' AND '2023-12-31' GROUP BY country;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 128, "num_statements": 1} {"question": "List all ships that entered the ports located in the USA in 2022, including their arrival dates and maximum cargo capacity.", "schema": "CREATE TABLE ship (ship_id INT, ship_name VARCHAR(50), max_cargo_capacity INT); INSERT INTO ship VALUES (1, 'Sealand Eagle', 12000); INSERT INTO ship VALUES (2, 'MSC Flaminia', 15000); CREATE TABLE port_of_call (port_id INT, ship_id INT, call_date DATE, port_country VARCHAR(50)); INSERT INTO port_of_call VALUES (1, 1, '2022-03-20', 'USA');", "sql": "SELECT s.ship_name, poc.call_date, s.max_cargo_capacity FROM ship s JOIN port_of_call poc ON s.ship_id = poc.ship_id JOIN port p ON poc.port_id = p.port_id WHERE p.port_country = 'USA' AND poc.call_date >= '2022-01-01' AND poc.call_date < '2023-01-01';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 252, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'macaddr8' (example 27).", "schema": null, "sql": "INSERT INTO macaddr8_data VALUES (20, '08002b01:02030405');", "explanation": "DML from PostgreSQL core regression test for Macaddr8.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 59, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many total points does Mike Hyndman have with more than 119 assists?", "schema": "CREATE TABLE table_name_98 (points INTEGER, player VARCHAR, assists VARCHAR)", "sql": "SELECT SUM(points) FROM table_name_98 WHERE player = 'mike hyndman' AND assists > 119;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 86, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the country of the play based in Athens at the Attis Theatre company?", "schema": "CREATE TABLE table_name_33 (country VARCHAR, base VARCHAR, company VARCHAR)", "sql": "SELECT country FROM table_name_33 WHERE base = 'athens' AND company = 'attis theatre';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 86, "num_statements": 1} {"question": "Write the SQL definition from PostgreSQL contrib extension 'contrib' (item 2).", "schema": null, "sql": "CREATE FUNCTION check_foreign_key()\nRETURNS trigger\nAS 'MODULE_PATHNAME'\nLANGUAGE C;", "explanation": "SQL definition from the 'contrib' PostgreSQL contrib extension.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "plpgsql_trigger", "is_postgresql_specific": false, "sql_length": 84, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'foreign_key' (example 646).", "schema": null, "sql": "INSERT INTO fk_notpartitioned_pk VALUES (2502, 2503);", "explanation": "DML from PostgreSQL core regression test for Foreign Key.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 53, "num_statements": 1} {"question": "Calculate the average calories of vegan menu items in 'HealthyDiner' view.", "schema": "CREATE VIEW HealthyDiner AS SELECT * FROM MenuItems WHERE is_vegan = TRUE;", "sql": "SELECT AVG(calories) FROM HealthyDiner WHERE item_type = 'Main Course';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 71, "num_statements": 1} {"question": "List the total cargo weight for each cargo type in the 'cargo_tracking' table, grouped by week?", "schema": "CREATE TABLE cargo_tracking (cargo_id INT, cargo_type VARCHAR(50), weight FLOAT, timestamp TIMESTAMP);", "sql": "SELECT DATE_FORMAT(timestamp, '%Y-%u') AS week, cargo_type, SUM(weight) FROM cargo_tracking GROUP BY week, cargo_type;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 118, "num_statements": 1} {"question": "Delete the record for the veteran with the SSN 123-45-6789 from the employees table.", "schema": "CREATE TABLE employees (ssn VARCHAR(11), first_name VARCHAR(20), last_name VARCHAR(20), job_title VARCHAR(30));", "sql": "DELETE FROM employees WHERE ssn = '123-45-6789';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_delete", "is_postgresql_specific": false, "sql_length": 48, "num_statements": 1} {"question": "pgTAP test for Valueset (assertion 311).", "schema": null, "sql": "/****************************************************************************/\n-- Test set_ne() with an array argument.\nSELECT * FROM check_test(\n set_ne(\n 'justnames',\n ARRAY['Andrew', 'Anna' ],\n 'whatever'\n ),\n true,\n 'set_ne(prepared, array, desc)',\n 'whatever',\n ''\n);", "explanation": "SQL assertion from pgTAP test suite for Valueset.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 311, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who is the opponent of the game after week 4 on Sun. Nov. 17?", "schema": "CREATE TABLE table_name_70 (opponent VARCHAR, week VARCHAR, date VARCHAR)", "sql": "SELECT opponent FROM table_name_70 WHERE week > 4 AND date = 'sun. nov. 17';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 76, "num_statements": 1} {"question": "Calculate the average property price for properties with inclusive housing policies in New York.", "schema": "CREATE TABLE properties_inclusive_ny (id INT, price FLOAT); INSERT INTO properties_inclusive_ny (id, price) VALUES (1, 550000), (2, 600000), (3, 650000);", "sql": "SELECT AVG(price) FROM properties_inclusive_ny WHERE id IN (SELECT property_id FROM inclusive_housing WHERE city = 'New York');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 127, "num_statements": 1} {"question": "What is the average CO2 emission (in grams) of factories in the 'Europe' region that manufactured 'Wool Sweaters' in Q4 of 2021?", "schema": "CREATE TABLE CO2Emission (id INT PRIMARY KEY, factory_name VARCHAR(50), region VARCHAR(50), garment_type VARCHAR(50), co2_emission INT, manufacturing_date DATE); INSERT INTO CO2Emission (id, factory_name, region, garment_type, co2_emission, manufacturing_date) VALUES (1, 'Factory B', 'Europe', 'Wool Sweaters', 500, '2021-11-12');", "sql": "SELECT AVG(co2_emission) as avg_co2_emission FROM CO2Emission WHERE region = 'Europe' AND garment_type = 'Wool Sweaters' AND manufacturing_date BETWEEN '2021-10-01' AND '2021-12-31';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 182, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'citext' (example 21).", "schema": null, "sql": "SELECT citext_cmp('aardvark'::citext, 'aardVark'::citext) AS zero;", "explanation": "Example query from the 'citext' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "What is the average dapp rating for each category?", "schema": "CREATE TABLE dapp_categories (category_id INT, category_name VARCHAR(30), category_description TEXT, avg_rating DECIMAL(3,2), total_dapps INT); INSERT INTO dapp_categories (category_id, category_name, category_description, avg_rating, total_dapps) VALUES (1, 'Gaming', 'Interactive gaming applications', 4.4, 12), (2, 'Finance', 'Decentralized financial services', 4.6, 8), (3, 'Social', 'Social media and networking platforms', 4.1, 15);", "sql": "SELECT category_name, avg_rating FROM dapp_categories;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 54, "num_statements": 1} {"question": "What is the average age of employees with the title 'Supervisor' in the 'employees' table?", "schema": "CREATE TABLE employees(id INT, name VARCHAR(255), title VARCHAR(255), age INT); INSERT INTO employees(id, name, title, age) VALUES ('1', 'Jane Smith', 'Mining Supervisor', '55');", "sql": "SELECT AVG(age) FROM employees WHERE title LIKE '%Supervisor%';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 63, "num_statements": 1} {"question": "Identify the number of unique ethical clothing brands in the inventory.", "schema": "CREATE TABLE Inventory (id INT, product_name VARCHAR(255), brand VARCHAR(255), is_ethical BOOLEAN); INSERT INTO Inventory (id, product_name, brand, is_ethical) VALUES (1, 'Organic Cotton T-Shirt', 'Green Values', true), (2, 'Recycled Polyester Hoodie', 'EcoWear', true), (3, 'Regular T-Shirt', 'Fashion First', false);", "sql": "SELECT COUNT(DISTINCT brand) FROM Inventory WHERE is_ethical = true;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 68, "num_statements": 1} {"question": "Show an example of PostgreSQL ALTER TABLE (example 25).", "schema": null, "sql": "ALTER TABLE cities ATTACH PARTITION cities_ab FOR VALUES IN ('a', 'b');", "explanation": "PostgreSQL ALTER TABLE command.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "ddl_alter", "is_postgresql_specific": true, "sql_length": 71, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who replaced when position in table is pre-season?", "schema": "CREATE TABLE table_10592536_8 (replaced_by VARCHAR, position_in_table VARCHAR)", "sql": "SELECT replaced_by FROM table_10592536_8 WHERE position_in_table = 'Pre-season';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 80, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: From what series is Peck Up Your Troubles?", "schema": "CREATE TABLE table_name_66 (series VARCHAR, title VARCHAR)", "sql": "SELECT series FROM table_name_66 WHERE title = 'peck up your troubles';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 71, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the date of the away game in week 8?", "schema": "CREATE TABLE table_name_2 (date VARCHAR, home_away_game VARCHAR, week VARCHAR)", "sql": "SELECT date FROM table_name_2 WHERE home_away_game = 'away' AND week = 8;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 73, "num_statements": 1} {"question": "Which community policing districts have the highest police interactions?", "schema": "CREATE TABLE CommunityPolicing (id INT, district VARCHAR(255), police_interactions INT);", "sql": "SELECT district, MAX(police_interactions) FROM CommunityPolicing GROUP BY district;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 83, "num_statements": 1} {"question": "How many satellites have been launched by each organization?", "schema": "CREATE TABLE satellites_by_org (org TEXT, num_satellites INT); INSERT INTO satellites_by_org (org, num_satellites) VALUES ('NASA', 500), ('SpaceX', 300), ('ESA', 250), ('ULA', 200), ('ISRO', 150), ('Roscosmos', 100);", "sql": "SELECT org, SUM(num_satellites) FROM satellites_by_org GROUP BY org;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 68, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the rank of The Big Doll House?", "schema": "CREATE TABLE table_name_62 (rank INTEGER, title VARCHAR)", "sql": "SELECT MIN(rank) FROM table_name_62 WHERE title = 'the big doll house';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 71, "num_statements": 1} {"question": "Which country has the highest total production of lead, Peru or the USA?", "schema": "CREATE TABLE lead_production (country VARCHAR(20), quantity INT); INSERT INTO lead_production (country, quantity) VALUES ('Peru', 450000), ('USA', 400000);", "sql": "SELECT country, MAX(quantity) FROM lead_production WHERE country IN ('Peru', 'USA') GROUP BY country;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 101, "num_statements": 1} {"question": "PostgreSQL regression test 'point': Write the SELECT query (example 42).", "schema": null, "sql": "SELECT * FROM pg_input_error_info('1,y', 'point');", "explanation": "Regression test for Point in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT * FROM pg_input_error_info('1,y', 'point')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 50, "num_statements": 1} {"question": "What is the average recycling rate in percentage for the bottom 2 material types in 2019?", "schema": "CREATE TABLE recycling_rates (material VARCHAR(255), year INT, rate FLOAT); INSERT INTO recycling_rates (material, year, rate) VALUES ('Plastic', 2019, 12.0), ('Glass', 2019, 22.0), ('Paper', 2019, 35.0), ('Metal', 2019, 45.0);", "sql": "SELECT r.material, AVG(r.rate) as avg_rate FROM recycling_rates r WHERE r.year = 2019 AND r.material IN (SELECT material FROM recycling_rates WHERE year = 2019 ORDER BY rate LIMIT 2 OFFSET 2) GROUP BY r.material;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 212, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the sum of the ranks for the film, eddie murphy raw?", "schema": "CREATE TABLE table_name_19 (rank INTEGER, title VARCHAR)", "sql": "SELECT SUM(rank) FROM table_name_19 WHERE title = 'eddie murphy raw';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 69, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: what's the no with current club being panathinaikos and position being forward", "schema": "CREATE TABLE table_12962773_1 (no VARCHAR, current_club VARCHAR, position VARCHAR)", "sql": "SELECT COUNT(no) FROM table_12962773_1 WHERE current_club = 'Panathinaikos' AND position = 'Forward';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 101, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many totals does Chile have when the number of silvers is more than 0?", "schema": "CREATE TABLE table_name_73 (total VARCHAR, nation VARCHAR, silver VARCHAR)", "sql": "SELECT COUNT(total) FROM table_name_73 WHERE nation = 'chile' AND silver > 0;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 77, "num_statements": 1} {"question": "What is the average age of patients who have received cognitive behavioral therapy (CBT)?", "schema": "CREATE TABLE patients (patient_id INT, age INT, gender VARCHAR(20), condition VARCHAR(50)); INSERT INTO patients (patient_id, age, gender, condition) VALUES (1, 35, 'Female', 'Depression'); CREATE TABLE treatments (treatment_id INT, patient_id INT, therapy_type VARCHAR(50), duration INT); INSERT INTO treatments (treatment_id, patient_id, therapy_type, duration) VALUES (1, 1, 'CBT', 12);", "sql": "SELECT AVG(patients.age) FROM patients JOIN treatments ON patients.patient_id = treatments.patient_id WHERE treatments.therapy_type = 'CBT';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 140, "num_statements": 1} {"question": "What is the maximum daily water usage (in million gallons) in the state of New York in the summer months (June, July, August)?", "schema": "CREATE TABLE ny_water_usage (id INT, daily_usage FLOAT, usage_location VARCHAR(255), usage_date DATE); INSERT INTO ny_water_usage (id, daily_usage, usage_location, usage_date) VALUES (1, 5.6, 'New York', '2022-07-01'), (2, 6.2, 'New York', '2022-08-01'), (3, 4.8, 'New York', '2022-05-01');", "sql": "SELECT MAX(daily_usage) FROM ny_water_usage WHERE usage_location = 'New York' AND EXTRACT(MONTH FROM usage_date) IN (6, 7, 8);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 126, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'vacuum' (example 88).", "schema": null, "sql": "INSERT INTO pvactst2 SELECT generate_series(1, 1000);", "explanation": "DML from PostgreSQL core regression test for Vacuum.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 53, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many people are in the crowd in south melbourne?", "schema": "CREATE TABLE table_name_66 (crowd VARCHAR, home_team VARCHAR)", "sql": "SELECT COUNT(crowd) FROM table_name_66 WHERE home_team = 'south melbourne';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 75, "num_statements": 1} {"question": "What is the total number of crimes committed in 'Sydney' in the month of 'February' for each year?", "schema": "CREATE TABLE crimes (id INT, city VARCHAR(20), month VARCHAR(10), year INT, crimes INT);", "sql": "SELECT year, SUM(crimes) FROM crimes WHERE city = 'Sydney' AND month = 'February' GROUP BY year;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 96, "num_statements": 1} {"question": "Calculate the maximum daily production quantity of copper for mining sites in Africa, for the year 2018, with over 50 employees.", "schema": "CREATE TABLE copper_mine (site_id INT, country VARCHAR(50), num_employees INT, extraction_date DATE, quantity INT); INSERT INTO copper_mine (site_id, country, num_employees, extraction_date, quantity) VALUES (1, 'Africa', 65, '2018-01-02', 1500), (2, 'Africa', 55, '2018-12-31', 1700), (3, 'Africa', 70, '2018-03-04', 2100);", "sql": "SELECT country, MAX(quantity) as max_daily_copper_prod FROM copper_mine WHERE num_employees > 50 AND country = 'Africa' AND extraction_date >= '2018-01-01' AND extraction_date <= '2018-12-31' GROUP BY country;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 209, "num_statements": 1} {"question": "What is the total number of autonomous vehicles sold in South Korea in 2020?", "schema": "CREATE TABLE AutonomousSales (Id INT, Vehicle VARCHAR(100), Year INT, Country VARCHAR(50), IsAutonomous BOOLEAN); INSERT INTO AutonomousSales (Id, Vehicle, Year, Country, IsAutonomous) VALUES (1, 'Model S', 2020, 'South Korea', true), (2, 'Model 3', 2020, 'South Korea', true), (3, 'Golf', 2020, 'South Korea', false), (4, 'Polo', 2020, 'South Korea', false), (5, 'Tesla Model X', 2020, 'South Korea', true);", "sql": "SELECT COUNT(*) FROM AutonomousSales WHERE Year = 2020 AND Country = 'South Korea' AND IsAutonomous = true;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 107, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What medium was used for the sculpture of George Harold Baker?", "schema": "CREATE TABLE table_20903658_1 (medium VARCHAR, title_subject VARCHAR)", "sql": "SELECT medium FROM table_20903658_1 WHERE title_subject = 'George Harold Baker';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 80, "num_statements": 1} {"question": "Compare the total sales revenue of recreational and medical cannabis in Michigan and Illinois.", "schema": "CREATE TABLE sales_revenue (sales_type VARCHAR(10), state VARCHAR(20), revenue INT); INSERT INTO sales_revenue (sales_type, state, revenue) VALUES ('Recreational', 'Michigan', 1000); INSERT INTO sales_revenue (sales_type, state, revenue) VALUES ('Medical', 'Michigan', 800); INSERT INTO sales_revenue (sales_type, state, revenue) VALUES ('Recreational', 'Illinois', 1200); INSERT INTO sales_revenue (sales_type, state, revenue) VALUES ('Medical', 'Illinois', 900);", "sql": "SELECT sales_type, state, SUM(revenue) FROM sales_revenue WHERE state IN ('Michigan', 'Illinois') GROUP BY sales_type, state;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 125, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'create_type' (example 16).", "schema": null, "sql": "CREATE TYPE int42 (\n internallength = 4,\n input = int42_in,\n output = int42_out,\n alignment = int4,\n default = 42,\n passedbyvalue\n);", "explanation": "DDL from PostgreSQL core regression test for Create Type.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_advanced", "is_postgresql_specific": true, "sql_length": 144, "num_statements": 1} {"question": "What is the total revenue for restaurants that serve sustainable menu items?", "schema": "CREATE TABLE restaurant_revenue(location VARCHAR(255), revenue INT); INSERT INTO restaurant_revenue(location, revenue) VALUES ('Location1', 5000), ('Location2', 7000), ('Location3', 3000), ('Restaurant4', 6000), ('Restaurant5', 4000), ('Restaurant9', 9000), ('Restaurant10', 8000);", "sql": "SELECT SUM(revenue) FROM restaurant_revenue INNER JOIN sustainable_sourcing ON restaurant_revenue.location = sustainable_sourcing.menu_item WHERE sustainable = TRUE;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 165, "num_statements": 1} {"question": "What is the total number of network devices installed in each region and the total number of mobile subscribers in those regions?", "schema": "CREATE TABLE network_devices (id INT, region VARCHAR(20), install_date DATE); CREATE TABLE mobile_subscribers (id INT, region VARCHAR(20), data_usage INT, usage_date DATE);", "sql": "SELECT n.region, COUNT(n.id) AS num_devices, COUNT(m.id) AS num_subscribers FROM network_devices n INNER JOIN mobile_subscribers m ON n.region = m.region GROUP BY n.region;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 172, "num_statements": 1} {"question": "What is the total number of tourists visiting countries with a travel advisory level of 3 or higher?", "schema": "CREATE TABLE travel_advisory (id INT, country VARCHAR(20), level INT); INSERT INTO travel_advisory (id, country, level) VALUES (1, 'Mexico', 2), (2, 'Brazil', 3), (3, 'Spain', 1), (4, 'Italy', 3), (5, 'Germany', 1);", "sql": "SELECT SUM(tourists) FROM (SELECT 1 AS tourists FROM travel_advisory WHERE level >= 3);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 87, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the final score for the game that was contested on February 1, 2009?", "schema": "CREATE TABLE table_10548224_1 (final_score VARCHAR, date_contested VARCHAR)", "sql": "SELECT final_score FROM table_10548224_1 WHERE date_contested = 'February 1, 2009';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 83, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Where is the free fare zone for the Arena station?", "schema": "CREATE TABLE table_name_65 (free_fare_zone VARCHAR, station_name VARCHAR)", "sql": "SELECT free_fare_zone FROM table_name_65 WHERE station_name = 'arena';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 70, "num_statements": 1} {"question": "What is the total number of fish in all tanks on the farm?", "schema": "CREATE TABLE Tank_Inventory (tank VARCHAR(50), species VARCHAR(50), quantity INT); INSERT INTO Tank_Inventory (tank, species, quantity) VALUES ('Tank1', 'Salmon', 200), ('Tank1', 'Trout', 150), ('Tank2', 'Salmon', 250), ('Tank2', 'Tilapia', 300);", "sql": "SELECT SUM(quantity) FROM Tank_Inventory;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 41, "num_statements": 1} {"question": "How many green building projects are there in each city in the 'smart_cities' schema?", "schema": "CREATE TABLE projects (project_id INT, city TEXT, schema_name TEXT); INSERT INTO projects (project_id, city, schema_name) VALUES (1, 'New York', 'smart_cities'), (2, 'Los Angeles', 'smart_cities'), (3, 'Chicago', 'smart_cities'), (4, 'Houston', 'smart_cities'); CREATE TABLE green_projects (project_id INT, project_type TEXT); INSERT INTO green_projects (project_id, project_type) VALUES (1, 'green building'), (2, 'renewable energy'), (3, 'smart city'), (4, 'carbon offset');", "sql": "SELECT city, COUNT(DISTINCT project_id) FROM projects JOIN green_projects ON projects.project_id = green_projects.project_id WHERE schema_name = 'smart_cities' GROUP BY city;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 174, "num_statements": 1} {"question": "What is the maximum claim amount for renters insurance in Texas in the last month?", "schema": "CREATE TABLE claims (id INT, state VARCHAR(2), policy_type VARCHAR(20), claim_amount DECIMAL(10,2), claim_date DATE); INSERT INTO claims (id, state, policy_type, claim_amount, claim_date) VALUES (1, 'TX', 'Auto', 2500, '2022-02-12'), (2, 'TX', 'Auto', 3500, '2022-06-23'), (3, 'TX', 'Renters', 1200, '2022-01-14');", "sql": "SELECT MAX(claim_amount) FROM claims WHERE state = 'TX' AND policy_type = 'Renters' AND MONTH(claim_date) = 1;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 110, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What's the earliest year the new york giants lost at new meadowlands stadium?", "schema": "CREATE TABLE table_name_31 (year INTEGER, loser VARCHAR, location VARCHAR)", "sql": "SELECT MIN(year) FROM table_name_31 WHERE loser = 'new york giants' AND location = 'new meadowlands stadium';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 109, "num_statements": 1} {"question": "What is the average number of shots on goal for each team in the teams table, grouped by their league, and only for teams who have more than 200 shots on goal in total?", "schema": "CREATE TABLE teams_stats (team_id INT PRIMARY KEY, team_name VARCHAR(255), league VARCHAR(50), shots_on_goal INT, FOREIGN KEY (team_id) REFERENCES teams(team_id));", "sql": "SELECT league, AVG(shots_on_goal) as avg_shots_on_goal FROM teams_stats GROUP BY league HAVING SUM(shots_on_goal) > 200;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 120, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Where was Outback Steakhouse Pro-AM held?", "schema": "CREATE TABLE table_15346009_1 (location VARCHAR, tournament VARCHAR)", "sql": "SELECT location FROM table_15346009_1 WHERE tournament = 'Outback Steakhouse Pro-Am';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 85, "num_statements": 1} {"question": "Count the number of users who have achieved a step count greater than 15000 for at least 20 days in the last 30 days.", "schema": "CREATE TABLE user_steps (user_id INT, date DATE, steps INT);", "sql": "SELECT COUNT(DISTINCT user_id) FROM user_steps WHERE steps > 15000 GROUP BY user_id HAVING COUNT(DISTINCT date) >= 20 AND date >= CURDATE() - INTERVAL 30 DAY;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 158, "num_statements": 1} {"question": "What are the total biomass and stock counts for each species in Q1 2022?", "schema": "CREATE TABLE fish_stock (date DATE, species VARCHAR(50), biomass FLOAT, stock_count INTEGER); INSERT INTO fish_stock (date, species, biomass, stock_count) VALUES ('2022-01-01', 'Tilapia', 2500, 5000);", "sql": "SELECT EXTRACT(QUARTER FROM date) as quarter, species, SUM(biomass) as total_biomass, SUM(stock_count) as total_stock_count FROM fish_stock WHERE date >= '2022-01-01' AND date <= '2022-03-31' GROUP BY quarter, species;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 218, "num_statements": 1} {"question": "What is the average number of hours volunteered per volunteer?", "schema": "CREATE TABLE Volunteer_Hours (id INT, hours INT, volunteer_id INT, month INT); CREATE TABLE Volunteers (id INT, name TEXT);", "sql": "SELECT AVG(hours) as avg_hours_per_volunteer FROM Volunteer_Hours JOIN Volunteers ON Volunteer_Hours.volunteer_id = Volunteers.id;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 130, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'isn' (example 4).", "schema": null, "sql": "SELECT '9780123456786'::ISBN,\n '123456789X'::ISBN,\n '9780123456786'::ISBN13::ISBN,\n '9780123456786'::EAN13::ISBN;", "explanation": "Example query from the 'isn' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 131, "num_statements": 1} {"question": "What is the minimum price of eco-friendly tours in Costa Rica?", "schema": "CREATE TABLE eco_tours (tour_id INT, name VARCHAR(255), country VARCHAR(255), price FLOAT, eco_friendly BOOLEAN); INSERT INTO eco_tours (tour_id, name, country, price, eco_friendly) VALUES (1, 'Rainforest Adventure', 'Costa Rica', 120.0, true), (2, 'Volcano Hike', 'Costa Rica', 80.0, true), (3, 'Beach Cleanup', 'Costa Rica', 50.0, true);", "sql": "SELECT MIN(price) FROM eco_tours WHERE country = 'Costa Rica' AND eco_friendly = true;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 86, "num_statements": 1} {"question": "Which programs had the highest increase in total expenses compared to the same quarter last year?", "schema": "CREATE TABLE programs (id INT, program_name VARCHAR(50), quarter INT, year INT, expenses DECIMAL(10,2)); INSERT INTO programs (id, program_name, quarter, year, expenses) VALUES (1, 'Education', 1, 2021, 15000.00), (2, 'Health', 2, 2021, 20000.00), (3, 'Education', 1, 2022, 17000.00), (4, 'Health', 2, 2022, 25000.00);", "sql": "SELECT program_name, (expenses - (SELECT expenses FROM programs p2 WHERE p2.program_name = programs.program_name AND p2.quarter = programs.quarter AND p2.year = programs.year - 1)) AS difference INTO tmp_table FROM programs ORDER BY difference DESC LIMIT 1; SELECT program_name, difference FROM tmp_table; DROP TABLE tmp_table;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 327, "num_statements": 3} {"question": "What is the average time to complete threat intelligence reports, by region, for the top three contractors in the defense industry in the past year?", "schema": "CREATE TABLE threat_intelligence_reports (report_id INT, report_date DATE, contractor TEXT, region TEXT, report_description TEXT, completion_date DATE); INSERT INTO threat_intelligence_reports (report_id, report_date, contractor, region, report_description, completion_date) VALUES (1, '2022-02-01', 'ACME Inc', 'Northeast', 'Cyber threat', '2022-02-05'), (2, '2022-03-15', 'Beta Corp', 'Midwest', 'Physical security threat', '2022-03-17'), (3, '2022-06-30', 'Gamma Industries', 'Southwest', 'Insider threat', '2022-07-05');", "sql": "SELECT region, AVG(DATEDIFF(completion_date, report_date)) as avg_time_to_complete FROM threat_intelligence_reports WHERE contractor IN (SELECT contractor FROM (SELECT contractor, COUNT(*) as num_reports FROM threat_intelligence_reports GROUP BY contractor ORDER BY num_reports DESC LIMIT 3) as top_three_contractors) GROUP BY region;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 334, "num_statements": 1} {"question": "What is the total number of electric vehicle adoption statistics records for each country in the database?", "schema": "CREATE TABLE ElectricVehicleAdoptionStatistics (Id INT, Country VARCHAR(50), AdoptionRate DECIMAL(5,2), Year INT); INSERT INTO ElectricVehicleAdoptionStatistics (Id, Country, AdoptionRate, Year) VALUES (1, 'USA', 0.12, 2018); INSERT INTO ElectricVehicleAdoptionStatistics (Id, Country, AdoptionRate, Year) VALUES (2, 'China', 0.23, 2018); INSERT INTO ElectricVehicleAdoptionStatistics (Id, Country, AdoptionRate, Year) VALUES (3, 'Germany', 0.08, 2018); INSERT INTO ElectricVehicleAdoptionStatistics (Id, Country, AdoptionRate, Year) VALUES (4, 'Japan', 0.17, 2018);", "sql": "SELECT Country, COUNT(*) FROM ElectricVehicleAdoptionStatistics GROUP BY Country;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 81, "num_statements": 1} {"question": "Delete all records from the inventory table that are more than 30 days old.", "schema": "CREATE TABLE inventory (id INT, ingredient_name TEXT, quantity INT, last_updated DATE);", "sql": "DELETE FROM inventory WHERE last_updated < DATE(NOW()) - INTERVAL 30 DAY;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_delete", "is_postgresql_specific": false, "sql_length": 73, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What language did the word mahjong originate from?", "schema": "CREATE TABLE table_name_31 (source_language VARCHAR, meaning VARCHAR)", "sql": "SELECT source_language FROM table_name_31 WHERE meaning = 'mahjong';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 68, "num_statements": 1} {"question": "What is the average productivity of workers in the gold mines, categorized by their roles, for the year 2020?", "schema": "CREATE TABLE gold_mines (id INT, worker_role TEXT, productivity FLOAT, extraction_year INT); INSERT INTO gold_mines (id, worker_role, productivity, extraction_year) VALUES (1, 'Engineer', 12.5, 2020), (2, 'Miner', 8.3, 2020), (3, 'Supervisor', 10.8, 2020);", "sql": "SELECT worker_role, AVG(productivity) FROM gold_mines WHERE extraction_year = 2020 GROUP BY worker_role;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 104, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Where did the team in which Tom Pryce was in Pole Position race?", "schema": "CREATE TABLE table_name_10 (location VARCHAR, pole_position VARCHAR)", "sql": "SELECT location FROM table_name_10 WHERE pole_position = 'tom pryce';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 69, "num_statements": 1} {"question": "What were the total sales revenues for the drug \"Humira\" by company in 2019?", "schema": "CREATE TABLE pharmaceutical_sales (company VARCHAR(255), drug VARCHAR(255), qty_sold INT, sales_revenue FLOAT, sale_date DATE); INSERT INTO pharmaceutical_sales (company, drug, qty_sold, sales_revenue, sale_date) VALUES ('AbbVie', 'Humira', 300, 18000.00, '2019-01-01');", "sql": "SELECT company, SUM(sales_revenue) FROM pharmaceutical_sales WHERE drug = 'Humira' AND sale_date BETWEEN '2019-01-01' AND '2019-12-31' GROUP BY company;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 152, "num_statements": 1} {"question": "What was the most recent Mars rover launch?", "schema": "CREATE TABLE Mars_Rover_Launches (rover_name TEXT, launch_date DATE); INSERT INTO Mars_Rover_Launches (rover_name, launch_date) VALUES ('Sojourner', '1996-12-04'), ('Spirit', '2003-06-10'), ('Opportunity', '2003-07-07'), ('Curiosity', '2011-11-26'), ('Perseverance', '2020-07-30');", "sql": "SELECT rover_name, launch_date FROM Mars_Rover_Launches ORDER BY launch_date DESC LIMIT 1;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 90, "num_statements": 1} {"question": "What is the total funding amount for startups founded by people from underrepresented racial or ethnic groups in the USA?", "schema": "CREATE TABLE startups (id INT, name TEXT, location TEXT, founder_race TEXT, funding_amount INT); INSERT INTO startups (id, name, location, founder_race, funding_amount) VALUES (1, 'Startup A', 'USA', 'African American', 3000000); INSERT INTO startups (id, name, location, founder_race, funding_amount) VALUES (2, 'Startup B', 'Canada', 'Caucasian', 5000000); INSERT INTO startups (id, name, location, founder_race, funding_amount) VALUES (3, 'Startup C', 'USA', 'Hispanic', 4000000);", "sql": "SELECT SUM(funding_amount) FROM startups WHERE location = 'USA' AND founder_race IN ('African American', 'Hispanic', 'Native American', 'Pacific Islander');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 156, "num_statements": 1} {"question": "Find the difference in revenue between restaurants 1 and 2. Display the result as a single value.", "schema": "CREATE TABLE restaurant_revenue (restaurant_id INT, revenue INT); INSERT INTO restaurant_revenue (restaurant_id, revenue) VALUES (1, 1200), (2, 1500), (3, 800), (4, 2000), (5, 1700);", "sql": "SELECT ABS(SUM(r1.revenue) - SUM(r2.revenue)) as revenue_difference FROM restaurant_revenue r1, restaurant_revenue r2 WHERE r1.restaurant_id = 1 AND r2.restaurant_id = 2;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 170, "num_statements": 1} {"question": "What is the total revenue for each sales region?", "schema": "CREATE TABLE sales_region (id INT, region VARCHAR(255), revenue INT); INSERT INTO sales_region (id, region, revenue) VALUES (1, 'Northeast', 500000), (2, 'Southeast', 600000), (3, 'Midwest', 400000), (4, 'Southwest', 700000), (5, 'West', 800000);", "sql": "SELECT region, SUM(revenue) FROM sales_region GROUP BY region;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 62, "num_statements": 1} {"question": "What is the total budget for all dam projects with more than 10 million budget?", "schema": "CREATE TABLE Projects (id INT PRIMARY KEY, name VARCHAR(255), budget INT, start_date DATE, end_date DATE, Infrastructure_id INT, FOREIGN KEY (Infrastructure_id) REFERENCES Infrastructure(id));", "sql": "SELECT SUM(budget) as total_budget FROM Projects WHERE Infrastructure_id IN (SELECT id FROM Infrastructure WHERE type = 'Dam') GROUP BY Infrastructure_id HAVING total_budget > 10000000;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 185, "num_statements": 1} {"question": "What was the total funding received by programs targeting 'youth'?", "schema": "CREATE TABLE Programs (program_id INT, target_group VARCHAR(50), funding_amount DECIMAL(10,2), funding_date DATE); INSERT INTO Programs (program_id, target_group, funding_amount, funding_date) VALUES (1, 'youth', 5000.00, '2021-01-01'), (2, 'seniors', 7000.00, '2021-02-01'), (3, 'adults', 3000.00, '2021-03-01');", "sql": "SELECT SUM(funding_amount) AS total_funding FROM Programs WHERE target_group = 'youth';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 87, "num_statements": 1} {"question": "PostgreSQL regression test 'create_view': Write the SELECT query (example 283).", "schema": null, "sql": "select pg_get_viewdef('tt23v', true);", "explanation": "Regression test for Create View in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select pg_get_viewdef('tt23v', true)) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 37, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which average Points have a Lost larger than 2, and Drawn larger than 2, and a Difference of 0?", "schema": "CREATE TABLE table_name_74 (points INTEGER, difference VARCHAR, lost VARCHAR, drawn VARCHAR)", "sql": "SELECT AVG(points) FROM table_name_74 WHERE lost > 2 AND drawn > 2 AND difference = '0';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 88, "num_statements": 1} {"question": "What is the average depth of all marine protected areas in the Pacific Ocean?", "schema": "CREATE TABLE marine_protected_areas (name TEXT, location TEXT, avg_depth FLOAT); CREATE TABLE ocean_regions (name TEXT, area FLOAT);", "sql": "SELECT AVG(avg_depth) FROM marine_protected_areas WHERE location = (SELECT name FROM ocean_regions WHERE area = 'Pacific Ocean');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 134, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What race has a distance of 1200 m?", "schema": "CREATE TABLE table_2062148_3 (race VARCHAR, distance VARCHAR)", "sql": "SELECT race FROM table_2062148_3 WHERE distance = '1200 m';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 59, "num_statements": 1} {"question": "PostgreSQL regression test 'stats_ext': Write the SELECT query (example 619).", "schema": null, "sql": "SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_partial WHERE a = 0 OR b = 0 OR c = 0');", "explanation": "Regression test for Stats Ext in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_partial WHERE a = 0 OR b = 0 OR c = 0')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 100, "num_statements": 1} {"question": "PostgreSQL regression test 'partition_info': Write the SELECT query (example 68).", "schema": null, "sql": "SELECT pg_partition_root('ptif_test_matview');", "explanation": "Regression test for Partition Info in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT pg_partition_root('ptif_test_matview')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 46, "num_statements": 1} {"question": "Insert new satellite image", "schema": "CREATE TABLE satellite_images (id INT PRIMARY KEY, farm_id INT, image_url VARCHAR(100), capture_date TIMESTAMP);", "sql": "INSERT INTO satellite_images (id, farm_id, image_url, capture_date) VALUES (1, 123, 'https://example.com/image1.jpg', '2022-02-01 14:30:00');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 141, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What's the count for obese children and adolescents ranked 48?", "schema": "CREATE TABLE table_name_1 (obese_children_and_adolescents VARCHAR, obesity_rank VARCHAR)", "sql": "SELECT obese_children_and_adolescents FROM table_name_1 WHERE obesity_rank = 48;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 80, "num_statements": 1} {"question": "pgTAP test for Index (assertion 68).", "schema": null, "sql": "/****************************************************************************/\n-- Test index_is_primary().\nSELECT * FROM check_test(\n index_is_primary( 'public', 'sometab', 'sometab_pkey', 'whatever' ),\n true,\n 'index_is_primary()',\n 'whatever',\n ''\n);", "explanation": "SQL assertion from pgTAP test suite for Index.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 267, "num_statements": 1} {"question": "What is the total biomass of each species in the Arctic Ocean by year?", "schema": "CREATE TABLE ArcticOceanBiomassYearly(year INT, species VARCHAR(50), biomass FLOAT);INSERT INTO ArcticOceanBiomassYearly(year, species, biomass) VALUES(2018, 'Polar Bear', 550.0), (2019, 'Seal', 210.0), (2020, 'Walrus', 1050.0), (2018, 'Polar Bear', 600.0), (2019, 'Seal', 220.0);", "sql": "SELECT species, year, SUM(biomass) FROM ArcticOceanBiomassYearly GROUP BY species, year;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 88, "num_statements": 1} {"question": "What is the total number of members in all unions?", "schema": "CREATE TABLE union_membership (id INT, union VARCHAR(20), member_count INT); INSERT INTO union_membership (id, union, member_count) VALUES (1, 'construction', 3500), (2, 'education', 8000), (3, 'manufacturing', 5000);", "sql": "SELECT SUM(member_count) FROM union_membership;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 47, "num_statements": 1} {"question": "Show a SQL definition from the zombodb project (zombodb--3000.1.14--3000.1.15, item 2).", "schema": null, "sql": "CREATE FUNCTION zdb.schema_version() RETURNS text LANGUAGE sql AS $$\nSELECT '3000.1.15 (4edcf0acb878db742412f1757abf17e9441e0efa)'\n$$;", "explanation": "SQL definition from the open-source zombodb PostgreSQL project.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 134, "num_statements": 1} {"question": "Create a new table for storing concert ticket sales data, with columns for concert_id, artist_name, city, and revenue.", "schema": "CREATE SCHEMA music; USE music;", "sql": "CREATE TABLE concert_ticket_sales (concert_id INT, artist_name VARCHAR(255), city VARCHAR(255), revenue FLOAT);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_table", "is_postgresql_specific": false, "sql_length": 111, "num_statements": 1} {"question": "What is the internet penetration in African countries with a total population greater than 20 million?", "schema": "CREATE TABLE internet_access (id INT, country VARCHAR(50), urban_population FLOAT, rural_population FLOAT, total_population FLOAT, internet_users FLOAT); INSERT INTO internet_access (id, country, urban_population, rural_population, total_population, internet_users) VALUES (3, 'Nigeria', 71.4, 57.1, 200.96, 112.03);", "sql": "SELECT country, (internet_users / total_population) * 100 as internet_penetration FROM internet_access WHERE total_population > 20000000 AND country IN ('Nigeria', 'South Africa', 'Egypt', 'Ethiopia', 'Kenya') ORDER BY internet_penetration DESC;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 245, "num_statements": 1} {"question": "PostgreSQL regression test 'jsonb': Write the SELECT query (example 90).", "schema": null, "sql": "SELECT '{\"a\":\"b\"}'::jsonb <@ '{\"a\":\"b\", \"b\":1, \"c\":null}';", "explanation": "Regression test for Jsonb in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT '{\"a\":\"b\"}'::jsonb <@ '{\"a\":\"b\", \"b\":1, \"c\":null}') AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": true, "sql_length": 58, "num_statements": 1} {"question": "What is the maximum amount of water consumed in a single day in the 'environmental_data' table?", "schema": "CREATE TABLE environmental_data (id INT, date DATE, water_consumption INT); INSERT INTO environmental_data (id, date, water_consumption) VALUES (1, '2022-01-01', 5000); INSERT INTO environmental_data (id, date, water_consumption) VALUES (2, '2022-01-02', 5500);", "sql": "SELECT MAX(water_consumption) as max_water_consumption FROM environmental_data;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 79, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What were the investment earnings in the year that State and Federal taxes were $13,999,169?", "schema": "CREATE TABLE table_name_26 (investment_earnings VARCHAR, state_ VARCHAR, _federal VARCHAR)", "sql": "SELECT investment_earnings FROM table_name_26 WHERE state_ & _federal = '13,999,169';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 85, "num_statements": 1} {"question": "What is the minimum energy storage capacity by technology in 2030?", "schema": "CREATE TABLE energy_storage_2030 (technology VARCHAR(255), capacity FLOAT); INSERT INTO energy_storage_2030 (technology, capacity) VALUES ('Lithium-ion', 18000.0), ('Flow', 25000.1), ('Sodium-ion', 32000.2);", "sql": "SELECT technology, MIN(capacity) AS min_capacity FROM energy_storage_2030 GROUP BY technology;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 94, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Tell me the home team score for richmond home team", "schema": "CREATE TABLE table_name_87 (home_team VARCHAR)", "sql": "SELECT home_team AS score FROM table_name_87 WHERE home_team = 'richmond';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 74, "num_statements": 1} {"question": "What is the age and nationality distribution of employees?", "schema": "CREATE TABLE EmployeeDemographics (EmployeeID INT, Age INT, Nationality VARCHAR(50)); INSERT INTO EmployeeDemographics (EmployeeID, Age, Nationality) VALUES (1, 30, 'American'), (2, 40, 'Canadian'), (3, 35, 'British'), (4, 28, 'Mexican'), (5, 45, 'German'), (6, 32, 'Indian');", "sql": "SELECT Nationality, Age, COUNT(*) FROM EmployeeDemographics GROUP BY Nationality, Age;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 86, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What's the total attendance of games against the New York Giants after week 13?", "schema": "CREATE TABLE table_name_51 (attendance VARCHAR, week VARCHAR, opponent VARCHAR)", "sql": "SELECT COUNT(attendance) FROM table_name_51 WHERE week > 13 AND opponent = 'new york giants';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 93, "num_statements": 1} {"question": "Compare the average budget allocated for education in Mexico and Argentina in 2018.", "schema": "CREATE SCHEMA latam_schema;CREATE TABLE latam_schema.education_budget (country VARCHAR(20), year INT, avg_budget DECIMAL(5,2));INSERT INTO latam_schema.education_budget (country, year, avg_budget) VALUES ('Mexico', 2018, 3000.00), ('Argentina', 2018, 4000.00);", "sql": "SELECT country, avg_budget FROM latam_schema.education_budget WHERE (country = 'Mexico' OR country = 'Argentina') AND year = 2018;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 130, "num_statements": 1} {"question": "Update the conservation_efforts table to reflect a successful conservation project for species with id 3 and 7", "schema": "CREATE TABLE conservation_efforts (id INT, species_id INT, project_status VARCHAR(20));", "sql": "UPDATE conservation_efforts SET project_status = 'successful' WHERE species_id IN (3, 7);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 89, "num_statements": 1} {"question": "What is the maximum cargo weight transported by a vessel with a compliance score above 90 in the Indian Ocean in 2022?", "schema": "CREATE TABLE vessels (id INT, name TEXT, type TEXT, compliance_score INT);CREATE TABLE cargos (id INT, vessel_id INT, weight FLOAT, destination TEXT, date DATE); INSERT INTO vessels (id, name, type, compliance_score) VALUES (1, 'VesselI', 'Container', 95); INSERT INTO cargos (id, vessel_id, weight, destination, date) VALUES (1, 1, 25000, 'Indian', '2022-03-15');", "sql": "SELECT MAX(c.weight) FROM vessels v JOIN cargos c ON v.id = c.vessel_id WHERE v.compliance_score > 90 AND c.destination = 'Indian' AND c.date BETWEEN '2022-01-01' AND '2022-12-31';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 180, "num_statements": 1} {"question": "Which menu items have sold more than 500 units in the last 6 months, and how many units were sold?", "schema": "CREATE TABLE menu_sales (menu_item VARCHAR(255), sales_quantity INT, sale_date DATE); INSERT INTO menu_sales (menu_item, sales_quantity, sale_date) VALUES ('Burger', 300, '2022-01-01'); INSERT INTO menu_sales (menu_item, sales_quantity, sale_date) VALUES ('Pizza', 550, '2022-01-02');", "sql": "SELECT menu_item, SUM(sales_quantity) as total_sold FROM menu_sales WHERE sale_date >= DATE_SUB(CURRENT_DATE, INTERVAL 6 MONTH) GROUP BY menu_item HAVING total_sold > 500;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 171, "num_statements": 1} {"question": "Find the number of employees in each department who have been with the company for more than five years.", "schema": "CREATE TABLE employee (id INT, name VARCHAR(255), gender VARCHAR(50), ethnicity VARCHAR(50), department_id INT, hire_date DATE);", "sql": "SELECT department.name AS department, COUNT(*) AS employee_count FROM department INNER JOIN employee ON department.id = employee.department_id WHERE DATEDIFF(CURDATE(), hire_date) > 5 * 365 GROUP BY department.name;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 215, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the total number of games lost when 22 have been played, the losing BP number is 2, and the club is Merthyr RFC?", "schema": "CREATE TABLE table_name_52 (lost VARCHAR, club VARCHAR, played VARCHAR, losing_bp VARCHAR)", "sql": "SELECT lost FROM table_name_52 WHERE played = '22' AND losing_bp = '2' AND club = 'merthyr rfc';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 96, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'test_decoding' (example 76).", "schema": null, "sql": "SELECT pg_drop_replication_slot('failover_true_slot');", "explanation": "Example query from the 'test_decoding' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 54, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the date for eastern creek raceway", "schema": "CREATE TABLE table_2446333_2 (date VARCHAR, circuit VARCHAR)", "sql": "SELECT date FROM table_2446333_2 WHERE circuit = 'Eastern Creek Raceway';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 73, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'rules' (example 472).", "schema": null, "sql": "update rules_src set f2 = f2 / 10;", "explanation": "DML from PostgreSQL core regression test for Rules.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 34, "num_statements": 1} {"question": "What is the total installed capacity (in MW) of renewable energy projects in India that were completed after 2018, grouped by type?", "schema": "CREATE TABLE india_renewable_projects (name TEXT, type TEXT, completion_date DATE, capacity_mw REAL); INSERT INTO india_renewable_projects (name, type, completion_date, capacity_mw) VALUES ('Solar Project 1', 'Solar', '2019-01-01', 50), ('Wind Project 2', 'Wind', '2020-01-01', 75);", "sql": "SELECT type, SUM(capacity_mw) FROM india_renewable_projects WHERE completion_date > '2018-12-31' GROUP BY type;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 111, "num_statements": 1} {"question": "What is the average age of astronauts from Japan during their first space mission in 2025?", "schema": "CREATE TABLE AstronautsT (astronaut_name VARCHAR(30), age INT, first_mission_date DATE, nationality VARCHAR(20)); INSERT INTO AstronautsT (astronaut_name, age, first_mission_date, nationality) VALUES ('Astronaut5', 50, '2025-01-01', 'Japan');", "sql": "SELECT AVG(age) FROM AstronautsT WHERE nationality = 'Japan' AND first_mission_date = (SELECT MIN(first_mission_date) FROM AstronautsT WHERE nationality = 'Japan' AND YEAR(first_mission_date) = 2025);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 200, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: List all different genre types.", "schema": "CREATE TABLE genres (name VARCHAR)", "sql": "SELECT DISTINCT name FROM genres;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 33, "num_statements": 1} {"question": "pgTAP test for Index (assertion 101).", "schema": null, "sql": "SELECT * FROM check_test(\n index_is_type( 'idx_bar', 'hash' ),\n false,\n 'index_is_type() no table fail',\n 'Index idx_bar should be a hash index',\n ' have: btree\n want: hash'\n);", "explanation": "SQL assertion from pgTAP test suite for Index.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 205, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What catalog has june 1990 as the date?", "schema": "CREATE TABLE table_name_73 (catalog VARCHAR, date VARCHAR)", "sql": "SELECT catalog FROM table_name_73 WHERE date = 'june 1990';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 59, "num_statements": 1} {"question": "What is the average age of community health workers by state?", "schema": "CREATE TABLE community_health_workers (worker_id INT, name TEXT, age INT, state TEXT); INSERT INTO community_health_workers (worker_id, name, age, state) VALUES (1, 'Alice', 45, 'NY'), (2, 'Bob', 35, 'CA'), (3, 'Charlie', 50, 'NY'), (4, 'Diana', 30, 'CA');", "sql": "SELECT state, AVG(age) FROM community_health_workers GROUP BY state;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 68, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Attendance larger than 55,189 is which average game?", "schema": "CREATE TABLE table_name_25 (game INTEGER, attendance INTEGER)", "sql": "SELECT AVG(game) FROM table_name_25 WHERE attendance > 55 OFFSET 189;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 69, "num_statements": 1} {"question": "Delete records from the financial_wellbeing table where the financial_score is less than 65 or the date is after 2022-02-28.", "schema": "CREATE TABLE financial_wellbeing (id INT, name VARCHAR(50), financial_score INT, date DATE); INSERT INTO financial_wellbeing (id, name, financial_score, date) VALUES (1, 'John', 75, '2020-01-05'), (2, 'Jane', 85, '2019-12-31'), (3, 'Mike', 55, '2019-11-15'), (4, 'Lucy', 90, '2020-03-01'), (5, 'Zainab', 70, '2022-05-01'), (5, 'Zainab', 75, '2022-06-01');", "sql": "DELETE FROM financial_wellbeing WHERE financial_score < 65 OR date > '2022-02-28';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_delete", "is_postgresql_specific": false, "sql_length": 82, "num_statements": 1} {"question": "Calculate the average sustainability score for accommodations in Australia and New Zealand.", "schema": "CREATE TABLE accommodations (id INT, country VARCHAR(50), accommodation_type VARCHAR(50), sustainability_score INT); INSERT INTO accommodations (id, country, accommodation_type, sustainability_score) VALUES (1, 'Australia', 'Hotel', 75); INSERT INTO accommodations (id, country, accommodation_type, sustainability_score) VALUES (2, 'Australia', 'Resort', 80); INSERT INTO accommodations (id, country, accommodation_type, sustainability_score) VALUES (3, 'New Zealand', 'Hostel', 85);", "sql": "SELECT AVG(sustainability_score) FROM accommodations WHERE country IN ('Australia', 'New Zealand');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 99, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which region has the format CD and label MCA?", "schema": "CREATE TABLE table_name_15 (region VARCHAR, format VARCHAR, label VARCHAR)", "sql": "SELECT region FROM table_name_15 WHERE format = 'cd' AND label = 'mca';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 71, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the most season for old hoss radbourn", "schema": "CREATE TABLE table_242813_2 (season INTEGER, pitcher VARCHAR)", "sql": "SELECT MAX(season) FROM table_242813_2 WHERE pitcher = 'Old Hoss Radbourn';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 75, "num_statements": 1} {"question": "Summarize the number of regulatory frameworks implemented in the Americas (North and South) between 2019 and 2021.", "schema": "CREATE TABLE regulatory_frameworks (id INT, name VARCHAR(255), country VARCHAR(255), implementation_date DATE); INSERT INTO regulatory_frameworks (id, name, country, implementation_date) VALUES (1, 'Framework 1', 'USA', '2020-05-01'), (2, 'Framework 2', 'Brazil', '2021-02-15');", "sql": "SELECT COUNT(*) FROM regulatory_frameworks WHERE country IN ('USA', 'Brazil') AND implementation_date BETWEEN '2019-01-01' AND '2021-12-31';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 140, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the highest River Mile that has a pool length of 42.2 miles or a lock/lift drop of 21 feet?", "schema": "CREATE TABLE table_name_71 (river_mile INTEGER, pool_length__miles_ VARCHAR, lock_lift_drop__in_feet_ VARCHAR)", "sql": "SELECT MAX(river_mile) FROM table_name_71 WHERE pool_length__miles_ = '42.2' AND lock_lift_drop__in_feet_ = '21';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 113, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What player had the high point on July 7?", "schema": "CREATE TABLE table_name_21 (high_points VARCHAR, date VARCHAR)", "sql": "SELECT high_points FROM table_name_21 WHERE date = 'july 7';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 60, "num_statements": 1} {"question": "pgTAP test for Moretap (assertion 8).", "schema": null, "sql": "SELECT is( _set('plan', 8), 8, 'Increase internal plan value after testing finish' );", "explanation": "SQL assertion from pgTAP test suite for Moretap.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 85, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which Total is the lowest one that has a Country of united states, and a To par of +12?", "schema": "CREATE TABLE table_name_99 (total INTEGER, country VARCHAR, to_par VARCHAR)", "sql": "SELECT MIN(total) FROM table_name_99 WHERE country = 'united states' AND to_par = '+12';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 88, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: what is the score when the opponent is fernando vicente?", "schema": "CREATE TABLE table_name_9 (score VARCHAR, opponent VARCHAR)", "sql": "SELECT score FROM table_name_9 WHERE opponent = 'fernando vicente';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 67, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the highest SP+FS that has 131.02 Points, and a Rank larger than 15?", "schema": "CREATE TABLE table_name_84 (fs VARCHAR, sp INTEGER, points VARCHAR, rank VARCHAR)", "sql": "SELECT MAX(sp) + fs FROM table_name_84 WHERE points = 131.02 AND rank > 15;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 75, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'hstore_plperl' (example 10).", "schema": null, "sql": "SELECT test1arr(array['aa=>bb, cc=>NULL'::hstore, 'dd=>ee']);", "explanation": "Example query from the 'hstore_plperl' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 61, "num_statements": 1} {"question": "What is the average expenditure by tourists from Germany in Berlin, Munich, and Hamburg combined?", "schema": "CREATE TABLE tourism_stats (visitor_country VARCHAR(20), destination VARCHAR(20), expenditure DECIMAL(10,2)); INSERT INTO tourism_stats (visitor_country, destination, expenditure) VALUES ('Germany', 'Berlin', 400.00), ('Germany', 'Berlin', 450.00), ('Germany', 'Munich', 350.00), ('Germany', 'Hamburg', 300.00);", "sql": "SELECT AVG(expenditure) FROM (SELECT expenditure FROM tourism_stats WHERE visitor_country = 'Germany' AND destination IN ('Berlin', 'Munich', 'Hamburg')) subquery;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 163, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the total number of television channels when one of the radio stations is onda madrid?", "schema": "CREATE TABLE table_23143607_1 (television_channels VARCHAR, radio_stations VARCHAR)", "sql": "SELECT COUNT(television_channels) FROM table_23143607_1 WHERE radio_stations = 'Onda Madrid';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 93, "num_statements": 1} {"question": "Count the number of carbon offset initiatives in each city", "schema": "CREATE TABLE city_carbon_offsets (city VARCHAR(50), offset_initiative VARCHAR(50), PRIMARY KEY (city, offset_initiative));", "sql": "SELECT city, COUNT(*) FROM city_carbon_offsets GROUP BY city;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 61, "num_statements": 1} {"question": "Present the number of hospitals in each state", "schema": "CREATE TABLE hospitals (hospital_id INT, name VARCHAR(255), state VARCHAR(255)); INSERT INTO hospitals (hospital_id, name, state) VALUES (1, 'Johns Hopkins Hospital', 'Maryland'); INSERT INTO hospitals (hospital_id, name, state) VALUES (2, 'Massachusetts General Hospital', 'Massachusetts');", "sql": "SELECT state, COUNT(*) FROM hospitals GROUP BY state;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 53, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What's the highest league of the Nova Scotia Clippers with a total season of less than 4?", "schema": "CREATE TABLE table_name_2 (highest_league VARCHAR, total_seasons VARCHAR, team VARCHAR)", "sql": "SELECT highest_league FROM table_name_2 WHERE total_seasons < 4 AND team = 'nova scotia clippers';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 98, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What volume number has the ISBN of 978-1-59582-712-8 (hc) 978-1-59582-713-5 (tpb)?", "schema": "CREATE TABLE table_name_38 (volume VARCHAR, isbn VARCHAR)", "sql": "SELECT volume FROM table_name_38 WHERE isbn = '978-1-59582-712-8 (hc) 978-1-59582-713-5 (tpb)';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 95, "num_statements": 1} {"question": "Calculate the total quantity of products in all categories", "schema": "CREATE TABLE products (product_id INT, category VARCHAR(20), quantity INT); INSERT INTO products (product_id, category, quantity) VALUES (1, 'apparel', 50), (2, 'accessories', 70), (3, 'home_decor', 40), (4, 'electronics', 600), (5, 'grocery', 60), (6, 'toys', 30), (7, 'furniture', 150);", "sql": "SELECT SUM(quantity) FROM products;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 35, "num_statements": 1} {"question": "What is the average risk score for all green infrastructure projects in the Asia Pacific region?", "schema": "CREATE TABLE infrastructure_projects (project_id INT, project_name TEXT, sector TEXT, region TEXT, risk_score INT); INSERT INTO infrastructure_projects (project_id, project_name, sector, region, risk_score) VALUES (1, 'Project C', 'Green Infrastructure', 'Asia Pacific', 12), (2, 'Project D', 'Green Infrastructure', 'Europe', 10);", "sql": "SELECT AVG(risk_score) FROM infrastructure_projects WHERE sector = 'Green Infrastructure' AND region = 'Asia Pacific';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 118, "num_statements": 1} {"question": "How many chemical engineers were employed in each plant in the past quarter?", "schema": "CREATE TABLE plants (plant_id INT, plant_name VARCHAR(50)); CREATE TABLE employees (employee_id INT, plant_id INT, employee_type VARCHAR(50), employment_date DATE); INSERT INTO plants (plant_id, plant_name) VALUES (1, 'Plant A'), (2, 'Plant B'); INSERT INTO employees (employee_id, plant_id, employee_type, employment_date) VALUES (1, 1, 'Chemical Engineer', '2022-01-01'), (2, 2, 'Chemical Engineer', '2022-01-01');", "sql": "SELECT plants.plant_name, COUNT(employees.employee_id) FROM plants INNER JOIN employees ON plants.plant_id = employees.plant_id WHERE employees.employee_type = 'Chemical Engineer' AND employees.employment_date >= DATE_SUB(CURDATE(), INTERVAL 3 MONTH) GROUP BY plants.plant_name;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 278, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the description of the type of the company who concluded its contracts most recently?", "schema": "CREATE TABLE Maintenance_Contracts (maintenance_contract_company_id VARCHAR, contract_end_date VARCHAR); CREATE TABLE Third_Party_Companies (company_name VARCHAR, company_id VARCHAR, company_type_code VARCHAR); CREATE TABLE Ref_Company_Types (company_type_code VARCHAR)", "sql": "SELECT T1.company_name FROM Third_Party_Companies AS T1 JOIN Maintenance_Contracts AS T2 ON T1.company_id = T2.maintenance_contract_company_id JOIN Ref_Company_Types AS T3 ON T1.company_type_code = T3.company_type_code ORDER BY T2.contract_end_date DESC LIMIT 1;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 262, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'create_index' (example 493).", "schema": null, "sql": "CREATE INDEX concur_reindex_part_index_0 ON ONLY concur_reindex_part_0 (c1);", "explanation": "DDL from PostgreSQL core regression test for Create Index.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "ddl_index", "is_postgresql_specific": false, "sql_length": 76, "num_statements": 1} {"question": "PostgreSQL regression test 'partition_info': Write the SELECT query (example 43).", "schema": null, "sql": "SELECT relid, parentrelid, level, isleaf\n FROM pg_partition_tree('ptif_test0_index') p\n JOIN pg_class c ON (p.relid = c.oid);", "explanation": "Regression test for Partition Info in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT relid, parentrelid, level, isleaf\n FROM pg_partition_tree('ptif_test0_index') p\n JOIN pg_class c ON (p.relid = c.oid)) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": true, "sql_length": 127, "num_statements": 1} {"question": "List the names and professional development hours of teachers who have participated in workshops on open pedagogy and lifelong learning, sorted alphabetically by name.", "schema": "CREATE TABLE Teachers (teacher_id INT, name VARCHAR(255), professional_development_hours INT); CREATE TABLE Workshops (workshop_id INT, name VARCHAR(255), topic VARCHAR(255)); INSERT INTO Workshops (workshop_id, name, topic) VALUES (1, 'Open Pedagogy Workshop', 'open pedagogy'), (2, 'Lifelong Learning Seminar', 'lifelong learning'); CREATE TABLE TeacherWorkshops (teacher_id INT, workshop_id INT);", "sql": "SELECT Teachers.name, Teachers.professional_development_hours FROM Teachers INNER JOIN TeacherWorkshops ON Teachers.teacher_id = TeacherWorkshops.teacher_id INNER JOIN Workshops ON TeacherWorkshops.workshop_id = Workshops.workshop_id WHERE Workshops.topic IN ('open pedagogy', 'lifelong learning') ORDER BY Teachers.name ASC;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 325, "num_statements": 1} {"question": "Write the SQL definition from PostgreSQL contrib extension 'contrib' (item 387).", "schema": null, "sql": "CREATE FUNCTION hashisbn13(isbn13)\n\tRETURNS int4\n\tAS 'hashint8'\n\tLANGUAGE 'internal'\n\tIMMUTABLE STRICT\n\tPARALLEL SAFE;", "explanation": "SQL definition from the 'contrib' PostgreSQL contrib extension.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 118, "num_statements": 1} {"question": "pgTAP test for Resultset (assertion 47).", "schema": null, "sql": "INSERT INTO names (name) VALUES ('Lauren');", "explanation": "SQL assertion from pgTAP test suite for Resultset.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 43, "num_statements": 1} {"question": "What is the average energy efficiency rating for renewable energy projects in 'Urban Area X'?", "schema": "CREATE TABLE renewable_projects (project_id INT, project_name TEXT, location TEXT, energy_efficiency_rating FLOAT); INSERT INTO renewable_projects (project_id, project_name, location, energy_efficiency_rating) VALUES (1, 'Solar Farm A', 'Rural Region Y', 0.23), (2, 'Wind Farm B', 'Rural Region X', 0.35), (3, 'Hydro Plant C', 'Rural Region Y', 0.42), (4, 'Solar Farm D', 'Urban Area X', 0.50);", "sql": "SELECT AVG(energy_efficiency_rating) as avg_rating FROM renewable_projects WHERE location = 'Urban Area X';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 107, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many laps were there for the car with a qual of 138.750?", "schema": "CREATE TABLE table_name_3 (laps VARCHAR, qual VARCHAR)", "sql": "SELECT laps FROM table_name_3 WHERE qual = '138.750';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 53, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who has a nationality of Czech Republic and a position of right wing?", "schema": "CREATE TABLE table_name_49 (player VARCHAR, position VARCHAR, nationality VARCHAR)", "sql": "SELECT player FROM table_name_49 WHERE position = 'right wing' AND nationality = 'czech republic';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 98, "num_statements": 1} {"question": "Insert a new excavation site named \"La Santana\" into the excavation_sites table.", "schema": "CREATE TABLE excavation_sites (id INT, name VARCHAR(255));", "sql": "INSERT INTO excavation_sites (id, name) VALUES ((SELECT MAX(id) FROM excavation_sites) + 1, 'La Santana');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 106, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What year was the Competition of World Junior Championships with a 20th (qf) position?", "schema": "CREATE TABLE table_name_18 (year INTEGER, competition VARCHAR, position VARCHAR)", "sql": "SELECT AVG(year) FROM table_name_18 WHERE competition = 'world junior championships' AND position = '20th (qf)';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 112, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Tell me the total number of years for lady cecily waynflete and opera house, kennedy center", "schema": "CREATE TABLE table_name_51 (year VARCHAR, role VARCHAR, theatre VARCHAR)", "sql": "SELECT COUNT(year) FROM table_name_51 WHERE role = 'lady cecily waynflete' AND theatre = 'opera house, kennedy center';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 119, "num_statements": 1} {"question": "PostgreSQL regression test 'multirangetypes': Write the SELECT query (example 539).", "schema": null, "sql": "select intr_multirange(intr(1,10));", "explanation": "Regression test for Multirangetypes in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select intr_multirange(intr(1,10))) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 35, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the title of the episode with a production code of 1gowo04?", "schema": "CREATE TABLE table_name_3 (episode_title VARCHAR, prod_code VARCHAR)", "sql": "SELECT episode_title FROM table_name_3 WHERE prod_code = '1gowo04';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 67, "num_statements": 1} {"question": "Write the SQL definition from PostgreSQL contrib extension 'contrib' (item 12).", "schema": null, "sql": "CREATE FUNCTION gen_random_bytes(int4)\nRETURNS bytea\nAS 'MODULE_PATHNAME', 'pg_random_bytes'\nLANGUAGE C VOLATILE STRICT PARALLEL SAFE;", "explanation": "SQL definition from the 'contrib' PostgreSQL contrib extension.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 134, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'plpgsql' (example 711).", "schema": null, "sql": "CREATE TRIGGER transition_table_base_ins_trig\n AFTER INSERT ON transition_table_base\n REFERENCING OLD TABLE AS oldtable NEW TABLE AS newtable\n FOR EACH STATEMENT\n EXECUTE PROCEDURE transition_table_base_ins_func();", "explanation": "DDL from PostgreSQL core regression test for Plpgsql.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "dml_insert", "is_postgresql_specific": true, "sql_length": 218, "num_statements": 1} {"question": "How many tourists visited each region in 2020?", "schema": "CREATE TABLE region (region_code CHAR(2), region_name VARCHAR(50)); INSERT INTO region VALUES ('NA', 'North America'), ('EU', 'Europe'); CREATE TABLE visit_summary (region_code CHAR(2), year INT, visitor_count INT); INSERT INTO visit_summary VALUES ('NA', 2020, 1000), ('NA', 2019, 1200), ('EU', 2020, 2000), ('EU', 2019, 2500);", "sql": "SELECT region_code, SUM(visitor_count) OVER (PARTITION BY region_code) FROM visit_summary WHERE year = 2020;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "query_window_function", "is_postgresql_specific": true, "sql_length": 108, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which stage has kourdali as the name?", "schema": "CREATE TABLE table_21578303_2 (stage VARCHAR, name VARCHAR)", "sql": "SELECT stage FROM table_21578303_2 WHERE name = 'Kourdali';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 59, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which Frequency has a Webcast of •, and a Callsign of xemr?", "schema": "CREATE TABLE table_name_31 (frequency INTEGER, webcast VARCHAR, callsign VARCHAR)", "sql": "SELECT AVG(frequency) FROM table_name_31 WHERE webcast = '•' AND callsign = 'xemr';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 83, "num_statements": 1} {"question": "What is the earliest date a parcel was shipped from 'SYD' to 'PEK'?", "schema": "CREATE TABLE shipments (id INT, source_airport VARCHAR(5), destination_airport VARCHAR(5), shipped_date DATE); INSERT INTO shipments (id, source_airport, destination_airport, shipped_date) VALUES (1, 'SYD', 'PEK', '2022-03-02'), (2, 'SYD', 'PEK', '2022-03-10'), (3, 'PEK', 'SYD', '2022-03-15');", "sql": "SELECT MIN(shipped_date) FROM shipments WHERE source_airport = 'SYD' AND destination_airport = 'PEK';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 101, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What's the loss for September 16?", "schema": "CREATE TABLE table_name_62 (loss VARCHAR, date VARCHAR)", "sql": "SELECT loss FROM table_name_62 WHERE date = 'september 16';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 59, "num_statements": 1} {"question": "What is the average dissolved oxygen level for each species in the 'fish_data' table?", "schema": "CREATE TABLE fish_data (species VARCHAR(255), dissolved_oxygen FLOAT); INSERT INTO fish_data (species, dissolved_oxygen) VALUES ('Tilapia', 6.5), ('Salmon', 8.5), ('Catfish', 5.5);", "sql": "SELECT species, AVG(dissolved_oxygen) FROM fish_data GROUP BY species;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 70, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What party did hilda solis represent?", "schema": "CREATE TABLE table_1805191_6 (party VARCHAR, incumbent VARCHAR)", "sql": "SELECT party FROM table_1805191_6 WHERE incumbent = 'Hilda Solis';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'postgres_fdw' (example 459).", "schema": null, "sql": "insert into tab_batch_local select i, 'test'|| i from generate_series(1, 45) i;", "explanation": "Example query from the 'postgres_fdw' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 79, "num_statements": 1} {"question": "Count the number of successful PTSD treatment outcomes for veterans in the United States.", "schema": "CREATE TABLE patients (patient_id INT, patient_name VARCHAR(50), condition VARCHAR(50), country VARCHAR(50), veteran_status VARCHAR(50), treatment_outcome VARCHAR(50)); INSERT INTO patients (patient_id, patient_name, condition, country, veteran_status, treatment_outcome) VALUES (1, 'James Smith', 'PTSD', 'USA', 'Veteran', 'Successful'), (2, 'Olivia Brown', 'PTSD', 'USA', 'Civilian', 'Successful');", "sql": "SELECT COUNT(patient_id) FROM patients WHERE patients.condition = 'PTSD' AND patients.country = 'USA' AND patients.veteran_status = 'Veteran' AND patients.treatment_outcome = 'Successful';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 188, "num_statements": 1} {"question": "What is the total number of public participation events in each city, partitioned by event type?", "schema": "CREATE TABLE PublicEvents (city VARCHAR(50), event_type VARCHAR(50), participation INT); INSERT INTO PublicEvents (city, event_type, participation) VALUES ('CityA', 'Workshop', 50), ('CityA', 'Meeting', 30), ('CityB', 'Workshop', 40), ('CityB', 'Meeting', 60);", "sql": "SELECT city, event_type, SUM(participation) AS total_participation FROM PublicEvents GROUP BY city, event_type;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 111, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'varchar' (example 6).", "schema": null, "sql": "INSERT INTO VARCHAR_TBL (f1) VALUES ('3');", "explanation": "DML from PostgreSQL core regression test for Varchar.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 42, "num_statements": 1} {"question": "What was the total number of shipments from Vietnam to South Africa in the first half of May 2021?", "schema": "CREATE TABLE shipments (id INT, origin VARCHAR(255), destination VARCHAR(255), shipped_at TIMESTAMP); INSERT INTO shipments (id, origin, destination, shipped_at) VALUES (1, 'Vietnam', 'South Africa', '2021-05-02 10:30:00'), (2, 'Vietnam', 'South Africa', '2021-05-15 15:45:00');", "sql": "SELECT COUNT(*) FROM shipments WHERE origin = 'Vietnam' AND destination = 'South Africa' AND shipped_at >= '2021-05-01' AND shipped_at < '2021-05-16';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 150, "num_statements": 1} {"question": "What is the total number of flight hours for each aircraft model in the flight_hours table?", "schema": "CREATE TABLE flight_hours (flight_id INT, model_id INT, flight_hours INT);", "sql": "SELECT model_id, SUM(flight_hours) as total_flight_hours FROM flight_hours GROUP BY model_id;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 93, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'cube' (example 109).", "schema": null, "sql": "SELECT '1,0,0'::cube <@ '0,0,1'::cube AS bool;", "explanation": "Example query from the 'cube' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "query_select", "is_postgresql_specific": true, "sql_length": 77, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which round was the winning constructor was Benetton - Ford and in the Pole Position was Damon Hill?", "schema": "CREATE TABLE table_1137702_3 (round VARCHAR, winning_constructor VARCHAR, pole_position VARCHAR)", "sql": "SELECT round FROM table_1137702_3 WHERE winning_constructor = 'Benetton - Ford' AND pole_position = 'Damon Hill';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 113, "num_statements": 1} {"question": "What is the total credit and debit amount per day for all customers?", "schema": "CREATE TABLE transactions (id INT, customer_id INT, transaction_date DATE, amount DECIMAL(10,2), type VARCHAR(10)); INSERT INTO transactions (id, customer_id, transaction_date, amount, type) VALUES (1, 1, '2022-01-01', 100.00, 'debit'); INSERT INTO transactions (id, customer_id, transaction_date, amount, type) VALUES (2, 1, '2022-01-02', 50.00, 'credit'); INSERT INTO transactions (id, customer_id, transaction_date, amount, type) VALUES (3, 2, '2022-01-03', 200.00, 'debit');", "sql": "SELECT transaction_date, SUM(CASE WHEN type = 'debit' THEN -amount ELSE amount END) AS total_amount FROM transactions GROUP BY transaction_date;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 144, "num_statements": 1} {"question": "What is the average time to detect a security incident in the education sector in 2022?", "schema": "CREATE TABLE security_incidents (sector VARCHAR(255), year INT, time_to_detect FLOAT); INSERT INTO security_incidents (sector, year, time_to_detect) VALUES ('Education', 2022, 4.2), ('Education', 2022, 5.6), ('Education', 2022, 3.9), ('Education', 2022, 6.1), ('Education', 2022, 4.5);", "sql": "SELECT AVG(time_to_detect) FROM security_incidents WHERE sector = 'Education' AND year = 2022;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 94, "num_statements": 1} {"question": "What is the average budget allocated for language preservation projects in Africa?", "schema": "CREATE TABLE Languages (LanguageID INT PRIMARY KEY, LanguageName VARCHAR(50), Location VARCHAR(50), Budget DECIMAL(10,2)); INSERT INTO Languages (LanguageID, LanguageName, Location, Budget) VALUES (1, 'Swahili', 'Tanzania', 500000.00), (2, 'Hausa', 'Nigeria', 750000.00);", "sql": "SELECT AVG(Budget) FROM Languages WHERE Location LIKE '%Africa%';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 65, "num_statements": 1} {"question": "pgTAP test for Functap (assertion 69).", "schema": null, "sql": "SELECT * FROM check_test(\n function_lang_is( 'yay', '{}'::name[], 'sql' ),\n true,\n 'function_lang_is(func, 0 args, sql)',\n 'Function yay() should be written in sql',\n ''\n);", "explanation": "SQL assertion from pgTAP test suite for Functap.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 187, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'citext' (example 338).", "schema": null, "sql": "SELECT lpad('hi'::citext, 5, 'xy'::text ) = 'xyxhi' AS t;", "explanation": "Example query from the 'citext' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 58, "num_statements": 1} {"question": "What is the average delivery time for shipments that were delivered to warehouses in the state of New York?", "schema": "CREATE TABLE Shipment (shipment_id INT, warehouse_id INT, delivery_date DATE); INSERT INTO Shipment (shipment_id, warehouse_id, delivery_date) VALUES (1, 1, '2022-01-01'), (2, 2, '2022-02-01'), (3, 3, '2022-03-01'), (4, 1, '2022-04-01'), (5, 2, '2022-05-01'); CREATE TABLE Warehouse (warehouse_id INT, warehouse_name VARCHAR(50), state VARCHAR(50)); INSERT INTO Warehouse (warehouse_id, warehouse_name, state) VALUES (1, 'Los Angeles Warehouse', 'California'), (2, 'New York Warehouse', 'New York'), (3, 'Texas Warehouse', 'Texas');", "sql": "SELECT AVG(DATEDIFF('day', s.delivery_date, w.state_joined_date)) as avg_delivery_time FROM Shipment s JOIN Warehouse w ON s.warehouse_id = w.warehouse_id WHERE w.state = 'New York';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 182, "num_statements": 1} {"question": "PostgreSQL regression test 'date': Write the SELECT query (example 187).", "schema": null, "sql": "SELECT EXTRACT(ISOYEAR FROM DATE '2020-08-11 BC');", "explanation": "Regression test for Date in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT EXTRACT(ISOYEAR FROM DATE '2020-08-11 BC')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 56, "num_statements": 1} {"question": "PostgreSQL regression test 'arrays': Write the SELECT query (example 181).", "schema": null, "sql": "SELECT * FROM array_op_test WHERE i <@ '{NULL}' ORDER BY seqno;", "explanation": "Regression test for Arrays in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT * FROM array_op_test WHERE i <@ '{NULL}' ORDER BY seqno) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 63, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: what's the operational period with camp  sajmište", "schema": "CREATE TABLE table_10335_1 (operational VARCHAR, camp VARCHAR)", "sql": "SELECT operational FROM table_10335_1 WHERE camp = 'Sajmište';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 62, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the 2004 population for 山海关区?", "schema": "CREATE TABLE table_name_46 (population__2004_est_ VARCHAR, hanzi VARCHAR)", "sql": "SELECT population__2004_est_ FROM table_name_46 WHERE hanzi = '山海关区';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 69, "num_statements": 1} {"question": "What is the total revenue from refrigerated cargo shipments in Q1 2021?", "schema": "CREATE TABLE shipments (shipment_id INT, shipment_type TEXT, revenue FLOAT, order_date DATE); INSERT INTO shipments (shipment_id, shipment_type, revenue, order_date) VALUES (1, 'Refrigerated', 3000.00, '2021-01-05'), (2, 'Dry', 4000.00, '2021-01-10');", "sql": "SELECT SUM(revenue) FROM shipments WHERE shipment_type = 'Refrigerated' AND EXTRACT(MONTH FROM order_date) BETWEEN 1 AND 3;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 123, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the Cook PVI for the location that has a representative of Mike Thompson?", "schema": "CREATE TABLE table_19283806_4 (cook_pvi VARCHAR, representative VARCHAR)", "sql": "SELECT cook_pvi FROM table_19283806_4 WHERE representative = 'Mike Thompson';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 77, "num_statements": 1} {"question": "List the vessel names and their total cargo weight for the vessels that have visited Dubai in January 2021 or later?", "schema": "CREATE TABLE Vessel (vessel_id INT PRIMARY KEY, vessel_name VARCHAR(255)); CREATE TABLE Cargo (cargo_id INT, vessel_id INT, cargo_weight INT, PRIMARY KEY (cargo_id, vessel_id)); CREATE TABLE Vessel_Movement (vessel_id INT, movement_date DATE, PRIMARY KEY (vessel_id, movement_date));", "sql": "SELECT V.vessel_name, SUM(C.cargo_weight) FROM Vessel V JOIN Cargo C ON V.vessel_id = C.vessel_id JOIN Vessel_Movement VM ON V.vessel_id = VM.vessel_id WHERE VM.movement_date >= '2021-01-01' AND VM.port_id IN (SELECT port_id FROM Port WHERE port_name = 'Dubai') GROUP BY V.vessel_name;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 285, "num_statements": 1} {"question": "What are the types and quantities of artifacts found in 'SiteA'?", "schema": "CREATE TABLE SiteA (artifact_id INT, artifact_type TEXT, quantity INT); INSERT INTO SiteA (artifact_id, artifact_type, quantity) VALUES (1, 'Pottery', 30), (2, 'Tools', 15), (3, 'Jewelry', 25);", "sql": "SELECT artifact_type, SUM(quantity) FROM SiteA GROUP BY artifact_type;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 70, "num_statements": 1} {"question": "PostgreSQL regression test 'multirangetypes': Write the SELECT query (example 507).", "schema": null, "sql": "select * from nummultirange_test2 where nmr = '{}';", "explanation": "Regression test for Multirangetypes in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select * from nummultirange_test2 where nmr = '{}') AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 51, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the least population for 2002 for 2011 being 9564", "schema": "CREATE TABLE table_2562572_5 (population__2002_ INTEGER, population__2011_ VARCHAR)", "sql": "SELECT MIN(population__2002_) FROM table_2562572_5 WHERE population__2011_ = 9564;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 82, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the date when the opponent in the final is gastón etlis martín rodríguez?", "schema": "CREATE TABLE table_name_22 (date VARCHAR, opponents_in_the_final VARCHAR)", "sql": "SELECT date FROM table_name_22 WHERE opponents_in_the_final = 'gastón etlis martín rodríguez';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 94, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which school is located in trafalgar?", "schema": "CREATE TABLE table_name_73 (school VARCHAR, location VARCHAR)", "sql": "SELECT school FROM table_name_73 WHERE location = 'trafalgar';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 62, "num_statements": 1} {"question": "What is the average delivery time for shipments from South Korea, partitioned by warehouse?", "schema": "CREATE TABLE Warehouses (WarehouseID INT, WarehouseName VARCHAR(50), Country VARCHAR(50)); INSERT INTO Warehouses (WarehouseID, WarehouseName, Country) VALUES (1, 'Seoul Warehouse', 'South Korea'); CREATE TABLE Shipments (ShipmentID INT, WarehouseID INT, DeliveryTime INT);", "sql": "SELECT WarehouseID, AVG(DeliveryTime) OVER (PARTITION BY WarehouseID) AS AvgDeliveryTime FROM Shipments WHERE Country = 'South Korea';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "query_window_function", "is_postgresql_specific": true, "sql_length": 134, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the title rank of the actor who played the character of arthur hastings during series 1-8, 13?", "schema": "CREATE TABLE table_name_24 (title_rank VARCHAR, series VARCHAR, character VARCHAR)", "sql": "SELECT title_rank FROM table_name_24 WHERE series = '1-8, 13' AND character = 'arthur hastings';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 96, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the 2010 for 2007 of a and 2012 of a", "schema": "CREATE TABLE table_name_77 (Id VARCHAR)", "sql": "SELECT 2010 FROM table_name_77 WHERE 2007 = 'a' AND 2012 = 'a';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 63, "num_statements": 1} {"question": "What is the average age of community health workers by race?", "schema": "CREATE TABLE community_health_workers (worker_id INT, name TEXT, age INT, race TEXT); INSERT INTO community_health_workers (worker_id, name, age, race) VALUES (1, 'John Doe', 35, 'White'), (2, 'Jane Smith', 40, 'Black'), (3, 'Maria Garcia', 45, 'Hispanic');", "sql": "SELECT race, AVG(age) FROM community_health_workers GROUP BY race;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Where is every location where Nick is Bru?", "schema": "CREATE TABLE table_2112260_1 (location VARCHAR, nick VARCHAR)", "sql": "SELECT location FROM table_2112260_1 WHERE nick = 'BRU';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 56, "num_statements": 1} {"question": "What are the production forecasts for the wells in the North Sea, ranked by production volume in descending order for each production year?", "schema": "CREATE TABLE production_forecasts (forecast_id INT, well_id INT, production_year INT, production_volume FLOAT, region VARCHAR(50)); INSERT INTO production_forecasts (forecast_id, well_id, production_year, production_volume, region) VALUES (5, 5, 2022, 250.6, 'North Sea'); INSERT INTO production_forecasts (forecast_id, well_id, production_year, production_volume, region) VALUES (6, 6, 2023, 235.4, 'North Sea');", "sql": "SELECT forecast_id, well_id, production_year, production_volume, region, ROW_NUMBER() OVER (PARTITION BY production_year ORDER BY production_volume DESC) as rank FROM production_forecasts WHERE region = 'North Sea';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "query_window_function", "is_postgresql_specific": true, "sql_length": 215, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many laps were completed with a start of 33, and a finish of 18?", "schema": "CREATE TABLE table_name_67 (laps INTEGER, start VARCHAR, finish VARCHAR)", "sql": "SELECT MAX(laps) FROM table_name_67 WHERE start = '33' AND finish = '18';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 73, "num_statements": 1} {"question": "What is the average population size of the blue whale for every 2 consecutive years?", "schema": "CREATE TABLE blue_whale_population (year INT, population INT); INSERT INTO blue_whale_population (year, population) VALUES (2000, 9000), (2001, 9500), (2002, 10000), (2003, 10500);", "sql": "SELECT AVG(population) FROM (SELECT population, ROW_NUMBER() OVER (ORDER BY year) / 2 as group_number FROM blue_whale_population) as grouped_populations GROUP BY group_number;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_window_function", "is_postgresql_specific": true, "sql_length": 175, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What did billy andrade to par?", "schema": "CREATE TABLE table_name_5 (to_par VARCHAR, player VARCHAR)", "sql": "SELECT to_par FROM table_name_5 WHERE player = 'billy andrade';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 63, "num_statements": 1} {"question": "How many incidents were reported for VesselB?", "schema": "CREATE TABLE incidents (id INT, vessel_id INT, incident_type TEXT, reported_date DATE); INSERT INTO incidents (id, vessel_id, incident_type, reported_date) VALUES (1, 2, 'Collision', '2022-01-01');", "sql": "SELECT COUNT(*) FROM incidents WHERE vessel_id = 2;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 51, "num_statements": 1} {"question": "pgTAP test for Pgtap--Unpackaged--0.91.0 (assertion 108).", "schema": null, "sql": "ALTER EXTENSION pgtap ADD FUNCTION has_column ( NAME, NAME );", "explanation": "SQL assertion from pgTAP test suite for Pgtap--Unpackaged--0.91.0.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 61, "num_statements": 1} {"question": "What is the total length of all railroads in Canada and the United States?", "schema": "CREATE TABLE railroads (id INT, country VARCHAR(255), total_length FLOAT); INSERT INTO railroads (id, country, total_length) VALUES (1, 'Canada', 48000), (2, 'United States', 246000);", "sql": "SELECT SUM(total_length) FROM railroads WHERE country IN ('Canada', 'United States');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 85, "num_statements": 1} {"question": "pgTAP test for Ownership (assertion 52).", "schema": null, "sql": "-- It should ignore the sequence.\nSELECT * FROM check_test(\n table_owner_is('public', 'someseq', current_user, 'mumble'),\n\tfalse,\n 'table_owner_is(sch, seq, user, desc)',\n 'mumble',\n ' Table public.someseq does not exist'\n);", "explanation": "SQL assertion from pgTAP test suite for Ownership.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 239, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many teams listed for game 45?", "schema": "CREATE TABLE table_27721131_8 (team VARCHAR, game VARCHAR)", "sql": "SELECT COUNT(team) FROM table_27721131_8 WHERE game = 45;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 57, "num_statements": 1} {"question": "pgTAP test for Runjusttests (assertion 10).", "schema": null, "sql": "CREATE OR REPLACE FUNCTION whatever.testz() RETURNS SETOF TEXT AS $$\n SELECT is( MAX(id), NULL, 'Late test should find nothing in the test table') FROM whatever.foo;\n$$ LANGUAGE SQL;", "explanation": "SQL assertion from pgTAP test suite for Runjusttests.", "validation_query": null, "source": "pgtap_tests", "difficulty": "advanced", "category": "plpgsql_function", "is_postgresql_specific": true, "sql_length": 185, "num_statements": 2} {"question": "What is the average budget for humanitarian assistance events in the Arctic region?", "schema": "CREATE TABLE humanitarian_assistance (id INT PRIMARY KEY, event_name VARCHAR(100), budget DECIMAL(10, 2), region VARCHAR(50)); INSERT INTO humanitarian_assistance (id, event_name, budget, region) VALUES (1, 'Event 1', 50000, 'Arctic'), (2, 'Event 2', 75000, 'Antarctic'), (3, 'Event 3', 30000, 'Arctic');", "sql": "SELECT AVG(budget) FROM humanitarian_assistance WHERE region = 'Arctic';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 72, "num_statements": 1} {"question": "What is the average size of the largest order for each salesperson?", "schema": "CREATE TABLE salesperson (id INT, name VARCHAR(50), region VARCHAR(50)); INSERT INTO salesperson (id, name, region) VALUES (1, 'John Doe', 'North'), (2, 'Jane Smith', 'South'); CREATE TABLE orders (id INT, salesperson_id INT, size INT); INSERT INTO orders (id, salesperson_id, size) VALUES (1, 1, 10), (2, 1, 15), (3, 2, 20), (4, 2, 25);", "sql": "SELECT salesperson_id, AVG(size) as avg_max_order_size FROM (SELECT salesperson_id, MAX(size) as size FROM orders GROUP BY salesperson_id) subquery GROUP BY salesperson_id;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 172, "num_statements": 1} {"question": "What is the average age of healthcare workers in the \"healthcare_workers\" table?", "schema": "CREATE TABLE healthcare_workers (id INT, name VARCHAR(50), age INT, gender VARCHAR(10), location VARCHAR(50)); INSERT INTO healthcare_workers (id, name, age, gender, location) VALUES (1, 'John Doe', 35, 'Male', 'New York'); INSERT INTO healthcare_workers (id, name, age, gender, location) VALUES (2, 'Jane Smith', 32, 'Female', 'California');", "sql": "SELECT AVG(age) FROM healthcare_workers;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 40, "num_statements": 1} {"question": "pgTAP test for Pgtap--0.92.0--0.93.0 (assertion 35).", "schema": null, "sql": "-- language_owner_is ( language, user )\nCREATE OR REPLACE FUNCTION language_owner_is ( NAME, NAME )\nRETURNS TEXT AS $$\n SELECT language_owner_is(\n $1, $2,\n 'Language ' || quote_ident($1) || ' should be owned by ' || quote_ident($2)\n );\n$$ LANGUAGE sql;", "explanation": "SQL assertion from pgTAP test suite for Pgtap--0.92.0--0.93.0.", "validation_query": null, "source": "pgtap_tests", "difficulty": "advanced", "category": "plpgsql_function", "is_postgresql_specific": true, "sql_length": 272, "num_statements": 2} {"question": "Generate PostgreSQL SQL for: Name the team for co-drivers of david brabham mario andretti", "schema": "CREATE TABLE table_name_15 (team VARCHAR, co_drivers VARCHAR)", "sql": "SELECT team FROM table_name_15 WHERE co_drivers = 'david brabham mario andretti';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 81, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Find the name of rooms whose base price is between 120 and 150.", "schema": "CREATE TABLE rooms (roomname VARCHAR, baseprice INTEGER)", "sql": "SELECT roomname FROM rooms WHERE baseprice BETWEEN 120 AND 150;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 63, "num_statements": 1} {"question": "What is the total budget allocated to parks in each borough?", "schema": "CREATE TABLE budget_allocations (allocation_id INT, borough TEXT, category TEXT, budget INT); INSERT INTO budget_allocations (allocation_id, borough, category, budget) VALUES (1, 'Manhattan', 'Parks', 5000000), (2, 'Brooklyn', 'Libraries', 3000000), (3, 'Bronx', 'Parks', 2000000);", "sql": "SELECT borough, SUM(budget) FROM budget_allocations WHERE category = 'Parks' GROUP BY borough;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 94, "num_statements": 1} {"question": "Identify the minimum score achieved by user 11 in game 'C'", "schema": "CREATE TABLE game_scores (user_id INT, game_name VARCHAR(10), score INT); INSERT INTO game_scores (user_id, game_name, score) VALUES (11, 'C', 50), (11, 'C', 75), (11, 'D', 100), (12, 'A', 150), (13, 'B', 200), (13, 'C', 250);", "sql": "SELECT MIN(score) FROM game_scores WHERE user_id = 11 AND game_name = 'C';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 74, "num_statements": 1} {"question": "How many public transportation trips were taken in each borough in 2020?", "schema": "CREATE TABLE PublicTransportation (Year INT, Borough VARCHAR(20), Trips INT); INSERT INTO PublicTransportation (Year, Borough, Trips) VALUES (2020, 'Manhattan', 5000000), (2020, 'Brooklyn', 4000000), (2020, 'Queens', 3500000), (2020, 'Bronx', 3000000), (2020, 'Staten Island', 2000000);", "sql": "SELECT Borough, Year, SUM(Trips) as Total_Trips FROM PublicTransportation GROUP BY Borough, Year;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 97, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How did Cleveland do in the Open Cup in 2009?", "schema": "CREATE TABLE table_2357201_1 (open_cup VARCHAR, year VARCHAR)", "sql": "SELECT open_cup FROM table_2357201_1 WHERE year = 2009;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 55, "num_statements": 1} {"question": "Delete all marine protected areas with a depth greater than 200 meters.", "schema": "CREATE TABLE marine_protected_areas (id INT, name VARCHAR(255), location VARCHAR(255), depth FLOAT); INSERT INTO marine_protected_areas (id, name, location, depth) VALUES (1, 'MPA 1', 'Pacific Ocean', 123.4), (2, 'MPA 2', 'Atlantic Ocean', 150.0), (3, 'MPA 3', 'Indian Ocean', 75.0), (4, 'MPA 4', 'Pacific Ocean', 300.0);", "sql": "DELETE FROM marine_protected_areas WHERE depth > 200;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "dml_delete", "is_postgresql_specific": false, "sql_length": 53, "num_statements": 1} {"question": "List all unique metro stations and their respective lines in the 'berlin' schema.", "schema": "CREATE TABLE berlin.metro_stations (id INT, station_name VARCHAR); CREATE TABLE berlin.station_lines (id INT, station_id INT, line_name VARCHAR);", "sql": "SELECT DISTINCT berlin.metro_stations.station_name, berlin.station_lines.line_name FROM berlin.metro_stations INNER JOIN berlin.station_lines ON berlin.metro_stations.id = berlin.station_lines.station_id;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 204, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'postgres_fdw' (example 161).", "schema": null, "sql": "SELECT t1.c1, t2.c2, t3.c3 FROM ft2 t1 LEFT JOIN ft2 t2 ON (t1.c1 = t2.c1) LEFT JOIN ft4 t3 ON (t2.c1 = t3.c1) OFFSET 10 LIMIT 10;", "explanation": "Example query from the 'postgres_fdw' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 130, "num_statements": 1} {"question": "What is the total population of indigenous communities in each Arctic country?", "schema": "CREATE TABLE IndigenousCommunity (ID INT, Name TEXT, Country TEXT, Population INT); INSERT INTO IndigenousCommunity (ID, Name, Country, Population) VALUES (1, 'Community1', 'Canada', 200); INSERT INTO IndigenousCommunity (ID, Name, Country, Population) VALUES (2, 'Community2', 'Canada', 300); INSERT INTO IndigenousCommunity (ID, Name, Country, Population) VALUES (3, 'Community3', 'Russia', 400);", "sql": "SELECT Country, SUM(Population) as Total_Population FROM IndigenousCommunity GROUP BY Country;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 94, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the home stadium of the Tennessee Titans?", "schema": "CREATE TABLE table_name_10 (stadium VARCHAR, host_team VARCHAR)", "sql": "SELECT stadium FROM table_name_10 WHERE host_team = 'tennessee titans';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 71, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the winning score of standard register ping tournament, which has Kelly Robbins as the runner-up?", "schema": "CREATE TABLE table_name_46 (winning_score VARCHAR, tournament VARCHAR, runner_s__up VARCHAR)", "sql": "SELECT winning_score FROM table_name_46 WHERE tournament = 'standard register ping' AND runner_s__up = 'kelly robbins';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 119, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'create_index' (example 232).", "schema": null, "sql": "CREATE UNIQUE INDEX CONCURRENTLY concur_index3 ON concur_heap(f2);", "explanation": "DDL from PostgreSQL core regression test for Create Index.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_index", "is_postgresql_specific": true, "sql_length": 66, "num_statements": 1} {"question": "What is the total number of ships involved in maritime accidents in the Atlantic Ocean?", "schema": "CREATE TABLE maritime_accidents (ocean TEXT, year INT, ships_involved INT); INSERT INTO maritime_accidents (ocean, year, ships_involved) VALUES ('Atlantic', 2019, 123), ('Pacific', 2020, 456), ('Indian', 2018, 789);", "sql": "SELECT SUM(ships_involved) FROM maritime_accidents WHERE ocean = 'Atlantic';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 76, "num_statements": 1} {"question": "What is the highest score in the 'basketball_games' table?", "schema": "CREATE TABLE basketball_games (game_id INT, home_team INT, away_team INT, home_team_score INT, away_team_score INT); INSERT INTO basketball_games (game_id, home_team, away_team, home_team_score, away_team_score) VALUES (1, 1, 2, 100, 90), (2, 2, 1, 95, 105);", "sql": "SELECT GREATEST(home_team_score, away_team_score) AS highest_score FROM basketball_games;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 89, "num_statements": 1} {"question": "What is the average number of hours spent on professional development by teachers in 'remote_learning'?", "schema": "CREATE TABLE teachers (teacher_id INT, name VARCHAR(20), program VARCHAR(20)); INSERT INTO teachers (teacher_id, name, program) VALUES (1, 'John Doe', 'remote_learning'), (2, 'Jane Smith', 'in_person'), (3, 'Maria Garcia', 'remote_learning'); CREATE TABLE teacher_pd (teacher_id INT, course VARCHAR(20), hours INT); INSERT INTO teacher_pd (teacher_id, course, hours) VALUES (1, 'technology integration', 12), (2, 'classroom_management', 10), (3, 'diversity_equity_inclusion', 15);", "sql": "SELECT AVG(hours) FROM teacher_pd INNER JOIN teachers ON teacher_pd.teacher_id = teachers.teacher_id WHERE teachers.program = 'remote_learning';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 144, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the type of the player whose transfer fee was €20m?", "schema": "CREATE TABLE table_13770460_3 (type VARCHAR, transfer_fee VARCHAR)", "sql": "SELECT type FROM table_13770460_3 WHERE transfer_fee = '€20M';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 62, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: List all years in Orlando where Jeryl Sasser was a player.", "schema": "CREATE TABLE table_15621965_17 (years_in_orlando VARCHAR, player VARCHAR)", "sql": "SELECT years_in_orlando FROM table_15621965_17 WHERE player = 'Jeryl Sasser';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 77, "num_statements": 1} {"question": "What is the number of policy advocacy events held in 2021?", "schema": "CREATE TABLE Policy_Advocacy_Events (event_id INT, year INT, type VARCHAR(255)); INSERT INTO Policy_Advocacy_Events VALUES (1, 2021, 'Webinar');", "sql": "SELECT COUNT(*) FROM Policy_Advocacy_Events WHERE year = 2021;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 62, "num_statements": 1} {"question": "What is the total number of visitors who are members of the museum, broken down by member type?", "schema": "CREATE TABLE members(member_id INT, name VARCHAR(50), member_type VARCHAR(50)); INSERT INTO members (member_id, name, member_type) VALUES (1, 'John Doe', 'Individual'), (2, 'Jane Smith', 'Family'), (3, 'Alice Johnson', 'Individual');", "sql": "SELECT member_type, COUNT(member_id) FROM members GROUP BY member_type;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 71, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the total number of high rebounds for february 10", "schema": "CREATE TABLE table_17058151_8 (high_rebounds VARCHAR, date VARCHAR)", "sql": "SELECT COUNT(high_rebounds) FROM table_17058151_8 WHERE date = 'February 10';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 77, "num_statements": 1} {"question": "PL/pgSQL test: Pltcl Queries (example 6).", "schema": null, "sql": "select tcl_record_arg(row('tkey', 42, 'ref2')::d_comp1, 'ref1');", "explanation": "PL/pgSQL example from PostgreSQL source test for Pltcl Queries.", "validation_query": null, "source": "plpgsql_source", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 64, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the transfer period for habarugira?", "schema": "CREATE TABLE table_name_16 (transfer_window VARCHAR, name VARCHAR)", "sql": "SELECT transfer_window FROM table_name_16 WHERE name = 'habarugira';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 68, "num_statements": 1} {"question": "How many travel advisories were issued for Japan in the last 5 years?", "schema": "CREATE TABLE travel_advisories (advisory_id INT, country TEXT, issue_date DATE); INSERT INTO travel_advisories (advisory_id, country, issue_date) VALUES (1, 'Japan', '2017-01-01'), (2, 'Japan', '2018-03-15'), (3, 'Japan', '2019-07-28'), (4, 'Japan', '2020-12-22'), (5, 'Japan', '2021-03-04');", "sql": "SELECT COUNT(*) FROM travel_advisories WHERE country = 'Japan' AND issue_date >= DATE('now', '-5 year');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 104, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the decile of the school with a roll larger than 513?", "schema": "CREATE TABLE table_name_36 (decile INTEGER, roll INTEGER)", "sql": "SELECT SUM(decile) FROM table_name_36 WHERE roll > 513;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 55, "num_statements": 1} {"question": "pgTAP test for Resultset (assertion 18).", "schema": null, "sql": "INSERT INTO names (name) VALUES ('Daniel');", "explanation": "SQL assertion from pgTAP test suite for Resultset.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 43, "num_statements": 1} {"question": "What is the weight (in kg) of the heaviest spacecraft ever built?", "schema": "CREATE TABLE spacecraft(id INT, name VARCHAR(255), weight_kg FLOAT); INSERT INTO spacecraft(id, name, weight_kg) VALUES (1, 'Saturn V', 303540.0), (2, 'Space Shuttle', 110000.0);", "sql": "SELECT MAX(weight_kg) FROM spacecraft;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 38, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'partition_split' (example 230).", "schema": null, "sql": "INSERT INTO sales_range VALUES\n (1, 'May', 1000, '2022-01-31'),\n (2, 'Smirnoff', 500, '2022-02-10'),\n (3, 'Ford', 2000, '2022-04-30'),\n (4, 'Ivanov', 750, '2022-04-13'),\n (5, 'Deev', 250, '2022-04-07'),\n (6, 'Poirot', 150, '2022-02-11'),\n (7, 'Li', 175, '2022-03-08'),\n (8, 'Ericsson', 185, '2022-02-23'),\n (9, 'Muller', 250, '2022-03-11'),\n (10, 'Halder', 350, '2022-01-28'),\n (11, 'Trump', 380, '2022-04-06'),\n (12, 'Plato', 350, '2022-03-19'),\n (13, 'Gandi', 377, '2022-01-09');", "explanation": "DML from PostgreSQL core regression test for Partition Split.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 550, "num_statements": 1} {"question": "PostgreSQL regression test 'hash_func': Write the SELECT query (example 26).", "schema": null, "sql": "SELECT v as value, pg_lsn_hash(v)::bit(32) as standard,\n pg_lsn_hash_extended(v, 0)::bit(32) as extended0,\n pg_lsn_hash_extended(v, 1)::bit(32) as extended1\nFROM (VALUES (NULL::pg_lsn), ('16/B374D84'), ('30/B374D84'),\n ('255/B374D84'), ('25/B379D90'), ('900/F37FD90')) x(v)\nWHERE pg_lsn_hash(v)::bit(32) != pg_lsn_hash_extended(v, 0)::bit(32)\n OR pg_lsn_hash(v)::bit(32) = pg_lsn_hash_extended(v, 1)::bit(32);", "explanation": "Regression test for Hash Func in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT v as value, pg_lsn_hash(v)::bit(32) as standard,\n pg_lsn_hash_extended(v, 0)::bit(32) as extended0,\n pg_lsn_hash_extended(v, 1)::bit(32) as extended1\nFROM (VALUES (NULL::pg_lsn), ('16/B374D84'), ('30/B374D84'),\n ('255/B374D84'), ('25/B379D90'), ('900/F37FD90')) x(v)\nWHERE pg_lsn_hash(v)::bit(32) != pg_lsn_hash_extended(v, 0)::bit(32)\n OR pg_lsn_hash(v)::bit(32) = pg_lsn_hash_extended(v, 1)::bit(32)) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 437, "num_statements": 1} {"question": "Find the number of rural healthcare providers that offer mental health services, grouped by provider type.", "schema": "CREATE TABLE healthcare_providers (id INT, name TEXT, type TEXT, services TEXT);", "sql": "SELECT type, COUNT(*) FROM healthcare_providers WHERE services LIKE '%Mental Health%' GROUP BY type;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 100, "num_statements": 1} {"question": "pgTAP test for Pgtap--Unpackaged--0.91.0 (assertion 593).", "schema": null, "sql": "ALTER EXTENSION pgtap ADD FUNCTION _relcomp( TEXT, TEXT, TEXT, TEXT );", "explanation": "SQL assertion from pgTAP test suite for Pgtap--Unpackaged--0.91.0.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 70, "num_statements": 1} {"question": "Which fair labor practices are most frequently implemented in the footwear industry across countries?", "schema": "CREATE TABLE Practices (id INT, name VARCHAR(255), type VARCHAR(255), implementation_location VARCHAR(255), industry VARCHAR(255)); INSERT INTO Practices (id, name, type, implementation_location, industry) VALUES (1, 'Living Wage', 'Fair Labor Practice', 'Indonesia', 'Footwear'); INSERT INTO Practices (id, name, type, implementation_location, industry) VALUES (2, 'Safe Workplace', 'Fair Labor Practice', 'Italy', 'Footwear'); INSERT INTO Practices (id, name, type, implementation_location, industry) VALUES (3, 'Collective Bargaining', 'Fair Labor Practice', 'Brazil', 'Footwear');", "sql": "SELECT implementation_location, COUNT(*) FROM Practices WHERE type = 'Fair Labor Practice' AND industry = 'Footwear' GROUP BY implementation_location ORDER BY COUNT(*) DESC LIMIT 1;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 181, "num_statements": 1} {"question": "PostgreSQL regression test 'partition_prune': Write the SELECT query (example 268).", "schema": null, "sql": "select tableoid::regclass, * from hp order by c;", "explanation": "Regression test for Partition Prune in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select tableoid::regclass, * from hp order by c) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 48, "num_statements": 1} {"question": "What was the total number of art pieces sold at the \"London Art Gallery\" in the first quarter of 2021?", "schema": "CREATE TABLE ArtSales5 (GalleryName TEXT, SaleDate DATE, NumPieces INTEGER); INSERT INTO ArtSales5 (GalleryName, SaleDate, NumPieces) VALUES ('London Art Gallery', '2021-01-01', 8), ('London Art Gallery', '2021-02-15', 12), ('London Art Gallery', '2021-03-20', 16);", "sql": "SELECT SUM(NumPieces) FROM ArtSales5 WHERE GalleryName = 'London Art Gallery' AND QUARTER(SaleDate) = 1 AND YEAR(SaleDate) = 2021;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 130, "num_statements": 1} {"question": "PostgreSQL regression test 'pg_dependencies': Write the SELECT query (example 42).", "schema": null, "sql": "SELECT * FROM pg_input_error_info('[{\"attributes\" : null, \"dependency\" : 4, \"degree\": 1.000}]', 'pg_dependencies');", "explanation": "Regression test for Pg Dependencies in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT * FROM pg_input_error_info('[{\"attributes\" : null, \"dependency\" : 4, \"degree\": 1.000}]', 'pg_dependencies')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 115, "num_statements": 1} {"question": "What is the minimum age of visitors who attended the exhibition 'The Art of the Silk Road'?", "schema": "CREATE TABLE exhibitions (id INT, city VARCHAR(20), visitor_age INT, visit_date DATE); INSERT INTO exhibitions (id, city, visitor_age, visit_date) VALUES (1, 'New York', 12, '2022-01-01'); INSERT INTO exhibitions (id, city, visitor_age, visit_date) VALUES (2, 'Los Angeles', 15, '2022-02-15');", "sql": "SELECT MIN(visitor_age) FROM exhibitions WHERE exhibition_name = 'The Art of the Silk Road';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 92, "num_statements": 1} {"question": "Add a new column 'precinct' to the 'crime_statistics' table", "schema": "CREATE TABLE crime_statistics(id INT, location VARCHAR(20), time DATE);", "sql": "ALTER TABLE crime_statistics ADD COLUMN precinct VARCHAR(10);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "ddl_alter", "is_postgresql_specific": false, "sql_length": 61, "num_statements": 1} {"question": "What is the name of the forest where the highest temperature was recorded and it was established after the average year of establishment?", "schema": "CREATE TABLE Forests (id INT, name VARCHAR(50), country VARCHAR(50), hectares INT, year_established INT); CREATE TABLE Climate (id INT, temperature FLOAT, year INT, forest_id INT); INSERT INTO Forests (id, name, country, hectares, year_established) VALUES (1, 'Bialowieza', 'Poland', 141000, 1921), (2, 'Amazon', 'Brazil', 340000, 1968), (3, 'Daintree', 'Australia', 12000, 1770); INSERT INTO Climate (id, temperature, year, forest_id) VALUES (1, 15.5, 1921, 1), (2, 28.7, 2005, 2), (3, 34.1, 1998, 3), (4, 26.3, 1982, 2);", "sql": "SELECT Forests.name FROM Forests, Climate WHERE Forests.id = Climate.forest_id AND temperature = (SELECT MAX(temperature) FROM Climate) AND year_established > (SELECT AVG(year_established) FROM Forests);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 203, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: If the location is Chicago, Illinois, what is the airport name?", "schema": "CREATE TABLE table_18047346_4 (airport_name VARCHAR, location VARCHAR)", "sql": "SELECT airport_name FROM table_18047346_4 WHERE location = 'Chicago, Illinois';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 79, "num_statements": 1} {"question": "What is the percentage of sales by ethical material?", "schema": "CREATE TABLE SalesByMaterial (SaleID INT, Material VARCHAR(50), Sales DECIMAL(5,2)); INSERT INTO SalesByMaterial (SaleID, Material, Sales) VALUES (1, 'Organic Cotton', 1200.50), (2, 'Hemp', 752.20), (3, 'Recycled Polyester', 986.60), (4, 'Tencel', 310.10);", "sql": "SELECT Material, ROUND(SUM(Sales) / (SELECT SUM(Sales) FROM SalesByMaterial) * 100, 2) AS Percentage FROM SalesByMaterial GROUP BY Material;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 140, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who was the Mariners opponent at the game attended by 7,893?", "schema": "CREATE TABLE table_name_49 (opponent VARCHAR, attendance VARCHAR)", "sql": "SELECT opponent FROM table_name_49 WHERE attendance = '7,893';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 62, "num_statements": 1} {"question": "Write the PL/pgSQL object from PostgreSQL regression test 'misc_functions' (example 119).", "schema": null, "sql": "-- Test functions for control data\nSELECT count(*) > 0 AS ok FROM pg_control_checkpoint();", "explanation": "PL/pgSQL object from PostgreSQL core test for Misc Functions.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 90, "num_statements": 1} {"question": "What is the most common hashtag used in posts made by users from India?", "schema": "CREATE TABLE posts (id INT, user_id INT, content TEXT, hashtags TEXT, post_date DATE); INSERT INTO posts (id, user_id, content, hashtags, post_date) VALUES (1, 1, 'Hello World', '#datascience', '2022-06-01'), (2, 1, 'I love data', '#ai', '2022-06-02'), (3, 2, 'Namaste', '#india', '2022-06-03'); CREATE TABLE users (id INT, name VARCHAR(100), country VARCHAR(50)); INSERT INTO users (id, name, country) VALUES (1, 'Akshay', 'India'), (2, 'Bhavna', 'India');", "sql": "SELECT SUBSTRING_INDEX(SUBSTRING_INDEX(hashtags, ' ', n.n), ' ', -1) hashtag, COUNT(*) count FROM posts JOIN users ON posts.user_id = users.id CROSS JOIN (SELECT 1 n UNION SELECT 2 UNION SELECT 3 UNION SELECT 4 UNION SELECT 5) n WHERE users.country = 'India' GROUP BY hashtag ORDER BY count DESC LIMIT 1;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 304, "num_statements": 1} {"question": "What was the total revenue for vegan skincare products in the US in Q3 2021?", "schema": "CREATE TABLE skincare_sales (sale_date DATE, product_vegan BOOLEAN, revenue DECIMAL(10,2)); INSERT INTO skincare_sales (sale_date, product_vegan, revenue) VALUES ('2021-07-01', TRUE, 60.00), ('2021-07-02', FALSE, 50.00);", "sql": "SELECT SUM(revenue) FROM skincare_sales WHERE product_vegan = TRUE AND sale_date BETWEEN '2021-07-01' AND '2021-09-30';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 119, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who was the partner for the jazz piece in the bottom three?", "schema": "CREATE TABLE table_name_40 (partner VARCHAR, style VARCHAR, results VARCHAR)", "sql": "SELECT partner FROM table_name_40 WHERE style = 'jazz' AND results = 'bottom three';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 84, "num_statements": 1} {"question": "PL/pgSQL test: Plpython Types (example 89).", "schema": null, "sql": "SELECT * FROM test_type_conversion_array_int4(NULL);", "explanation": "PL/pgSQL example from PostgreSQL source test for Plpython Types.", "validation_query": null, "source": "plpgsql_source", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 52, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which model number does bombardier manufacture?", "schema": "CREATE TABLE table_name_28 (model_no VARCHAR, manufacturer VARCHAR)", "sql": "SELECT model_no FROM table_name_28 WHERE manufacturer = 'bombardier';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 69, "num_statements": 1} {"question": "List all news articles related to 'corruption' or 'ethics', returning their title, content, and author's name.", "schema": "CREATE TABLE articles_ext (id INT, title VARCHAR(255), content TEXT, author_id INT); INSERT INTO articles_ext (id, title, content, author_id) VALUES (1, 'Article 1', 'Corruption is...', 1), (2, 'Article 2', 'Ethics in...', 2); CREATE TABLE authors_ext (id INT, name VARCHAR(255)); INSERT INTO authors_ext (id, name) VALUES (1, 'Author 1'), (2, 'Author 2');", "sql": "SELECT a.title, a.content, au.name FROM articles_ext a JOIN authors_ext au ON a.author_id = au.id WHERE a.title LIKE '%corruption%' OR a.title LIKE '%ethics%' OR a.content LIKE '%corruption%' OR a.content LIKE '%ethics%';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 221, "num_statements": 1} {"question": "How many deep-sea expeditions have been conducted in the Arctic ocean by research vessels registered in the United States?", "schema": "CREATE TABLE deep_sea_expeditions (expedition_id INTEGER, ocean TEXT, research_vessel_flag TEXT); CREATE TABLE research_vessels (vessel_id INTEGER, vessel_name TEXT, vessel_flag TEXT);", "sql": "SELECT COUNT(expedition_id) FROM deep_sea_expeditions JOIN research_vessels ON deep_sea_expeditions.research_vessel_flag = research_vessels.vessel_flag WHERE ocean = 'Arctic' AND research_vessels.vessel_flag = 'United States';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 226, "num_statements": 1} {"question": "Update the exhibition 'Art of the Indigenous' to extend its duration until the end of 2024.", "schema": "CREATE TABLE Exhibitions (exhibition_id INT, exhibition_name VARCHAR(50), start_date DATE, end_date DATE); INSERT INTO Exhibitions (exhibition_id, exhibition_name, start_date, end_date) VALUES (1, 'Art of the Indigenous', '2023-01-01', '2023-12-31');", "sql": "UPDATE Exhibitions SET end_date = '2024-12-31' WHERE exhibition_name = 'Art of the Indigenous';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 95, "num_statements": 1} {"question": "How many employees have been trained in circular economy principles since the beginning of the program?", "schema": "CREATE TABLE employee_training (employee_id INT, training_date DATE, topic VARCHAR(50));", "sql": "SELECT COUNT(*) FROM employee_training WHERE topic = 'Circular Economy';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 72, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the official ITV1 HD rating in millions for the episode that had an official ITV1 rating of 8.98 million?", "schema": "CREATE TABLE table_27319183_5 (official_itv1_hd_rating__millions_ VARCHAR, official_itv1_rating__millions_ VARCHAR)", "sql": "SELECT official_itv1_hd_rating__millions_ FROM table_27319183_5 WHERE official_itv1_rating__millions_ = '8.98';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 111, "num_statements": 1} {"question": "Use generate_series to create a calendar table for date-based reporting.", "schema": null, "sql": "SELECT d::date AS calendar_date,\n EXTRACT(dow FROM d) AS day_of_week,\n EXTRACT(week FROM d) AS iso_week,\n to_char(d, 'Month') AS month_name\nFROM generate_series('2025-01-01'::date, '2025-12-31'::date, '1 day'::interval) AS d;", "explanation": "generate_series with date arguments produces one row per day. Combined with EXTRACT and to_char for reporting dimensions.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 243, "num_statements": 1} {"question": "What was the average rental duration for adaptive bikes in February 2022?", "schema": "CREATE TABLE bike_share (bike_id INT, bike_type VARCHAR(255), rental_start_time TIMESTAMP, rental_end_time TIMESTAMP); INSERT INTO bike_share (bike_id, bike_type, rental_start_time, rental_end_time) VALUES (6, 'Adaptive', '2022-02-01 10:00:00', '2022-02-01 12:00:00');", "sql": "SELECT AVG(TIMESTAMPDIFF(SECOND, rental_start_time, rental_end_time)) AS avg_rental_duration FROM bike_share WHERE bike_type = 'Adaptive' AND rental_start_time >= '2022-02-01' AND rental_start_time < '2022-03-01';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 213, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the date of the game between Melbourne and Footscray?", "schema": "CREATE TABLE table_name_78 (date VARCHAR, home_team VARCHAR)", "sql": "SELECT date FROM table_name_78 WHERE home_team = 'melbourne';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 61, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: With 14 under the date, what is the tonnage of the ship?", "schema": "CREATE TABLE table_name_19 (tonnage VARCHAR, date VARCHAR)", "sql": "SELECT tonnage FROM table_name_19 WHERE date = '14';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 52, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'gin' (example 2).", "schema": null, "sql": "create index gin_test_idx on gin_test_tbl using gin (i)\n with (fastupdate = on, gin_pending_list_limit = 4096);", "explanation": "DDL from PostgreSQL core regression test for Gin.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "ddl_index", "is_postgresql_specific": false, "sql_length": 112, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many overs when there are 5231 runs and fewer than 37 matches?", "schema": "CREATE TABLE table_name_33 (overs INTEGER, runs VARCHAR, matches VARCHAR)", "sql": "SELECT SUM(overs) FROM table_name_33 WHERE runs = 5231 AND matches < 37;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 72, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the home team score when st kilda is the away team?", "schema": "CREATE TABLE table_name_51 (home_team VARCHAR, away_team VARCHAR)", "sql": "SELECT home_team AS score FROM table_name_51 WHERE away_team = 'st kilda';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 74, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: what is the attendance when the location is veterans stadium, the game is more than 3 and the time is 2:21?", "schema": "CREATE TABLE table_name_39 (attendance INTEGER, time VARCHAR, location VARCHAR, game VARCHAR)", "sql": "SELECT AVG(attendance) FROM table_name_39 WHERE location = 'veterans stadium' AND game > 3 AND time = '2:21';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 109, "num_statements": 1} {"question": "What is the total playtime of player 'Olivia'?", "schema": "CREATE TABLE player_sessions (id INT, player_name TEXT, playtime INT); INSERT INTO player_sessions (id, player_name, playtime) VALUES (1, 'Olivia', 120); INSERT INTO player_sessions (id, player_name, playtime) VALUES (2, 'Olivia', 150); INSERT INTO player_sessions (id, player_name, playtime) VALUES (3, 'William', 100);", "sql": "SELECT SUM(playtime) FROM player_sessions WHERE player_name = 'Olivia';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 71, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the date with the catalogue # cocy-80093?", "schema": "CREATE TABLE table_name_37 (date VARCHAR, catalogue__number VARCHAR)", "sql": "SELECT date FROM table_name_37 WHERE catalogue__number = 'cocy-80093';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 70, "num_statements": 1} {"question": "What is the number of unique animals in the 'animal_population' table that are not endangered?", "schema": "CREATE TABLE animal_population (id INT, animal_name VARCHAR(50), population INT, endangered_status VARCHAR(50));", "sql": "SELECT COUNT(DISTINCT animal_name) FROM animal_population WHERE endangered_status != 'Endangered';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 98, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What place is Nolan Henke in?", "schema": "CREATE TABLE table_name_10 (place VARCHAR, player VARCHAR)", "sql": "SELECT place FROM table_name_10 WHERE player = 'nolan henke';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 61, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What are the different parties of representative? Show the party name and the number of representatives in each party.", "schema": "CREATE TABLE representative (Party VARCHAR)", "sql": "SELECT Party, COUNT(*) FROM representative GROUP BY Party;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 58, "num_statements": 1} {"question": "How many regions have wildlife habitats?", "schema": "CREATE TABLE regions (region_id INT, region_name TEXT);CREATE TABLE wildlife_habitat (habitat_id INT, region_id INT); INSERT INTO regions (region_id, region_name) VALUES (1, 'Region A'), (2, 'Region B'), (3, 'Region C'); INSERT INTO wildlife_habitat (habitat_id, region_id) VALUES (1, 1), (2, 1), (3, 2), (4, 3), (5, 3);", "sql": "SELECT region_id, region_name, COUNT(*) FROM regions JOIN wildlife_habitat ON regions.region_id = wildlife_habitat.region_id GROUP BY region_id, region_name HAVING COUNT(*) > 0;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 177, "num_statements": 1} {"question": "What is the average salary of workers in the workforce development sector, grouped by their job title and region?", "schema": "CREATE TABLE workers (worker_id INT, sector VARCHAR(255), job_title VARCHAR(255), region VARCHAR(255), salary DECIMAL(10,2)); INSERT INTO workers (worker_id, sector, job_title, region, salary) VALUES (1, 'Workforce Development', 'Engineer', 'North America', 80000.00), (2, 'Workforce Development', 'Technician', 'North America', 50000.00), (3, 'Workforce Development', 'Manager', 'Europe', 90000.00);", "sql": "SELECT job_title, region, AVG(salary) FROM workers WHERE sector = 'Workforce Development' GROUP BY job_title, region;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 117, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which Manufacturer has a Grid smaller than 9, and a Time of +22.517?", "schema": "CREATE TABLE table_name_47 (manufacturer VARCHAR, grid VARCHAR, time VARCHAR)", "sql": "SELECT manufacturer FROM table_name_47 WHERE grid < 9 AND time = '+22.517';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 75, "num_statements": 1} {"question": "What is the sum of diversity metric scores for companies founded by individuals who identify as disabled in the cybersecurity industry?", "schema": "CREATE TABLE companies (id INT, name TEXT, industry TEXT, founding_date DATE, founder_identity TEXT, diversity_metric FLOAT); INSERT INTO companies (id, name, industry, founding_date, founder_identity, diversity_metric) VALUES (1, 'CyberSecure', 'Cybersecurity', '2018-01-01', 'Disabled', 0.65);", "sql": "SELECT SUM(diversity_metric) FROM companies WHERE industry = 'Cybersecurity' AND founder_identity = 'Disabled';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 111, "num_statements": 1} {"question": "Get the names and number of stories of all buildings in 'buildings' table taller than 300 meters", "schema": "CREATE TABLE buildings (building_id INT PRIMARY KEY, building_name VARCHAR(100), number_of_stories INT, height FLOAT, country VARCHAR(50));", "sql": "SELECT building_name, number_of_stories FROM buildings WHERE height > 300;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 74, "num_statements": 1} {"question": "What is the rank of each defendant's age within their court case?", "schema": "CREATE TABLE court_cases (case_id INT, court_date DATE); INSERT INTO court_cases (case_id, court_date) VALUES (1, '2022-01-01'), (2, '2021-12-20'), (3, '2022-02-15'); CREATE TABLE defendant_info (defendant_id INT, case_id INT, age INT, gender VARCHAR(50)); INSERT INTO defendant_info (defendant_id, case_id, age, gender) VALUES (1, 1, 35, 'Male'), (2, 2, 27, 'Female'), (3, 1, 42, 'Non-binary'), (4, 3, 19, 'Female'), (5, 3, 50, 'Male'), (6, 1, 32, 'Male');", "sql": "SELECT defendant_id, case_id, age, ROW_NUMBER() OVER (PARTITION BY case_id ORDER BY age) as age_rank FROM defendant_info;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "query_window_function", "is_postgresql_specific": true, "sql_length": 121, "num_statements": 1} {"question": "Identify regulatory frameworks in the US and their respective digital assets.", "schema": "CREATE TABLE DigitalAssets (AssetId INT, AssetName VARCHAR(50), RegulatorId INT); CREATE TABLE Regulators (RegulatorId INT, RegulatorName VARCHAR(50), Region VARCHAR(50)); INSERT INTO DigitalAssets (AssetId, AssetName, RegulatorId) VALUES (1, 'ETH', 1); INSERT INTO DigitalAssets (AssetId, AssetName, RegulatorId) VALUES (2, 'BTC', 2); INSERT INTO DigitalAssets (AssetId, AssetName, RegulatorId) VALUES (3, 'LTC', 3); INSERT INTO Regulators (RegulatorId, RegulatorName, Region) VALUES (1, 'Regulator1', 'US'); INSERT INTO Regulators (RegulatorId, RegulatorName, Region) VALUES (2, 'Regulator2', 'US'); INSERT INTO Regulators (RegulatorId, RegulatorName, Region) VALUES (3, 'Regulator3', 'Canada');", "sql": "SELECT da.AssetName, r.RegulatorName FROM DigitalAssets da INNER JOIN Regulators r ON da.RegulatorId = r.RegulatorId WHERE r.Region = 'US';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 139, "num_statements": 1} {"question": "What is the total volume of wastewater treated in Jakarta in 2019?", "schema": "CREATE TABLE wastewater_treatment(city VARCHAR(20), year INT, volume INT); INSERT INTO wastewater_treatment(city, year, volume) VALUES ('Jakarta', 2015, 12000), ('Jakarta', 2016, 13000), ('Jakarta', 2017, 14000), ('Jakarta', 2018, 15000), ('Jakarta', 2019, 0);", "sql": "SELECT SUM(volume) FROM wastewater_treatment WHERE city = 'Jakarta' AND year = 2019;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 84, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What stadium was the game played at when the result was hunter mariners def. sheffield eagles?", "schema": "CREATE TABLE table_name_43 (stadium VARCHAR, result VARCHAR)", "sql": "SELECT stadium FROM table_name_43 WHERE result = 'hunter mariners def. sheffield eagles';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 89, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many earnings have Wins larger than 3?", "schema": "CREATE TABLE table_name_70 (earnings___ INTEGER, wins INTEGER)", "sql": "SELECT SUM(earnings___) AS $__ FROM table_name_70 WHERE wins > 3;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 65, "num_statements": 1} {"question": "How many traffic accidents were there in each neighborhood in the last 3 months, grouped by day?", "schema": "CREATE TABLE neighborhoods (id INT, name TEXT);CREATE TABLE accidents (id INT, neighborhood_id INT, date DATE);", "sql": "SELECT n.name, DATEADD(day, DATEDIFF(day, 0, a.date), 0) AS truncated_date, COUNT(a.id) FROM neighborhoods n JOIN accidents a ON n.id = a.neighborhood_id WHERE a.date >= DATEADD(month, -3, GETDATE()) GROUP BY n.id, truncated_date ORDER BY truncated_date;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 254, "num_statements": 1} {"question": "PostgreSQL regression test 'strings': Write the SELECT query (example 405).", "schema": null, "sql": "select split_part('joeuser@mydatabase','@@',1) AS \"joeuser@mydatabase\";", "explanation": "Regression test for Strings in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select split_part('joeuser@mydatabase','@@',1) AS \"joeuser@mydatabase\") AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 71, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which republican was first elected in 1886 in the district of ohio 17?", "schema": "CREATE TABLE table_name_16 (result VARCHAR, district VARCHAR, party VARCHAR, first_elected VARCHAR)", "sql": "SELECT result FROM table_name_16 WHERE party = 'republican' AND first_elected = '1886' AND district = 'ohio 17';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 112, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which Gold has a Rank larger than 6, and a Nation of netherlands, and a Bronze smaller than 0?", "schema": "CREATE TABLE table_name_25 (gold INTEGER, bronze VARCHAR, rank VARCHAR, nation VARCHAR)", "sql": "SELECT SUM(gold) FROM table_name_25 WHERE rank > 6 AND nation = 'netherlands' AND bronze < 0;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 93, "num_statements": 1} {"question": "What are the unique threat_actors involved in incidents that occurred in the 'Industrial' sector?", "schema": "CREATE TABLE incidents (id INT, threat_actor VARCHAR(255), incident_type VARCHAR(255), sector VARCHAR(255)); INSERT INTO incidents (id, threat_actor, incident_type, sector) VALUES (1, 'APT28', 'Network Intrusion', 'Financial'), (2, 'APT33', 'Data Exfiltration', 'Industrial'), (3, 'APT34', 'Network Intrusion', 'Government'), (4, 'APT29', 'Data Exfiltration', 'Industrial'), (5, 'APT35', 'Network Intrusion', 'Industrial');", "sql": "SELECT DISTINCT threat_actor FROM incidents WHERE sector = 'Industrial';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 72, "num_statements": 1} {"question": "List the names and categories of all projects that were completed in 2020 or earlier and have a budget greater than the average budget for all completed projects.", "schema": "CREATE TABLE projects (id INT, name VARCHAR(255), category VARCHAR(255), budget FLOAT, year INT, status VARCHAR(255)); INSERT INTO projects (id, name, category, budget, year, status) VALUES (11, 'Traffic Signal Upgrade', 'Transportation', 100000.00, 2019, 'Completed');", "sql": "SELECT name, category FROM projects WHERE (year <= 2020 OR year IS NULL) AND status = 'Completed' AND budget > (SELECT AVG(budget) FROM projects WHERE status = 'Completed');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 173, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the lowest lost entry for a team with fewer than 21 goals for?", "schema": "CREATE TABLE table_name_39 (lost INTEGER, goals_for INTEGER)", "sql": "SELECT MIN(lost) FROM table_name_39 WHERE goals_for < 21;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 57, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'spi' (example 27).", "schema": null, "sql": "insert into fkeys values (40, '4', 2);", "explanation": "Example query from the 'spi' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 38, "num_statements": 1} {"question": "Show a SQL definition from the timescaledb project (size_utils, item 8).", "schema": null, "sql": "-- Get relation size of hypertable\n-- like pg_relation_size(hypertable)\n--\n-- hypertable - hypertable to get size of\n--\n-- Returns:\n-- table_bytes - Disk space used by hypertable (like pg_relation_size(hypertable))\n-- index_bytes - Disk space used by indexes\n-- toast_bytes - Disk space of toast tables\n-- total_bytes - Total disk space used by the specified table, including all indexes and TOAST data\n\nCREATE OR REPLACE FUNCTION @extschema@.hypertable_detailed_size(\n hypertable REGCLASS)\nRETURNS TABLE (table_bytes BIGINT,\n index_bytes BIGINT,\n toast_bytes BIGINT,\n total_bytes BIGINT,\n node_name NAME)\nLANGUAGE PLPGSQL VOLATILE STRICT AS\n$BODY$\nDECLARE\n table_name NAME = NULL;", "explanation": "SQL definition from the open-source timescaledb PostgreSQL project.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "plpgsql_function", "is_postgresql_specific": true, "sql_length": 796, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which Week had a Result of w 23-17?", "schema": "CREATE TABLE table_name_58 (week INTEGER, result VARCHAR)", "sql": "SELECT SUM(week) FROM table_name_58 WHERE result = 'w 23-17';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 61, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the points when played is less than 38?", "schema": "CREATE TABLE table_name_6 (points INTEGER, played INTEGER)", "sql": "SELECT SUM(points) FROM table_name_6 WHERE played < 38;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 55, "num_statements": 1} {"question": "List the names and locations of Europium producers that started production after 2015.", "schema": "CREATE TABLE europium_production (producer_id INT PRIMARY KEY, name VARCHAR(255), location VARCHAR(255), start_year INT);", "sql": "SELECT name, location FROM europium_production WHERE start_year > 2015;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 71, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who has the low lap total in a maserati with grid 2?", "schema": "CREATE TABLE table_name_47 (laps INTEGER, constructor VARCHAR, grid VARCHAR)", "sql": "SELECT MIN(laps) FROM table_name_47 WHERE constructor = 'maserati' AND grid = 2;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 80, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who was the constructor of car 22?", "schema": "CREATE TABLE table_15491596_1 (constructor VARCHAR, no VARCHAR)", "sql": "SELECT constructor FROM table_15491596_1 WHERE no = 22;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 55, "num_statements": 1} {"question": "What is the total billing amount for cases in Texas handled by attorneys who have passed the bar exam in that state?", "schema": "CREATE TABLE attorneys (attorney_id INT, name TEXT, state TEXT, passed_bar_exam_tx BOOLEAN); INSERT INTO attorneys (attorney_id, name, state, passed_bar_exam_tx) VALUES (1, 'Jane Doe', 'Texas', TRUE), (2, 'John Smith', 'California', FALSE), (3, 'Sara Connor', 'Texas', TRUE), (4, 'Tom Williams', 'New York', FALSE); CREATE TABLE cases (case_id INT, attorney_id INT, billing_amount INT, state TEXT); INSERT INTO cases (case_id, attorney_id, billing_amount, state) VALUES (1, 1, 10000, 'Texas'), (2, 2, 8000, 'California'), (3, 3, 15000, 'Texas'), (4, 4, 6000, 'New York');", "sql": "SELECT SUM(cases.billing_amount) FROM cases INNER JOIN attorneys ON cases.attorney_id = attorneys.attorney_id WHERE attorneys.passed_bar_exam_tx = TRUE AND cases.state = attorneys.state;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 186, "num_statements": 1} {"question": "How many unique donors have donated to programs related to Children's Welfare in total?", "schema": "CREATE TABLE Donors (DonorID INT, DonorName TEXT); CREATE TABLE Donations (DonationID INT, DonorID INT, ProgramID INT); CREATE TABLE Programs (ProgramID INT, ProgramName TEXT); INSERT INTO Donors (DonorID, DonorName) VALUES (1, 'Anna'), (2, 'Bella'), (3, 'Charlie'); INSERT INTO Donations (DonationID, DonorID, ProgramID) VALUES (1, 1, 1), (2, 1, 2), (3, 2, 2), (4, 3, 3); INSERT INTO Programs (ProgramID, ProgramName) VALUES (1, 'Children''s Education'), (2, 'Children''s Health'), (3, 'Women''s Rights');", "sql": "SELECT COUNT(DISTINCT d.DonorID) FROM Donations d JOIN Programs p ON d.ProgramID = p.ProgramID WHERE p.ProgramName LIKE '%Children%';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 133, "num_statements": 1} {"question": "PostgreSQL regression test 'boolean': Write the SELECT query (example 32).", "schema": null, "sql": "SELECT not bool 'f' AS true;", "explanation": "Regression test for Boolean in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT not bool 'f' AS true) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 28, "num_statements": 1} {"question": "What's the total donation amount for each program in Q4 2022, only for programs with more than 500 participants?", "schema": "CREATE TABLE programs_q4_2022 (id INT, program_name VARCHAR(50), participants INT, donation_amount DECIMAL(10,2)); INSERT INTO programs_q4_2022 (id, program_name, participants, donation_amount) VALUES (1, 'Program M', 600, 8000.00), (2, 'Program N', 400, 7000.00), (3, 'Program O', 800, 9000.00), (4, 'Program P', 300, 6000.00), (5, 'Program Q', 700, 10000.00);", "sql": "SELECT program_name, SUM(donation_amount) as total_donation FROM programs_q4_2022 WHERE participants > 500 GROUP BY program_name;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 129, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'foreign_key' (example 759).", "schema": null, "sql": "create table other_partitioned_fk_1 partition of other_partitioned_fk\n for values in (2048);", "explanation": "DDL from PostgreSQL core regression test for Foreign Key.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_table", "is_postgresql_specific": false, "sql_length": 93, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the date of the game when the venue is Princes Park?", "schema": "CREATE TABLE table_name_40 (date VARCHAR, venue VARCHAR)", "sql": "SELECT date FROM table_name_40 WHERE venue = 'princes park';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 60, "num_statements": 1} {"question": "Which artists have had concerts in both New York and Los Angeles?", "schema": "CREATE TABLE concerts (id INT, artist_name VARCHAR(255), city VARCHAR(255), revenue FLOAT); INSERT INTO concerts (id, artist_name, city, revenue) VALUES (1, 'Taylor Swift', 'Los Angeles', 500000.00), (2, 'BTS', 'New York', 750000.00), (3, 'Adele', 'London', 600000.00), (4, 'Taylor Swift', 'New York', 600000.00);", "sql": "SELECT artist_name FROM concerts WHERE city IN ('New York', 'Los Angeles') GROUP BY artist_name HAVING COUNT(DISTINCT city) = 2;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 128, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What status has paleopanax as the name?", "schema": "CREATE TABLE table_name_78 (status VARCHAR, name VARCHAR)", "sql": "SELECT status FROM table_name_78 WHERE name = 'paleopanax';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 59, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'eager_aggregate' (example 84).", "schema": null, "sql": "CREATE TABLE eager_agg_tab_ml_p2_s1 PARTITION OF eager_agg_tab_ml_p2 FOR VALUES FROM (10) TO (15);", "explanation": "DDL from PostgreSQL core regression test for Eager Aggregate.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_table", "is_postgresql_specific": false, "sql_length": 98, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which Oricon has a Romaji title of nakitakunalu?", "schema": "CREATE TABLE table_name_17 (oricon INTEGER, romaji_title VARCHAR)", "sql": "SELECT MAX(oricon) FROM table_name_17 WHERE romaji_title = 'nakitakunalu';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 74, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which opponent has a Date of april 14?", "schema": "CREATE TABLE table_name_20 (opponent VARCHAR, date VARCHAR)", "sql": "SELECT opponent FROM table_name_20 WHERE date = 'april 14';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 59, "num_statements": 1} {"question": "What is the distribution of customer complaints by region in the past month?", "schema": "CREATE TABLE customer_complaints (complaint_id INT, complaint_type VARCHAR(50), region VARCHAR(50), complaint_date DATE); INSERT INTO customer_complaints (complaint_id, complaint_type, region, complaint_date) VALUES (1, 'Billing', 'San Francisco', '2021-03-01'), (2, 'Network', 'San Francisco', '2021-03-05'), (3, 'Billing', 'New York', '2021-03-10'), (4, 'Billing', 'Los Angeles', '2021-03-15');", "sql": "SELECT region, complaint_type, COUNT(*) as complaints, PERCENT_RANK() OVER (PARTITION BY region ORDER BY complaints DESC) as complaint_percentile FROM customer_complaints WHERE complaint_date >= DATEADD(month, -1, CURRENT_DATE) GROUP BY region, complaint_type;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "query_window_function", "is_postgresql_specific": true, "sql_length": 260, "num_statements": 1} {"question": "PostgreSQL regression test 'money': Write the SELECT query (example 99).", "schema": null, "sql": "SELECT '92233720368547758.07'::money::numeric;", "explanation": "Regression test for Money in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT '92233720368547758.07'::money::numeric) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 46, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'brin_bloom' (example 54).", "schema": null, "sql": "INSERT INTO brintest_bloom SELECT\n\trepeat(stringu1, 42)::bytea,\n\tsubstr(stringu1, 1, 1)::\"char\",\n\tstringu1::name, 142857 * tenthous,\n\tthousand,\n\ttwothousand,\n\trepeat(stringu1, 42),\n\tunique1::oid,\n\t(four + 1.0)/(hundred+1),\n\todd::float8 / (tenthous + 1),\n\tformat('%s:00:%s:00:%s:00', to_hex(odd), to_hex(even), to_hex(hundred))::macaddr,\n\tinet '10.2.3.4' + tenthous,\n\tcidr '10.2.3/24' + tenthous,\n\tsubstr(stringu1, 1, 1)::bpchar,\n\tdate '1995-08-15' + tenthous,\n\ttime '01:20:30' + thousand * interval '18.5 second',\n\ttimestamp '1942-07-23 03:05:09' + tenthous * interval '36.38 hours',\n\ttimestamptz '1972-10-10 03:00' + thousand * interval '1 hour',\n\tjustify_days(justify_hours(tenthous * interval '12 minutes')),\n\ttimetz '01:30:20' + hundred * interval '15 seconds',\n\ttenthous::numeric(36,30) * fivethous * even / (hundred + 1),\n\tformat('%s%s-%s-%s-%s-%s%s%s', to_char(tenthous, 'FM0000'), to_char(tenthous, 'FM0000'), to_char(tenthous, 'FM0000'), to_char(tenthous, 'FM0000'), to_char(tenthous, 'FM0000'), to_char(tenthous, 'FM0000'), to_char(tenthous, 'FM0000'), to_char(tenthous, 'FM0000'))::uuid,\n\tformat('%s/%s%s', odd, even, tenthous)::pg_lsn\nFROM tenk1 ORDER BY unique2 LIMIT 5 OFFSET 5;", "explanation": "DML from PostgreSQL core regression test for Brin Bloom.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 1192, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is Place, when Player is \"Phil Mickelson\"?", "schema": "CREATE TABLE table_name_17 (place VARCHAR, player VARCHAR)", "sql": "SELECT place FROM table_name_17 WHERE player = 'phil mickelson';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 64, "num_statements": 1} {"question": "Insert new records for mobile subscribers with their respective data usage in the North region.", "schema": "CREATE TABLE mobile_subscribers_3 (subscriber_id INT, data_usage FLOAT, region VARCHAR(20)); INSERT INTO mobile_subscribers_3 (subscriber_id, data_usage, region) VALUES (7, 22.6, 'North'), (8, 29.8, 'North'), (9, 20.1, 'North');", "sql": "INSERT INTO mobile_subscribers SELECT * FROM mobile_subscribers_3 WHERE region = 'North';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 89, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who is the Driver on Bob Keselowski's team?", "schema": "CREATE TABLE table_name_57 (driver_s_ VARCHAR, owner_s_ VARCHAR)", "sql": "SELECT driver_s_ FROM table_name_57 WHERE owner_s_ = 'bob keselowski';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 70, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What are the fewest wickets when player's career spanned 1946-1960 and maidens were more than 419?", "schema": "CREATE TABLE table_name_84 (wickets INTEGER, career VARCHAR, maidens VARCHAR)", "sql": "SELECT MIN(wickets) FROM table_name_84 WHERE career = '1946-1960' AND maidens > 419;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 84, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which Region that has a growth of −3.6%?", "schema": "CREATE TABLE table_name_24 (region VARCHAR, _percentage_growth VARCHAR)", "sql": "SELECT region FROM table_name_24 WHERE _percentage_growth = '−3.6';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 67, "num_statements": 1} {"question": "Which users have played all games in the 'gaming' database?", "schema": "CREATE TABLE users_games_2 (user_id INT, game_id INT, played_at TIMESTAMP); INSERT INTO users_games_2 (user_id, game_id, played_at) VALUES (1, 1, '2021-01-01 10:00:00'), (2, 1, '2021-01-02 11:00:00'), (3, 2, '2021-01-03 12:00:00'), (4, 2, '2021-01-04 13:00:00'), (5, 3, '2021-01-05 14:00:00'), (6, 1, '2021-01-06 15:00:00'), (6, 2, '2021-01-07 16:00:00'), (6, 3, '2021-01-08 17:00:00');", "sql": "SELECT user_id FROM users_games_2 GROUP BY user_id HAVING COUNT(DISTINCT game_id) = (SELECT COUNT(DISTINCT game_id) FROM users_games_2);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 136, "num_statements": 1} {"question": "What is the average completion time for sustainable building projects in Washington?", "schema": "CREATE TABLE project_timelines (id INT, project_id INT, project_type TEXT, completion_date DATE); INSERT INTO project_timelines (id, project_id, project_type, completion_date) VALUES (1, 111, 'Sustainable', '2022-10-01'), (2, 222, 'Sustainable', '2022-11-15'), (3, 333, 'Conventional', '2022-09-30');", "sql": "SELECT AVG(DATEDIFF(completion_date, issue_date)) FROM building_permits bp JOIN project_timelines pt ON bp.permit_number = pt.project_id WHERE bp.project_state = 'Washington' AND pt.project_type = 'Sustainable';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 211, "num_statements": 1} {"question": "List the names and daily transaction counts of all digital assets on the 'Solana' blockchain.", "schema": "CREATE TABLE crypto_assets (asset_id INT, asset_name VARCHAR(50), blockchain_id INT, daily_transactions INT); INSERT INTO crypto_assets (asset_id, asset_name, blockchain_id, daily_transactions) VALUES (1, 'Solana Token', 2, 10000); INSERT INTO crypto_assets (asset_id, asset_name, blockchain_id, daily_transactions) VALUES (2, 'Serum', 2, 8000); INSERT INTO blockchains (blockchain_id, blockchain_name) VALUES (2, 'Solana');", "sql": "SELECT crypto_assets.asset_name, crypto_assets.daily_transactions FROM crypto_assets INNER JOIN blockchains ON crypto_assets.blockchain_id = blockchains.blockchain_id WHERE blockchains.blockchain_name = 'Solana';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 212, "num_statements": 1} {"question": "Show a SQL definition from the timescaledb project (drop_schema, item 10).", "schema": null, "sql": "CREATE TABLE hypertable_schema.test2 (time timestamptz, temp float, location int);", "explanation": "SQL definition from the open-source timescaledb PostgreSQL project.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_table", "is_postgresql_specific": false, "sql_length": 82, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'citext' (example 29).", "schema": null, "sql": "SELECT name, 'A' = name AS \"eq_A\" FROM try WHERE name <> 'â';", "explanation": "Example query from the 'citext' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 61, "num_statements": 1} {"question": "What is the average response time for emergency calls in the \"downtown\" region?", "schema": "CREATE TABLE EmergencyCalls (id INT, region VARCHAR(20), response_time INT);", "sql": "SELECT AVG(response_time) FROM EmergencyCalls WHERE region = 'downtown';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 72, "num_statements": 1} {"question": "What is the total number of donations and the total donation amount for each month in the year 2020?", "schema": "CREATE TABLE DonationHistory (DonationID int, DonationDate date); INSERT INTO DonationHistory (DonationID, DonationDate) VALUES (1, '2020-01-01'), (2, '2020-02-01'), (3, '2020-03-01'); CREATE TABLE Donations (DonationID int, DonationAmount numeric); INSERT INTO Donations (DonationID, DonationAmount) VALUES (1, 500), (2, 1200), (3, 250);", "sql": "SELECT EXTRACT(MONTH FROM DonationDate) as Month, COUNT(*) as NumDonations, SUM(DonationAmount) as TotalDonationAmount FROM DonationHistory JOIN Donations ON DonationHistory.DonationID = Donations.DonationID WHERE EXTRACT(YEAR FROM DonationDate) = 2020 GROUP BY Month;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 268, "num_statements": 1} {"question": "What is the total playtime (in minutes) for each game in the \"MobileGamingCommunity\"?", "schema": "CREATE TABLE Games (GameID INT PRIMARY KEY, GameName VARCHAR(50), GamingCommunity VARCHAR(50)); CREATE TABLE GameSessions (SessionID INT PRIMARY KEY, GameName VARCHAR(50), Playtime MINUTE, FOREIGN KEY (GameName) REFERENCES Games(GameName)); INSERT INTO Games (GameID, GameName, GamingCommunity) VALUES (1, 'ClashOfClans', 'MobileGamingCommunity'), (2, 'PUBGMobile', 'MobileGamingCommunity'), (3, 'FortniteMobile', 'MobileGamingCommunity'); INSERT INTO GameSessions (SessionID, GameName, Playtime) VALUES (1, 'ClashOfClans', 120), (2, 'ClashOfClans', 150), (3, 'PUBGMobile', 200), (4, 'FortniteMobile', 250);", "sql": "SELECT GameName, SUM(Playtime) FROM GameSessions JOIN Games ON GameSessions.GameName = Games.GameName WHERE Games.GamingCommunity = 'MobileGamingCommunity' GROUP BY GameName;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 174, "num_statements": 1} {"question": "What is the average economic diversification investment in the 'economic_diversification' schema?", "schema": "CREATE TABLE economic_diversification.investments (id INT, investment_type VARCHAR(50), amount FLOAT); INSERT INTO economic_diversification.investments (id, investment_type, amount) VALUES (1, 'Renewable Energy', 500000), (2, 'Tourism', 750000), (3, 'Manufacturing', 1000000);", "sql": "SELECT AVG(amount) FROM economic_diversification.investments;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 61, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who's captain of the team whose stadium has a capacity of 17800 people?", "schema": "CREATE TABLE table_23214833_1 (team_captain VARCHAR, capacity VARCHAR)", "sql": "SELECT team_captain FROM table_23214833_1 WHERE capacity = 17800;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 65, "num_statements": 1} {"question": "Find the average safety score for each vehicle model in a specific country.", "schema": "CREATE TABLE Vehicle_Models (model_id INT, model VARCHAR(50), country_id INT); INSERT INTO Vehicle_Models (model_id, model, country_id) VALUES (1001, 'Tesla Model 3', 1); CREATE TABLE Safety_Tests (test_id INT, model_id INT, result INT, test_type VARCHAR(50)); INSERT INTO Safety_Tests (test_id, model_id, result, test_type) VALUES (1, 1001, 95, 'Crash Test'); CREATE TABLE Country (country_id INT, country_name VARCHAR(50)); INSERT INTO Country (country_id, country_name) VALUES (1, 'USA');", "sql": "SELECT vm.model, AVG(st.result) as \"Average Safety Score\" FROM Vehicle_Models vm JOIN Safety_Tests st ON vm.model_id = st.model_id JOIN Country c ON vm.country_id = c.country_id WHERE c.country_name = 'USA' GROUP BY vm.model;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 225, "num_statements": 1} {"question": "Find the policyholder with the highest claim amount in the 'Low Risk' underwriting group.", "schema": "CREATE TABLE underwriting (id INT, group VARCHAR(10), name VARCHAR(20), claim_amount DECIMAL(10,2)); INSERT INTO underwriting (id, group, name, claim_amount) VALUES (1, 'High Risk', 'John Doe', 5000.00), (2, 'Low Risk', 'Jane Smith', 2500.00), (3, 'High Risk', 'Mike Johnson', 7000.00), (4, 'Low Risk', 'Emma White', 3000.00);", "sql": "SELECT name, claim_amount FROM (SELECT name, claim_amount, ROW_NUMBER() OVER (PARTITION BY group ORDER BY claim_amount DESC) rn FROM underwriting WHERE group = 'Low Risk') sub WHERE rn = 1;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "query_window_function", "is_postgresql_specific": true, "sql_length": 189, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which Position has a Level of tier 3 and a Season smaller than 1999?", "schema": "CREATE TABLE table_name_38 (position VARCHAR, level VARCHAR, season VARCHAR)", "sql": "SELECT position FROM table_name_38 WHERE level = 'tier 3' AND season < 1999;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 76, "num_statements": 1} {"question": "What is the average salary for workers in the 'service_database' database who are members of a union and work in the 'cleaning' department?", "schema": "CREATE TABLE cleaners (id INT, name VARCHAR(50), salary DECIMAL(10, 2), is_union_member BOOLEAN, department VARCHAR(50)); INSERT INTO cleaners (id, name, salary, is_union_member, department) VALUES (1, 'Olivia', 90000.00, true, 'cleaning'), (2, 'Owen', 95000.00, true, 'cleaning'), (3, 'Olga', 80000.00, true, 'management');", "sql": "SELECT AVG(salary) FROM cleaners WHERE is_union_member = true AND department = 'cleaning';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 90, "num_statements": 1} {"question": "Identify the number of sustainable tourism initiatives implemented in the 'Americas' region by year.", "schema": "CREATE TABLE sustainable_tourism (id INT, initiative_name VARCHAR(100), region VARCHAR(50), implementation_year INT); INSERT INTO sustainable_tourism (id, initiative_name, region, implementation_year) VALUES (1, 'Green Lodging', 'Americas', 2018), (2, 'Solar-Powered Sightseeing', 'Europe', 2020);", "sql": "SELECT implementation_year, COUNT(*) as num_initiatives FROM sustainable_tourism WHERE region = 'Americas' GROUP BY implementation_year;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 136, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the name of the couple if the number of dances is 6?", "schema": "CREATE TABLE table_23662272_4 (couple VARCHAR, number_of_dances VARCHAR)", "sql": "SELECT couple FROM table_23662272_4 WHERE number_of_dances = 6;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 63, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which player has 17 blocks?", "schema": "CREATE TABLE table_24912693_4 (player VARCHAR, blocks VARCHAR)", "sql": "SELECT player FROM table_24912693_4 WHERE blocks = 17;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 54, "num_statements": 1} {"question": "What is the total number of workers in the 'Hospitality' industry who are part of a union?", "schema": "CREATE TABLE workers (id INT, industry VARCHAR(255), salary FLOAT, union_member BOOLEAN); INSERT INTO workers (id, industry, salary, union_member) VALUES (1, 'Manufacturing', 50000.0, true), (2, 'Hospitality', 40000.0, true), (3, 'Retail', 30000.0, false);", "sql": "SELECT COUNT(*) FROM workers WHERE industry = 'Hospitality' AND union_member = true;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 84, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'brin_bloom' (example 73).", "schema": null, "sql": "CREATE INDEX brin_test_bloom_b_idx ON brin_test_bloom USING brin (b) WITH (pages_per_range = 2);", "explanation": "DDL from PostgreSQL core regression test for Brin Bloom.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "ddl_index", "is_postgresql_specific": false, "sql_length": 96, "num_statements": 1} {"question": "PostgreSQL regression test 'rules': Write the SELECT query (example 259).", "schema": null, "sql": "select * from rtest_vview3;", "explanation": "Regression test for Rules in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select * from rtest_vview3) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 27, "num_statements": 1} {"question": "What is the total balance of clients with investment accounts in the San Francisco branch?", "schema": "CREATE TABLE clients (client_id INT, name TEXT, dob DATE, branch TEXT);CREATE TABLE accounts (account_id INT, client_id INT, account_type TEXT, balance DECIMAL);INSERT INTO clients VALUES (5, 'Charlie Brown', '1998-07-03', 'San Francisco');INSERT INTO accounts VALUES (105, 5, 'Investment', 15000);", "sql": "SELECT SUM(accounts.balance) FROM clients INNER JOIN accounts ON clients.client_id = accounts.client_id WHERE accounts.account_type = 'Investment' AND clients.branch = 'San Francisco';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 184, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What city did Yugoslavia play Norway in?", "schema": "CREATE TABLE table_name_2 (city VARCHAR, opponent VARCHAR)", "sql": "SELECT city FROM table_name_2 WHERE opponent = 'norway';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 56, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the Game on May 24 with Road Team Seattle?", "schema": "CREATE TABLE table_name_76 (game VARCHAR, road_team VARCHAR, date VARCHAR)", "sql": "SELECT game FROM table_name_76 WHERE road_team = 'seattle' AND date = 'may 24';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 79, "num_statements": 1} {"question": "What are the wholesale orders with a payment amount greater than $500 for the current month, and their corresponding payment dates?", "schema": "CREATE TABLE wholesale_orders (id INT PRIMARY KEY, dispensary_id INT, strain_id INT, quantity INT, order_date DATE); CREATE TABLE wholesale_payments (id INT PRIMARY KEY, order_id INT, payment_amount FLOAT, payment_date DATE);", "sql": "SELECT wholesale_orders.dispensary_id, wholesale_orders.order_date, wholesale_payments.payment_amount, wholesale_payments.payment_date FROM wholesale_orders INNER JOIN wholesale_payments ON wholesale_orders.id = wholesale_payments.order_id WHERE wholesale_payments.payment_date BETWEEN DATE_SUB(CURRENT_DATE, INTERVAL DAY(CURRENT_DATE) - 1 DAY) AND CURRENT_DATE AND wholesale_payments.payment_amount > 500;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 406, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the sum of the pick in round 17?", "schema": "CREATE TABLE table_name_10 (pick INTEGER, round VARCHAR)", "sql": "SELECT SUM(pick) FROM table_name_10 WHERE round = 17;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 53, "num_statements": 1} {"question": "What is the total revenue generated by cultural heritage tours in Italy in the last quarter?", "schema": "CREATE TABLE tours (tour_id INT, tour_name TEXT, country TEXT, revenue FLOAT, tour_date DATE); INSERT INTO tours (tour_id, tour_name, country, revenue, tour_date) VALUES (1, 'Roman Colosseum Tour', 'Italy', 10000.00, '2022-01-01'); INSERT INTO tours (tour_id, tour_name, country, revenue, tour_date) VALUES (2, 'Florence Uffizi Gallery Tour', 'Italy', 8000.00, '2022-03-15'); INSERT INTO tours (tour_id, tour_name, country, revenue, tour_date) VALUES (3, 'Roman Colosseum Tour', 'Italy', 12000.00, '2022-04-01');", "sql": "SELECT SUM(revenue) FROM tours WHERE country = 'Italy' AND tour_date >= '2022-01-01' AND tour_date < '2022-04-01';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 114, "num_statements": 1} {"question": "PostgreSQL Perform: show example 25.", "schema": null, "sql": "EXPLAIN SELECT * FROM tenk1 t1, tenk2 t2 WHERE t1.unique1 < 10 AND t2.unique2 < 10 AND t1.hundred < t2.hundred; QUERY PLAN -------------------------------------------------------------------&zwsp;-------------------------- Nested Loop (cost=4.65..49.36 rows=33 width=488) Join Filter: (t1.hundred < t2.hundred) -> Bitmap Heap Scan on tenk1 t1 (cost=4.36..39.38 rows=10 width=244) Recheck Cond: (unique1 < 10) -> Bitmap Index Scan on tenk1_unique1 (cost=0.00..4.36 rows=10 width=0) Index Cond: (unique1 < 10) -> Materialize (cost=0.29..8.51 rows=10 width=244) -> Index Scan using tenk2_unique2 on tenk2 t2 (cost=0.29..8.46 rows=10 width=244) Index Cond: (unique2 < 10);", "explanation": "Example from PostgreSQL documentation on Perform.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "admin_maintenance", "is_postgresql_specific": false, "sql_length": 668, "num_statements": 3} {"question": "What is the average carbon offset (in metric tons) for carbon offset programs in Europe that were initiated in 2018?", "schema": "CREATE TABLE if not exists carbon_offset_programs (program_id integer, program_start_date date, program_location varchar(255), carbon_offset_tons integer); INSERT INTO carbon_offset_programs (program_id, program_start_date, program_location, carbon_offset_tons) VALUES (1, '2018-01-01', 'France', 2000), (2, '2018-06-01', 'Germany', 2500), (3, '2018-12-31', 'Spain', 1500);", "sql": "SELECT program_location, AVG(carbon_offset_tons) as avg_offset FROM carbon_offset_programs WHERE program_start_date BETWEEN '2018-01-01' AND '2018-12-31' AND program_location LIKE 'Europe%' GROUP BY program_location;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 216, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the surface tye during the match with the opponent of richard fromberg?", "schema": "CREATE TABLE table_name_3 (surface VARCHAR, opponent VARCHAR)", "sql": "SELECT surface FROM table_name_3 WHERE opponent = 'richard fromberg';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 69, "num_statements": 1} {"question": "How many military bases are located in each state in the 'military_bases' and 'states' tables?", "schema": "CREATE TABLE states (state_id INT, state_name VARCHAR(50)); CREATE TABLE military_bases (base_id INT, base_name VARCHAR(50), state_id INT); INSERT INTO states VALUES (1, 'Alabama'), (2, 'Alaska'), (3, 'Arizona'); INSERT INTO military_bases VALUES (1, 'Fort Rucker', 1), (2, 'Fort Wainwright', 2), (3, 'Fort Huachuca', 3);", "sql": "SELECT s.state_name, COUNT(m.base_id) as bases_in_state FROM states s JOIN military_bases m ON s.state_id = m.state_id GROUP BY s.state_name;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 141, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Switzerland has how many gold and less than 0 silver?", "schema": "CREATE TABLE table_name_61 (gold INTEGER, nation VARCHAR, silver VARCHAR)", "sql": "SELECT MAX(gold) FROM table_name_61 WHERE nation = 'switzerland' AND silver < 0;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 80, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many characteristics does the product named \"sesame\" have?", "schema": "CREATE TABLE product_characteristics (product_id VARCHAR); CREATE TABLE products (product_id VARCHAR, product_name VARCHAR)", "sql": "SELECT COUNT(*) FROM products AS t1 JOIN product_characteristics AS t2 ON t1.product_id = t2.product_id WHERE t1.product_name = 'sesame';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 137, "num_statements": 1} {"question": "How many gold medals were won in total in the last 3 Summer Olympics?", "schema": "CREATE TABLE olympics (year INT, season VARCHAR(50), total_gold INT); INSERT INTO olympics VALUES (2021, 'Summer', 307), (2018, 'Winter', 106), (2020, 'Summer', 339);", "sql": "SELECT SUM(total_gold) FROM olympics WHERE season = 'Summer' AND year IN (2016, 2018, 2020);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 92, "num_statements": 1} {"question": "what is the average duration of education programs in the 'community_education' table?", "schema": "CREATE TABLE community_education (education_id INT, education_name VARCHAR(50), start_date DATE, end_date DATE); INSERT INTO community_education (education_id, education_name, start_date, end_date) VALUES (1, 'Animal Tracking', '2021-01-01', '2021-12-31'), (2, 'Habitat Conservation', '2021-04-01', '2021-12-31'), (3, 'Wildlife Photography', '2021-07-01', '2021-10-31');", "sql": "SELECT AVG(DATEDIFF(end_date, start_date)) FROM community_education;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 68, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which School has a Previous Conference of none (new school), and a Year Joined larger than 1960, and a Location of versailles?", "schema": "CREATE TABLE table_name_78 (school VARCHAR, location VARCHAR, previous_conference VARCHAR, year_joined VARCHAR)", "sql": "SELECT school FROM table_name_78 WHERE previous_conference = 'none (new school)' AND year_joined > 1960 AND location = 'versailles';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 132, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: When the game was played at Mile High Stadium before week 9, what was the result?", "schema": "CREATE TABLE table_name_87 (result VARCHAR, week VARCHAR, game_site VARCHAR)", "sql": "SELECT result FROM table_name_87 WHERE week < 9 AND game_site = 'mile high stadium';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 84, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which Office has a Representative of scott pelath?", "schema": "CREATE TABLE table_name_81 (office VARCHAR, representative VARCHAR)", "sql": "SELECT office FROM table_name_81 WHERE representative = 'scott pelath';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 71, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Show the county name and population of all counties.", "schema": "CREATE TABLE county (County_name VARCHAR, Population VARCHAR)", "sql": "SELECT County_name, Population FROM county;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 43, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What's the air date of part 1 of the episode whose part 2 aired on December 2, 2007?", "schema": "CREATE TABLE table_13241993_3 (part_1 VARCHAR, part_2 VARCHAR)", "sql": "SELECT part_1 FROM table_13241993_3 WHERE part_2 = 'December 2, 2007';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 70, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'rangetypes' (example 141).", "schema": null, "sql": "INSERT INTO textrange_test VALUES('[,\"q\")');", "explanation": "DML from PostgreSQL core regression test for Rangetypes.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 44, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the least attendance with visitor of edmonton", "schema": "CREATE TABLE table_name_98 (attendance INTEGER, visitor VARCHAR)", "sql": "SELECT MIN(attendance) FROM table_name_98 WHERE visitor = 'edmonton';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 69, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Game Site of hoosier dome, and a Result of l 7–31 involved what attendance?", "schema": "CREATE TABLE table_name_29 (attendance VARCHAR, game_site VARCHAR, result VARCHAR)", "sql": "SELECT attendance FROM table_name_29 WHERE game_site = 'hoosier dome' AND result = 'l 7–31';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 92, "num_statements": 1} {"question": "What is the total number of satellites in orbit around Earth?", "schema": "CREATE TABLE SatelliteOrbits (SatelliteID INT, Name VARCHAR(100), Orbit VARCHAR(50));", "sql": "SELECT COUNT(*) FROM SatelliteOrbits WHERE Orbit = 'Earth';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 59, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: In 2008, what was the world ranking that ranked 5th in L.A.?", "schema": "CREATE TABLE table_name_54 (world_ranking__1_ VARCHAR, ranking_la__2_ VARCHAR, year_of_publication VARCHAR)", "sql": "SELECT world_ranking__1_ FROM table_name_54 WHERE ranking_la__2_ = '5th' AND year_of_publication = '2008';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 106, "num_statements": 1} {"question": "What is the average citizen feedback score for transportation services in the South region in 2022?", "schema": "CREATE TABLE Feedback (Year INT, Service VARCHAR(255), Region VARCHAR(255), Score DECIMAL(3,2)); INSERT INTO Feedback (Year, Service, Region, Score) VALUES (2022, 'Bus', 'South', 8.25), (2022, 'Train', 'South', 8.50), (2022, 'Taxi', 'South', 8.75);", "sql": "SELECT AVG(Score) FROM Feedback WHERE Year = 2022 AND Region = 'South' AND Service IN ('Bus', 'Train');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 103, "num_statements": 1} {"question": "PL/pgSQL test: Plpgsql Array (example 14).", "schema": null, "sql": "do $$ declare a int[] := array[1,2,3];\nbegin\n -- test scenarios for optimization of updates of R/W expanded objects\n a := array_append(a, 42); -- optimizable using \"transfer\" method\n a := a || a[3]; -- optimizable using \"inplace\" method\n a := a[1] || a; -- ditto, but let's test array_prepend\n a := a || a; -- not optimizable\n raise notice 'a = %', a;\nend$$;", "explanation": "PL/pgSQL example from PostgreSQL source test for Plpgsql Array.", "validation_query": null, "source": "plpgsql_source", "difficulty": "advanced", "category": "plpgsql", "is_postgresql_specific": true, "sql_length": 371, "num_statements": 7} {"question": "What is the number of employees hired in the last 12 months by department?", "schema": "CREATE TABLE EmployeeHire (HireID INT, HireDate DATE, Department VARCHAR(20)); INSERT INTO EmployeeHire (HireID, HireDate, Department) VALUES (1, '2022-01-01', 'IT'), (2, '2022-02-01', 'HR'), (3, '2022-03-01', 'Marketing'), (4, '2021-04-01', 'IT');", "sql": "SELECT Department, COUNT(*) FROM EmployeeHire WHERE HireDate >= DATEADD(month, -12, GETDATE()) GROUP BY Department;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 115, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which Position has Goals against larger than 32, and points larger than 30?", "schema": "CREATE TABLE table_name_22 (position VARCHAR, goals_against VARCHAR, points VARCHAR)", "sql": "SELECT position FROM table_name_22 WHERE goals_against > 32 AND points > 30;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 76, "num_statements": 1} {"question": "What is the change in mental health score from the beginning to the end of the school year for each student?", "schema": "CREATE TABLE student_mental_health_year (student_id INT, date DATE, score INT);", "sql": "SELECT student_id, LAG(score) OVER (PARTITION BY student_id ORDER BY date) as beginning_score, score as end_score, score - LAG(score) OVER (PARTITION BY student_id ORDER BY date) as change FROM student_mental_health_year WHERE EXTRACT(MONTH FROM date) IN (5, 6, 7) AND EXTRACT(YEAR FROM date) = 2022;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "plpgsql", "is_postgresql_specific": true, "sql_length": 300, "num_statements": 1} {"question": "PostgreSQL regression test 'tsdicts': Write the SELECT query (example 27).", "schema": null, "sql": "SELECT ts_lexize('hunspell', 'unbooking');", "explanation": "Regression test for Tsdicts in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT ts_lexize('hunspell', 'unbooking')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 42, "num_statements": 1} {"question": "Insert new sustainable fabric 'Piñatex' with a sustainability rating of 4.6 and production cost of 3.5.", "schema": "CREATE TABLE fabrics (id INT, name VARCHAR(255), sustainability_rating FLOAT, production_cost FLOAT);", "sql": "INSERT INTO fabrics (name, sustainability_rating, production_cost) VALUES ('Piñatex', 4.6, 3.5);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 96, "num_statements": 1} {"question": "What is the maximum price of organic skincare products in France?", "schema": "CREATE TABLE skincare (id INT, name TEXT, price DECIMAL, is_organic BOOLEAN, country TEXT); INSERT INTO skincare (id, name, price, is_organic, country) VALUES (1, 'Cleanser', 19.99, true, 'France'); INSERT INTO skincare (id, name, price, is_organic, country) VALUES (2, 'Toner', 15.99, true, 'France'); INSERT INTO skincare (id, name, price, is_organic, country) VALUES (3, 'Moisturizer', 29.99, true, 'France');", "sql": "SELECT MAX(price) FROM skincare WHERE is_organic = true AND country = 'France';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 79, "num_statements": 1} {"question": "What is the earliest delivery date for each type of military equipment?", "schema": "CREATE TABLE EquipmentDeliveries (DeliveryID INT, Equipment VARCHAR(50), Quantity INT, DeliveryDate DATE); INSERT INTO EquipmentDeliveries (DeliveryID, Equipment, Quantity, DeliveryDate) VALUES (7, 'Tanks', 5, '2022-06-30'); INSERT INTO EquipmentDeliveries (DeliveryID, Equipment, Quantity, DeliveryDate) VALUES (8, 'Artillery', 3, '2023-02-14');", "sql": "SELECT Equipment, MIN(DeliveryDate) AS EarliestDeliveryDate FROM EquipmentDeliveries GROUP BY Equipment;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 104, "num_statements": 1} {"question": "PostgreSQL regression test 'rangetypes': Write the SELECT query (example 20).", "schema": null, "sql": "select '[\",\",\",\"]'::textrange;", "explanation": "Regression test for Rangetypes in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select '[\",\",\",\"]'::textrange) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 30, "num_statements": 1} {"question": "Which electric vehicle models have the highest safety ratings?", "schema": "CREATE TABLE SafetyRatings (Model VARCHAR(50), Rating INT); INSERT INTO SafetyRatings (Model, Rating) VALUES ('Tesla Model 3', 5), ('Chevrolet Bolt', 5), ('Nissan Leaf', 4);", "sql": "SELECT Model, Rating FROM SafetyRatings ORDER BY Rating DESC;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 61, "num_statements": 1} {"question": "pgTAP test for Runtests (assertion 12).", "schema": null, "sql": "CREATE OR REPLACE FUNCTION whatever.shutdownmore() RETURNS SETOF TEXT AS $$\n SELECT pass('shutting down more');\n$$ LANGUAGE SQL;", "explanation": "SQL assertion from pgTAP test suite for Runtests.", "validation_query": null, "source": "pgtap_tests", "difficulty": "advanced", "category": "plpgsql_function", "is_postgresql_specific": true, "sql_length": 131, "num_statements": 2} {"question": "What is the total wastewater treatment capacity in Ghana in cubic meters?", "schema": "CREATE TABLE wastewater_treatment_m3 (country VARCHAR(20), region VARCHAR(20), value FLOAT); INSERT INTO wastewater_treatment_m3 (country, region, value) VALUES ('Ghana', NULL, 2000000);", "sql": "SELECT value FROM wastewater_treatment_m3 WHERE country = 'Ghana';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the high score for the player with 0 stumps, 4 catches, more than 14 inns and an average smaller than 56.1?", "schema": "CREATE TABLE table_name_52 (high_score INTEGER, inns VARCHAR, average VARCHAR, stump VARCHAR, catches VARCHAR)", "sql": "SELECT SUM(high_score) FROM table_name_52 WHERE stump = 0 AND catches = 4 AND average < 56.1 AND inns > 14;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 107, "num_statements": 1} {"question": "What is the average price of products in the Natural category, ranked by average price in descending order?", "schema": "CREATE TABLE products (product_id INT, category VARCHAR(20), price DECIMAL(5,2)); INSERT INTO products (product_id, category, price) VALUES (1, 'Natural', 25.99), (2, 'Organic', 30.49), (3, 'Natural', 29.99), (4, 'Conventional', 15.99);", "sql": "SELECT AVG(price) as avg_price, category FROM products GROUP BY category ORDER BY avg_price DESC;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 97, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: state the number of location attendance where record is 15–10 (5–7)", "schema": "CREATE TABLE table_20010140_10 (location_attendance VARCHAR, record VARCHAR)", "sql": "SELECT COUNT(location_attendance) FROM table_20010140_10 WHERE record = '15–10 (5–7)';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 86, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which tournament had a score of 6–7 (0–7) , 6–2, 4–6?", "schema": "CREATE TABLE table_name_3 (tournament VARCHAR, score VARCHAR)", "sql": "SELECT tournament FROM table_name_3 WHERE score = '6–7 (0–7) , 6–2, 4–6';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 73, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the No. 10 which has a No. 8 of jackson, and a No. 9 of jayden?", "schema": "CREATE TABLE table_name_94 (no_10 VARCHAR, no_8 VARCHAR, no_9 VARCHAR)", "sql": "SELECT no_10 FROM table_name_94 WHERE no_8 = 'jackson' AND no_9 = 'jayden';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 75, "num_statements": 1} {"question": "List the names of countries that have both eco-friendly hotels and cultural heritage sites, but no virtual tours or museums.", "schema": "CREATE TABLE eco_hotels (hotel_id INT, country VARCHAR(20), name VARCHAR(50)); INSERT INTO eco_hotels (hotel_id, country, name) VALUES (1, 'India', 'Green Resort'), (2, 'Mexico', 'Eco Retreat'), (3, 'Nepal', 'Sustainable Suites'); CREATE TABLE cultural_sites (site_id INT, country VARCHAR(20), type VARCHAR(20)); INSERT INTO cultural_sites (site_id, country, type) VALUES (1, 'India', 'heritage'), (2, 'Mexico', 'heritage'), (3, 'Nepal', 'heritage'); CREATE TABLE virtual_tours (tour_id INT, country VARCHAR(20), type VARCHAR(20)); INSERT INTO virtual_tours (tour_id, country, type) VALUES (1, 'India', 'virtual'), (2, 'Mexico', 'virtual'); CREATE TABLE museums (museum_id INT, country VARCHAR(20), type VARCHAR(20)); INSERT INTO museums (museum_id, country, type) VALUES (1, 'India', 'museum'), (2, 'Mexico', 'museum');", "sql": "(SELECT country FROM eco_hotels WHERE name IS NOT NULL) INTERSECT (SELECT country FROM cultural_sites WHERE type = 'heritage') EXCEPT (SELECT country FROM (SELECT * FROM virtual_tours WHERE type = 'virtual' UNION ALL SELECT * FROM museums WHERE type = 'museum') AS combined_data);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 280, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What years did the Representative from Iowa with a lifespan of 1880–1942 serve?", "schema": "CREATE TABLE table_name_20 (years VARCHAR, state VARCHAR, lifespan VARCHAR)", "sql": "SELECT years FROM table_name_20 WHERE state = 'iowa' AND lifespan = '1880–1942';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 80, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the largest number for tropical Lows for the 1990–91 season with more than 10 tropical cyclones?", "schema": "CREATE TABLE table_name_13 (tropical_lows INTEGER, season VARCHAR, tropical_cyclones VARCHAR)", "sql": "SELECT MAX(tropical_lows) FROM table_name_13 WHERE season = '1990–91' AND tropical_cyclones > 10;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 97, "num_statements": 1} {"question": "What is the average age of players who play 'Racing' games?", "schema": "CREATE TABLE Players (player_id INT, name VARCHAR(255), age INT, game_genre VARCHAR(255)); INSERT INTO Players (player_id, name, age, game_genre) VALUES (1, 'John', 27, 'FPS'), (2, 'Sarah', 30, 'RPG'), (3, 'Alex', 22, 'FPS'), (4, 'Max', 25, 'Strategy'), (5, 'Zoe', 28, 'Racing'), (6, 'Ella', 24, 'Racing');", "sql": "SELECT AVG(age) FROM Players WHERE game_genre = 'Racing';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 57, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the an-stem for the word which has an ö-stems of siangar and an u-stem ending of syni?", "schema": "CREATE TABLE table_name_18 (masculine_an_stems VARCHAR, feminine_ō_stems VARCHAR, masculine_u_stems VARCHAR)", "sql": "SELECT masculine_an_stems FROM table_name_18 WHERE feminine_ō_stems = 'siangar' AND masculine_u_stems = 'syni';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 111, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which Pick has a Round larger than 17, and a Name of gene stewart?", "schema": "CREATE TABLE table_name_46 (pick VARCHAR, round VARCHAR, name VARCHAR)", "sql": "SELECT COUNT(pick) FROM table_name_46 WHERE round > 17 AND name = 'gene stewart';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 81, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the average number of attendance for the game on November 29, 1981 played after week 13?", "schema": "CREATE TABLE table_name_40 (attendance INTEGER, date VARCHAR, week VARCHAR)", "sql": "SELECT AVG(attendance) FROM table_name_40 WHERE date = 'november 29, 1981' AND week > 13;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 89, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'citext' (example 420).", "schema": null, "sql": "SELECT to_timestamp('05 Dec 2000'::citext, 'DD Mon YYYY'::citext)\n = to_timestamp('05 Dec 2000', 'DD Mon YYYY') AS t;", "explanation": "Example query from the 'citext' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 129, "num_statements": 1} {"question": "What is the average age of community health workers by their race?", "schema": "CREATE TABLE community_health_workers (worker_id INT, name VARCHAR(50), age INT, race VARCHAR(50)); INSERT INTO community_health_workers (worker_id, name, age, race) VALUES (1, 'John Doe', 35, 'White'), (2, 'Jane Smith', 40, 'Black'), (3, 'Maria Garcia', 45, 'Hispanic');", "sql": "SELECT race, AVG(age) as avg_age FROM community_health_workers GROUP BY race;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 77, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What's the score of Jeff Maggert in T2 place?", "schema": "CREATE TABLE table_name_36 (score VARCHAR, place VARCHAR, player VARCHAR)", "sql": "SELECT score FROM table_name_36 WHERE place = 't2' AND player = 'jeff maggert';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 79, "num_statements": 1} {"question": "What explainable AI techniques were applied in the past year for speech recognition tasks, in the Explainable AI database?", "schema": "CREATE TABLE techniques (id INT, name VARCHAR(255), domain VARCHAR(255), published_date DATE);", "sql": "SELECT name FROM techniques WHERE domain = 'Speech Recognition' AND YEAR(published_date) = YEAR(CURRENT_DATE());", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 112, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the class of 2009?", "schema": "CREATE TABLE table_name_26 (class VARCHAR, year VARCHAR)", "sql": "SELECT class FROM table_name_26 WHERE year = 2009;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 50, "num_statements": 1} {"question": "What is the health equity metric trend for the past year?", "schema": "CREATE TABLE health_equity_metrics (date DATE, metric FLOAT); INSERT INTO health_equity_metrics (date, metric) VALUES ('2021-01-01', 78.5), ('2021-02-01', 79.2), ('2021-03-01', 80.1), ('2021-04-01', 81.0), ('2021-05-01', 81.5), ('2021-06-01', 82.0), ('2021-07-01', 82.5), ('2021-08-01', 82.8), ('2021-09-01', 83.1), ('2021-10-01', 83.4), ('2021-11-01', 83.7), ('2021-12-01', 84.0);", "sql": "SELECT date, metric FROM health_equity_metrics ORDER BY date;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 61, "num_statements": 1} {"question": "What was the average sale price for 'Cubist' artworks in the 'Guggenheim' museum?", "schema": "CREATE TABLE Artworks (artwork_id INT, movement VARCHAR(255), sale_price DECIMAL(10, 2), museum_name VARCHAR(255));", "sql": "SELECT AVG(sale_price) FROM Artworks WHERE movement = 'Cubist' AND museum_name = 'Guggenheim';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 94, "num_statements": 1} {"question": "What is the average word count of articles written by female authors?", "schema": "CREATE TABLE news_articles (id INT, title VARCHAR(100), content TEXT, word_count INT, author_gender VARCHAR(10));", "sql": "SELECT AVG(word_count) FROM news_articles WHERE author_gender = 'Female';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 73, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who came in 3rd place in Krško , Slovenia Matije Gubca Stadium", "schema": "CREATE TABLE table_19317584_2 (city_and_venue VARCHAR)", "sql": "SELECT 3 AS rd_placed FROM table_19317584_2 WHERE city_and_venue = 'Krško , Slovenia Matije Gubca Stadium';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 107, "num_statements": 1} {"question": "List the destinations with at least 5 tourists from Australia in 2023.", "schema": "CREATE TABLE tourism_data (id INT, name VARCHAR(50), country VARCHAR(50), destination VARCHAR(50), visit_year INT); INSERT INTO tourism_data (id, name, country, destination, visit_year) VALUES (1, 'Sarah Thompson', 'Australia', 'Sydney', 2023), (2, 'James Clark', 'Australia', 'Melbourne', 2023), (3, 'Grace White', 'Australia', 'Perth', 2023), (4, 'Lucas Green', 'Australia', 'Adelaide', 2023), (5, 'Emily Black', 'Australia', 'Brisbane', 2023), (6, 'Mia Taylor', 'Australia', 'Cairns', 2022);", "sql": "SELECT DISTINCT destination FROM tourism_data WHERE country = 'Australia' AND visit_year = 2023 GROUP BY destination HAVING COUNT(*) >= 5;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 138, "num_statements": 1} {"question": "Which autonomous vehicles have the highest and lowest adoption rates in 'tokyo'?", "schema": "CREATE TABLE if not exists vehicle_types (vehicle_type varchar(20)); INSERT INTO vehicle_types (vehicle_type) VALUES ('autonomous'), ('manual'); CREATE TABLE if not exists adoption_rates (vehicle_type varchar(20), city varchar(20), adoption_rate float); INSERT INTO adoption_rates (vehicle_type, city, adoption_rate) VALUES ('autonomous', 'tokyo', 25.6), ('manual', 'tokyo', 74.1), ('autonomous', 'tokyo', 26.8), ('manual', 'tokyo', 73.9);", "sql": "SELECT vehicle_type, MAX(adoption_rate) as highest_rate, MIN(adoption_rate) as lowest_rate FROM adoption_rates WHERE city = 'tokyo' GROUP BY vehicle_type;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 154, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'pageinspect' (example 24).", "schema": null, "sql": "SELECT keys AS keys_leaf_1\n FROM gist_page_items(get_raw_page('test_gist_idx_inc', 1), 'test_gist_idx_inc')\n WHERE itemoffset = 1;", "explanation": "Example query from the 'pageinspect' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 132, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the top goal for the result of 2–3?", "schema": "CREATE TABLE table_name_39 (goal INTEGER, result VARCHAR)", "sql": "SELECT MAX(goal) FROM table_name_39 WHERE result = '2–3';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 57, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: The score of 73-71-70=214 belongs to what country?", "schema": "CREATE TABLE table_name_77 (country VARCHAR, score VARCHAR)", "sql": "SELECT country FROM table_name_77 WHERE score = 73 - 71 - 70 = 214;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 67, "num_statements": 1} {"question": "How many cruelty-free haircare products were sold in Canada in the last 6 months?", "schema": "CREATE TABLE HaircareProducts (product_id INT, product_name VARCHAR(255), is_cruelty_free BOOLEAN, sales_date DATE);", "sql": "SELECT COUNT(*) FROM HaircareProducts WHERE is_cruelty_free = TRUE AND sales_date >= DATE_SUB(CURRENT_DATE, INTERVAL 6 MONTH) AND country = 'Canada';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 149, "num_statements": 1} {"question": "What is the maximum capacity of a renewable energy project for each type?", "schema": "CREATE TABLE projects (name TEXT, type TEXT, capacity INTEGER); INSERT INTO projects (name, type, capacity) VALUES ('Project 1', 'Wind', 100), ('Project 2', 'Solar', 200), ('Project 3', 'Wind', 300);", "sql": "SELECT type, MAX(capacity) FROM projects GROUP BY type;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 55, "num_statements": 1} {"question": "What is the minimum flight hours for aircrafts manufactured by Airbus?", "schema": "CREATE TABLE FlightSafety(id INT, aircraft_id INT, manufacturer VARCHAR(255), flight_hours INT); INSERT INTO FlightSafety(id, aircraft_id, manufacturer, flight_hours) VALUES (1, 1001, 'Boeing', 12000), (2, 1002, 'Airbus', 10500), (3, 1003, 'Boeing', 18000), (4, 1004, 'Airbus', 12000), (5, 1005, 'Airbus', 11000);", "sql": "SELECT MIN(flight_hours) FROM FlightSafety WHERE manufacturer = 'Airbus';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 73, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which college is Kellen Davis from?", "schema": "CREATE TABLE table_name_44 (college VARCHAR, player VARCHAR)", "sql": "SELECT college FROM table_name_44 WHERE player = 'kellen davis';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 64, "num_statements": 1} {"question": "What is the average response time for emergency calls on weekends?", "schema": "CREATE TABLE emergencies (eid INT, call_time TIME, response_time INT);", "sql": "SELECT AVG(response_time) FROM emergencies WHERE DAYOFWEEK(call_time) IN (1, 7);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 80, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which driver had a grid of 18?", "schema": "CREATE TABLE table_name_58 (driver VARCHAR, grid VARCHAR)", "sql": "SELECT driver FROM table_name_58 WHERE grid = '18';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 51, "num_statements": 1} {"question": "Create a table named \"maintenance\" with columns \"maintenance_id\", \"vehicle_id\", \"date\", and \"description\".", "schema": "CREATE TABLE maintenance (maintenance_id INT, vehicle_id INT, date DATE, description VARCHAR(255));", "sql": "CREATE TABLE maintenance (maintenance_id INT, vehicle_id INT, date DATE, description VARCHAR(255));", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_table", "is_postgresql_specific": false, "sql_length": 99, "num_statements": 1} {"question": "Display the concerts with a revenue greater than the average revenue of concerts in 'New York'.", "schema": "CREATE TABLE Concerts (ConcertID INT, Artist VARCHAR(50), City VARCHAR(50), Revenue DECIMAL(10,2)); INSERT INTO Concerts (ConcertID, Artist, City, Revenue) VALUES (1, 'Taylor Swift', 'Los Angeles', 500000.00), (2, 'BTS', 'New York', 750000.00), (3, 'Adele', 'London', 600000.00), (4, 'Taylor Swift', 'New York', 350000.00);", "sql": "SELECT * FROM Concerts WHERE City = 'New York' GROUP BY City; SELECT * FROM Concerts WHERE Revenue > (SELECT AVG(Revenue) FROM (SELECT Revenue FROM Concerts WHERE City = 'New York' GROUP BY City));", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 197, "num_statements": 2} {"question": "What is the total number of veterans employed in the defense industry in the United States as of January 2022?", "schema": "CREATE TABLE veteran_employment (employee_id INT, industry_sector VARCHAR(50), employment_date DATE, is_veteran BOOLEAN);", "sql": "SELECT COUNT(employee_id) FROM veteran_employment WHERE industry_sector LIKE '%defense%' AND employment_date = '2022-01-01' AND is_veteran = TRUE;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 146, "num_statements": 1} {"question": "What is the total assets under management (AUM) for investment strategies with a risk level of 'moderate'?", "schema": "CREATE TABLE investment_strategies (strategy_id INT, name VARCHAR(50), risk_level VARCHAR(50), AUM DECIMAL(10,2)); INSERT INTO investment_strategies (strategy_id, name, risk_level, AUM) VALUES (1, 'Growth', 'Moderate', 5000000.00), (2, 'Income', 'Conservative', 3000000.00);", "sql": "SELECT SUM(AUM) FROM investment_strategies WHERE risk_level = 'Moderate';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 73, "num_statements": 1} {"question": "Which natural foundations have the highest customer ratings?", "schema": "CREATE TABLE cosmetics_info(product_name TEXT, is_natural BOOLEAN, rating DECIMAL); INSERT INTO cosmetics_info(product_name, is_natural, rating) VALUES('Natural Foundation 1', true, 4.5);", "sql": "SELECT product_name FROM cosmetics_info WHERE is_natural = true AND rating = (SELECT MAX(rating) FROM cosmetics_info WHERE is_natural = true);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 142, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the number of the crowd when the Wollongong Hawks were the home team?", "schema": "CREATE TABLE table_name_53 (crowd INTEGER, home_team VARCHAR)", "sql": "SELECT SUM(crowd) FROM table_name_53 WHERE home_team = 'wollongong hawks';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 74, "num_statements": 1} {"question": "Delete the 'Teen' size from the 'Size' table", "schema": "CREATE TABLE Size (id INT PRIMARY KEY, name VARCHAR(50), average_spending DECIMAL(5,2));", "sql": "DELETE FROM Size WHERE name = 'Teen';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_delete", "is_postgresql_specific": false, "sql_length": 37, "num_statements": 1} {"question": "Compare the landfill capacity of cities in the 'East Coast' region with those in the 'West Coast' region.", "schema": "CREATE TABLE cities (city_name VARCHAR(50), region VARCHAR(50)); INSERT INTO cities (city_name, region) VALUES ('New York', 'East Coast'), ('Boston', 'East Coast'), ('San Francisco', 'West Coast'), ('Los Angeles', 'West Coast'); CREATE TABLE landfill_capacity (city_name VARCHAR(50), capacity INT); INSERT INTO landfill_capacity (city_name, capacity) VALUES ('New York', 5000), ('Boston', 4000), ('San Francisco', 3000), ('Los Angeles', 6000);", "sql": "SELECT c.region, AVG(lc.capacity) as avg_capacity FROM landfill_capacity lc JOIN cities c ON lc.city_name = c.city_name GROUP BY c.region;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 138, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is Moving From, when Ends is before 2011?", "schema": "CREATE TABLE table_name_65 (moving_from VARCHAR, ends INTEGER)", "sql": "SELECT moving_from FROM table_name_65 WHERE ends < 2011;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 56, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the Score with a Team with @ houston?", "schema": "CREATE TABLE table_name_35 (score VARCHAR, team VARCHAR)", "sql": "SELECT score FROM table_name_35 WHERE team = '@ houston';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 57, "num_statements": 1} {"question": "How many public bike sharing programs are available in each city?", "schema": "CREATE TABLE City (city_id INT, city_name VARCHAR(50)); CREATE TABLE Program (program_id INT, program_name VARCHAR(50), city_id INT);", "sql": "SELECT city_name, COUNT(*) as num_programs FROM City JOIN Program ON City.city_id = Program.city_id GROUP BY city_name;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 119, "num_statements": 1} {"question": "Identify the number of eco-friendly hotels in each city in Spain.", "schema": "CREATE TABLE eco_friendly_hotels (hotel_id INT, name TEXT, city TEXT); INSERT INTO eco_friendly_hotels (hotel_id, name, city) VALUES (1, 'EcoHotel Madrid', 'Madrid'), (2, 'EcoHotel Barcelona', 'Barcelona'), (3, 'EcoHotel Valencia', 'Valencia');", "sql": "SELECT city, COUNT(*) as hotel_count FROM eco_friendly_hotels WHERE city IN (SELECT city FROM countries WHERE name = 'Spain') GROUP BY city;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 140, "num_statements": 1} {"question": "What is the minimum budget for a resilience project in the 'Energy' sector?", "schema": "CREATE TABLE ResilienceProjects (ProjectID int, Sector varchar(10), Budget int); INSERT INTO ResilienceProjects (ProjectID, Sector, Budget) VALUES (1, 'Water', 500000), (2, 'Transport', 800000), (3, 'Energy', 600000);", "sql": "SELECT MIN(Budget) AS MinBudget FROM ResilienceProjects WHERE Sector = 'Energy';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 80, "num_statements": 1} {"question": "PostgreSQL regression test 'largeobject': Write the SELECT query (example 99).", "schema": null, "sql": "SELECT lo_creat(42);", "explanation": "Regression test for Largeobject in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT lo_creat(42)) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 20, "num_statements": 1} {"question": "What is the percentage of patients in each region who have visited a hospital or clinic in the past year, grouped by region?", "schema": "CREATE TABLE patients (patient_id INT, region VARCHAR(20), visited_last_year BOOLEAN); INSERT INTO patients (patient_id, region, visited_last_year) VALUES (1, 'Rural', true), (2, 'Urban', false), (3, 'Rural', true); CREATE TABLE hospitals (hospital_id INT, region VARCHAR(20), beds INT); INSERT INTO hospitals (hospital_id, region, beds) VALUES (1, 'Rural', 50), (2, 'Urban', 100); CREATE TABLE clinics (clinic_id INT, region VARCHAR(20), beds INT); INSERT INTO clinics (clinic_id, region, beds) VALUES (1, 'Rural', 10), (2, 'Urban', 20); CREATE TABLE visits (patient_id INT, hospital_id INT, clinic_id INT, visit_year INT); INSERT INTO visits (patient_id, hospital_id, clinic_id, visit_year) VALUES (1, 1, NULL, 2022), (2, NULL, 2, 2022), (3, 1, NULL, 2022);", "sql": "SELECT s.region, (COUNT(p.patient_id) FILTER (WHERE p.visited_last_year = true) * 100.0 / COUNT(p.patient_id)) as percentage FROM patients p JOIN hospitals h ON p.region = h.region JOIN clinics c ON p.region = c.region JOIN states s ON p.region = s.region JOIN visits v ON p.patient_id = v.patient_id WHERE v.visit_year = 2022 GROUP BY s.region;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": true, "sql_length": 345, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: which Event that has an Opponent of kendall grove?", "schema": "CREATE TABLE table_name_2 (event VARCHAR, opponent VARCHAR)", "sql": "SELECT event FROM table_name_2 WHERE opponent = 'kendall grove';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 64, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What date was the episode that aired at 7:30 pm ET?", "schema": "CREATE TABLE table_name_26 (date VARCHAR, time VARCHAR)", "sql": "SELECT date FROM table_name_26 WHERE time = '7:30 pm et';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 57, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What score has an opponent gan teik chai lin woon fui?", "schema": "CREATE TABLE table_name_6 (score VARCHAR, opponent VARCHAR)", "sql": "SELECT score FROM table_name_6 WHERE opponent = 'gan teik chai lin woon fui';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 77, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the total number of votes in the 2005 elections?", "schema": "CREATE TABLE table_19698421_1 (total_votes INTEGER, year VARCHAR)", "sql": "SELECT MIN(total_votes) FROM table_19698421_1 WHERE year = '2005';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What day is collingwood the home side?", "schema": "CREATE TABLE table_name_79 (date VARCHAR, home_team VARCHAR)", "sql": "SELECT date FROM table_name_79 WHERE home_team = 'collingwood';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 63, "num_statements": 1} {"question": "List all countries with deep-sea exploration programs and their start dates.", "schema": "CREATE TABLE countries (country_name TEXT, exploration_start_date DATE); INSERT INTO countries (country_name, exploration_start_date) VALUES ('Japan', '1950-04-01'), ('USA', '1960-01-01'), ('France', '1985-12-22');", "sql": "SELECT country_name, exploration_start_date FROM countries WHERE country_name IN ('Japan', 'USA', 'France');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 108, "num_statements": 1} {"question": "Which country has the largest marine protected area?", "schema": "CREATE TABLE marine_protected_areas (name VARCHAR(255), country VARCHAR(255), area_size FLOAT); INSERT INTO marine_protected_areas (name, country, area_size) VALUES ('Ross Sea', 'New Zealand', 800000);", "sql": "SELECT country, MAX(area_size) as max_size FROM marine_protected_areas;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 71, "num_statements": 1} {"question": "PostgreSQL regression test 'interval': Write the SELECT query (example 9).", "schema": null, "sql": "SELECT INTERVAL '1.5 months' AS \"One month 15 days\";", "explanation": "Regression test for Interval in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT INTERVAL '1.5 months' AS \"One month 15 days\") AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 52, "num_statements": 1} {"question": "What is the average rating of art programs in the Eastern region with a budget over $10,000?", "schema": "CREATE TABLE programs (id INT, region VARCHAR(50), budget DECIMAL(10,2), rating INT); INSERT INTO programs (id, region, budget, rating) VALUES (1, 'Midwest', 8000, 8), (2, 'Northeast', 12000, 9), (3, 'West Coast', 7000, 7), (4, 'Southeast', 15000, 10), (5, 'Eastern', 11000, 6);", "sql": "SELECT AVG(rating) FROM programs WHERE region = 'Eastern' AND budget > 10000;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 77, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: List the name of physicians who took some appointment.", "schema": "CREATE TABLE appointment (Physician VARCHAR); CREATE TABLE physician (name VARCHAR, EmployeeID VARCHAR)", "sql": "SELECT T2.name FROM appointment AS T1 JOIN physician AS T2 ON T1.Physician = T2.EmployeeID;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 91, "num_statements": 1} {"question": "What is the total quantity of fair trade products sold in Europe?", "schema": "CREATE TABLE Sales (SaleID INT, Product VARCHAR(20), Quantity INT, Region VARCHAR(20)); INSERT INTO Sales VALUES (1, 'Fair Trade Coffee', 200, 'Europe'); INSERT INTO Sales VALUES (2, 'Fair Trade Tea', 300, 'Europe');", "sql": "SELECT SUM(Quantity) FROM Sales WHERE Product LIKE '%Fair Trade%' AND Region = 'Europe';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 88, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Can you tell me the Source that has the Rank of 78?", "schema": "CREATE TABLE table_name_86 (source VARCHAR, rank VARCHAR)", "sql": "SELECT source FROM table_name_86 WHERE rank = 78;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 49, "num_statements": 1} {"question": "How many local cultural events are organized in Japan annually?", "schema": "CREATE TABLE CulturalEvents (event_id INT, event_name TEXT, country TEXT, year_of_occurrence INT); INSERT INTO CulturalEvents (event_id, event_name, country, year_of_occurrence) VALUES (1, 'Japanese Flower Festival', 'Japan', 2022); INSERT INTO CulturalEvents (event_id, event_name, country, year_of_occurrence) VALUES (2, 'Japanese Art Exhibition', 'Japan', 2021); INSERT INTO CulturalEvents (event_id, event_name, country, year_of_occurrence) VALUES (3, 'Japanese Traditional Dance Festival', 'Japan', 2019);", "sql": "SELECT COUNT(*) FROM CulturalEvents WHERE country = 'Japan' GROUP BY year_of_occurrence;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 88, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: what is the date of vacancy when the date of appointment is 1 january 2009?", "schema": "CREATE TABLE table_name_54 (date_of_vacancy VARCHAR, date_of_appointment VARCHAR)", "sql": "SELECT date_of_vacancy FROM table_name_54 WHERE date_of_appointment = '1 january 2009';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 87, "num_statements": 1} {"question": "Delete the view for health equity metrics", "schema": "CREATE OR REPLACE VIEW health_equity_view AS SELECT * FROM health_equity;", "sql": "DROP VIEW IF EXISTS health_equity_view;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "other", "is_postgresql_specific": false, "sql_length": 39, "num_statements": 1} {"question": "Write the PL/pgSQL object from PostgreSQL regression test 'rowsecurity' (example 1145).", "schema": null, "sql": "-- Function changes row_security setting and so invalidates plan\ncreate function rls_f(text) returns text\nbegin atomic\n select set_config('rls_test.blah', $1, true) || set_config('row_security', 'false', true) || string_agg(c, ',' order by c) from rls_t;", "explanation": "PL/pgSQL object from PostgreSQL core test for Rowsecurity.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "plpgsql_function", "is_postgresql_specific": true, "sql_length": 254, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: The Dallas Burn had a 12-9-7 record and what number of goals for?", "schema": "CREATE TABLE table_1253396_5 (goals_for VARCHAR, overall_record VARCHAR)", "sql": "SELECT goals_for FROM table_1253396_5 WHERE overall_record = '12-9-7';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 70, "num_statements": 1} {"question": "What is the average speed of spacecraft in the space_exploration table?", "schema": "CREATE TABLE space_exploration (id INT, name VARCHAR(20), launch_date DATE, max_speed FLOAT); INSERT INTO space_exploration (id, name, launch_date, max_speed) VALUES (1, 'Voyager 1', '1977-09-05', 61000), (2, 'New Horizons', '2006-01-19', 58000), (3, 'Parker Solar Probe', '2018-08-12', 724200);", "sql": "SELECT AVG(max_speed) FROM space_exploration;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 45, "num_statements": 1} {"question": "List all indigenous food systems that use 'permaculture' techniques.", "schema": "CREATE TABLE indigenous_food_systems (id INT, name TEXT, location TEXT, techniques TEXT); INSERT INTO indigenous_food_systems (id, name, location, techniques) VALUES (1, 'System 1', 'Location 1', 'Permaculture, Agroforestry'), (2, 'System 2', 'Location 2', 'Agroecology');", "sql": "SELECT name FROM indigenous_food_systems WHERE techniques LIKE '%Permaculture%';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 80, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which home town was the high school Catholic University located in?", "schema": "CREATE TABLE table_12032893_1 (home_town VARCHAR, high_school VARCHAR)", "sql": "SELECT home_town FROM table_12032893_1 WHERE high_school = 'Catholic University';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 81, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: For the category of most popular star with a result of won for 2007, what was the award?", "schema": "CREATE TABLE table_name_27 (award VARCHAR, year VARCHAR, result VARCHAR, category VARCHAR)", "sql": "SELECT award FROM table_name_27 WHERE result = 'won' AND category = 'most popular star' AND year = 2007;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 104, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: In what poll is John Oxendine at 32%?", "schema": "CREATE TABLE table_name_67 (poll_source VARCHAR, john_oxendine VARCHAR)", "sql": "SELECT poll_source FROM table_name_67 WHERE john_oxendine = '32%';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "What is the maximum billing amount for cases handled by attorneys from Jakarta with more than 7 years of experience?", "schema": "CREATE TABLE Attorneys (AttorneyID INT, YearsOfExperience INT, City VARCHAR(255)); INSERT INTO Attorneys (AttorneyID, YearsOfExperience, City) VALUES (1, 10, 'Jakarta'); INSERT INTO Attorneys (AttorneyID, YearsOfExperience, City) VALUES (2, 3, 'New York'); INSERT INTO Attorneys (AttorneyID, YearsOfExperience, City) VALUES (3, 7, 'Jakarta'); CREATE TABLE Cases (CaseID INT, AttorneyID INT, BillingAmount INT); INSERT INTO Cases (CaseID, AttorneyID, BillingAmount) VALUES (1, 1, 2000); INSERT INTO Cases (CaseID, AttorneyID, BillingAmount) VALUES (2, 1, 3000); INSERT INTO Cases (CaseID, AttorneyID, BillingAmount) VALUES (3, 2, 1000); INSERT INTO Cases (CaseID, AttorneyID, BillingAmount) VALUES (4, 3, 1500);", "sql": "SELECT MAX(BillingAmount) FROM Cases JOIN Attorneys ON Cases.AttorneyID = Attorneys.AttorneyID WHERE Attorneys.City = 'Jakarta' AND Attorneys.YearsOfExperience > 7;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 164, "num_statements": 1} {"question": "Delete records of soldiers who were discharged before 2010-01-01 from the soldiers_discharge_data table", "schema": "CREATE TABLE soldiers_discharge_data (soldier_id INT, name VARCHAR(50), rank VARCHAR(50), discharge_date DATE);", "sql": "DELETE FROM soldiers_discharge_data WHERE discharge_date < '2010-01-01';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_delete", "is_postgresql_specific": false, "sql_length": 72, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the wraps per inch for 120-240", "schema": "CREATE TABLE table_20297668_1 (wraps_per_inch__wpi_ VARCHAR, m_100g VARCHAR)", "sql": "SELECT wraps_per_inch__wpi_ FROM table_20297668_1 WHERE m_100g = '120-240';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 75, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the population for the place with an area of 2.33 km2?", "schema": "CREATE TABLE table_2588674_1 (population__2000_census__ INTEGER, area_km² VARCHAR)", "sql": "SELECT MAX(population__2000_census__) FROM table_2588674_1 WHERE area_km² = '2.33';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 83, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: WHich Category in White has a Black of 38,05%?", "schema": "CREATE TABLE table_name_35 (white VARCHAR, black VARCHAR)", "sql": "SELECT white FROM table_name_35 WHERE black = '38,05%';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 55, "num_statements": 1} {"question": "What is the total REE production in 2020?", "schema": "CREATE TABLE production (year INT, element TEXT, quantity INT); INSERT INTO production (year, element, quantity) VALUES (2015, 'Dysprosium', 100), (2016, 'Dysprosium', 150), (2017, 'Dysprosium', 200), (2018, 'Dysprosium', 250), (2019, 'Dysprosium', 300), (2020, 'Dysprosium', 350), (2015, 'Neodymium', 500), (2016, 'Neodymium', 600), (2017, 'Neodymium', 700), (2018, 'Neodymium', 800), (2019, 'Neodymium', 900), (2020, 'Neodymium', 1000);", "sql": "SELECT SUM(quantity) FROM production WHERE year = 2020 AND element IN ('Dysprosium', 'Neodymium');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 98, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which episode number aired on 18 october 2012?", "schema": "CREATE TABLE table_25721_3 (episode_no INTEGER, airdate VARCHAR)", "sql": "SELECT MIN(episode_no) FROM table_25721_3 WHERE airdate = '18 October 2012';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 76, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Where is the Eastern Creek Raceway located?", "schema": "CREATE TABLE table_name_19 (location___state VARCHAR, circuit VARCHAR)", "sql": "SELECT location___state FROM table_name_19 WHERE circuit = 'eastern creek raceway';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 83, "num_statements": 1} {"question": "PostgreSQL regression test 'stats_import': Write the SELECT query (example 146).", "schema": null, "sql": "SELECT tablename, null_frac\nFROM pg_stats\nWHERE schemaname like 'pg_temp%'\nAND tablename = 'stats_temp'\nAND inherited = false\nAND attname = 'i';", "explanation": "Regression test for Stats Import in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT tablename, null_frac\nFROM pg_stats\nWHERE schemaname like 'pg_temp%'\nAND tablename = 'stats_temp'\nAND inherited = false\nAND attname = 'i') AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 144, "num_statements": 1} {"question": "What is the total number of military bases and their corresponding city in the United States?", "schema": "CREATE TABLE military_bases (id INT PRIMARY KEY, name VARCHAR(255), type VARCHAR(255), location VARCHAR(255)); INSERT INTO military_bases (id, name, type, location) VALUES (1, 'Fort Bragg', 'Army Base', 'Fayetteville, NC');", "sql": "SELECT COUNT(*) as total_bases, location FROM military_bases WHERE location LIKE '%, USA' GROUP BY location;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 108, "num_statements": 1} {"question": "pgTAP test for Pgtap--Unpackaged--0.91.0 (assertion 590).", "schema": null, "sql": "ALTER EXTENSION pgtap ADD FUNCTION _temptable ( anyarray, TEXT );", "explanation": "SQL assertion from pgTAP test suite for Pgtap--Unpackaged--0.91.0.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 65, "num_statements": 1} {"question": "Delete all regions with no countries from the regions table", "schema": "CREATE TABLE countries (id INT, name VARCHAR(50), region VARCHAR(50)); CREATE TABLE regions (id INT, name VARCHAR(50)); INSERT INTO regions (id, name) VALUES (1, 'North America'), (2, 'Europe'), (3, 'Asia'), (4, 'Antarctica'); INSERT INTO countries (id, name, region) VALUES (1, 'USA', 'North America'), (2, 'Russia', 'Europe'), (3, 'China', 'Asia'), (4, 'Terra Nova', 'Antarctica');", "sql": "DELETE FROM regions WHERE id NOT IN (SELECT region FROM countries);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "dml_delete", "is_postgresql_specific": false, "sql_length": 67, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: When David McNamara was the opponent in the final, what was the tier?", "schema": "CREATE TABLE table_name_59 (tier VARCHAR, opponent_in_the_final VARCHAR)", "sql": "SELECT tier FROM table_name_59 WHERE opponent_in_the_final = 'david mcnamara';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 78, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who had the highest assists during game 78?", "schema": "CREATE TABLE table_27721131_11 (high_assists VARCHAR, game VARCHAR)", "sql": "SELECT high_assists FROM table_27721131_11 WHERE game = 78;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 59, "num_statements": 1} {"question": "How many donors have made donations greater than $1000 in the Europe region?", "schema": "CREATE TABLE donors (id INT, name TEXT, region TEXT, donation_amount DECIMAL(10,2)); INSERT INTO donors (id, name, region, donation_amount) VALUES (1, 'John Smith', 'Asia-Pacific', 500.00), (2, 'Jane Doe', 'Europe', 1500.00), (3, 'James Lee', 'Asia-Pacific', 2000.00);", "sql": "SELECT COUNT(*) FROM donors WHERE donation_amount > 1000 AND region = 'Europe';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 79, "num_statements": 1} {"question": "List the case IDs and outcomes for cases that were resolved in New York or Illinois.", "schema": "CREATE TABLE cases (case_id INT, case_outcome VARCHAR(10), case_state VARCHAR(10)); INSERT INTO cases (case_id, case_outcome, case_state) VALUES (1, 'Won', 'New York'), (2, 'Lost', 'Illinois'), (3, 'Pending', 'Texas');", "sql": "SELECT case_id, case_outcome FROM cases WHERE case_state = 'New York' UNION SELECT case_id, case_outcome FROM cases WHERE case_state = 'Illinois';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 146, "num_statements": 1} {"question": "Who are the customers with a high account balance in the 'Americas' region?", "schema": "CREATE TABLE customers (id INT, name VARCHAR(50), region VARCHAR(50), account_balance DECIMAL(10,2)); INSERT INTO customers (id, name, region, account_balance) VALUES (1, 'John Doe', 'New York', 50000.00); INSERT INTO customers (id, name, region, account_balance) VALUES (2, 'Jane Smith', 'California', 60000.00); INSERT INTO customers (id, name, region, account_balance) VALUES (3, 'Bob Johnson', 'APAC', 30000.00); INSERT INTO customers (id, name, region, account_balance) VALUES (4, 'Alice Williams', 'APAC', 40000.00);", "sql": "SELECT * FROM customers WHERE region = 'Americas' AND account_balance > 40000.00;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 81, "num_statements": 1} {"question": "What is the maximum ocean acidification level in the North Pacific Ocean?", "schema": "CREATE TABLE ocean_acidification (id INT, location TEXT, level FLOAT); INSERT INTO ocean_acidification (id, location, level) VALUES (1, 'North Pacific', 8.1); INSERT INTO ocean_acidification (id, location, level) VALUES (2, 'South Pacific', 7.7); INSERT INTO ocean_acidification (id, location, level) VALUES (3, 'North Atlantic', 7.9);", "sql": "SELECT MAX(level) FROM ocean_acidification WHERE location = 'North Pacific';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 76, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who is the home team at kardinia park?", "schema": "CREATE TABLE table_name_16 (home_team VARCHAR, venue VARCHAR)", "sql": "SELECT home_team FROM table_name_16 WHERE venue = 'kardinia park';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "Count the number of properties in Los Angeles with a listing price above $600,000 and inclusive housing features.", "schema": "CREATE TABLE properties (id INT, city VARCHAR(20), listing_price INT, inclusive_housing BOOLEAN); INSERT INTO properties (id, city, listing_price, inclusive_housing) VALUES (1, 'Los Angeles', 700000, true); INSERT INTO properties (id, city, listing_price, inclusive_housing) VALUES (2, 'Los Angeles', 600000, false);", "sql": "SELECT COUNT(*) FROM properties WHERE city = 'Los Angeles' AND listing_price > 600000 AND inclusive_housing = true;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 115, "num_statements": 1} {"question": "What is the average attendance for cultural events in the city of 'New York'?", "schema": "CREATE TABLE cultural_events (id INT, city VARCHAR(20), attendance INT); INSERT INTO cultural_events (id, city, attendance) VALUES (1, 'New York', 2000), (2, 'Los Angeles', 3000), (3, 'New York', 2500);", "sql": "SELECT AVG(attendance) FROM cultural_events WHERE city = 'New York';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 68, "num_statements": 1} {"question": "PostgreSQL regression test 'constraints': Write the SELECT query (example 211).", "schema": null, "sql": "SELECT conname FROM pg_constraint WHERE conrelid = 'parted_fk_naming_1'::regclass AND contype = 'f';", "explanation": "Regression test for Constraints in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT conname FROM pg_constraint WHERE conrelid = 'parted_fk_naming_1'::regclass AND contype = 'f') AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 100, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'seg' (example 130).", "schema": null, "sql": "SELECT '0'::seg >> '0 .. 1'::seg AS bool;", "explanation": "Example query from the 'seg' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 41, "num_statements": 1} {"question": "Show the number of employees in each department, sorted by the number of employees in descending order in the \"employees\" table.", "schema": "CREATE TABLE employees (id INT, name VARCHAR(50), department VARCHAR(20)); INSERT INTO employees (id, name, department) VALUES (1, 'Anna Smith', 'News'), (2, 'John Doe', 'News'), (3, 'Sara Connor', 'News'), (4, 'Mike Johnson', 'Sports'), (5, 'Emma White', 'Sports'), (6, 'Alex Brown', 'IT');", "sql": "SELECT department, COUNT(*) AS num_employees FROM employees GROUP BY department ORDER BY num_employees DESC;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 108, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the Date of the Everton Away game?", "schema": "CREATE TABLE table_name_95 (date VARCHAR, away_team VARCHAR)", "sql": "SELECT date FROM table_name_95 WHERE away_team = 'everton';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 59, "num_statements": 1} {"question": "List the case numbers, outcomes, and corresponding legal precedent IDs for cases in the 'criminal' schema, ordered by case number.", "schema": "CREATE SCHEMA criminal; CREATE TABLE case_outcomes (case_number INT, outcome VARCHAR(255)); CREATE TABLE legal_precedents (precedent_id INT, case_number INT);", "sql": "SELECT co.case_number, co.outcome, lp.precedent_id FROM criminal.case_outcomes co INNER JOIN criminal.legal_precedents lp ON co.case_number = lp.case_number ORDER BY co.case_number;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 181, "num_statements": 1} {"question": "What are the names and average attendance of all stadiums that have hosted a game in the last 5 years?", "schema": "CREATE TABLE stadiums (id INT, name TEXT, capacity INT); CREATE TABLE games (id INT, stadium_id INT, home_team_id INT, away_team_id INT, home_team_score INT, away_team_score INT, game_date DATE); CREATE TABLE attendance (game_id INT, team_id INT, fans_attended INT);", "sql": "SELECT s.name, AVG(a.fans_attended) FROM stadiums s INNER JOIN games g ON s.id = g.stadium_id INNER JOIN attendance a ON g.id = a.game_id WHERE g.game_date >= DATE_SUB(CURRENT_DATE, INTERVAL 5 YEAR) GROUP BY s.name;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 215, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the territory with 17 screens?", "schema": "CREATE TABLE table_name_91 (territory VARCHAR, screens VARCHAR)", "sql": "SELECT territory FROM table_name_91 WHERE screens = 17;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 55, "num_statements": 1} {"question": "What is the maximum seating capacity of public ferries in Hong Kong?", "schema": "CREATE TABLE public_ferries(id INT, ferry_number INT, city VARCHAR(20), max_seating_capacity INT);", "sql": "SELECT MAX(max_seating_capacity) FROM public_ferries WHERE city = 'Hong Kong';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 78, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What years in orlando have the United States as the nationality, and montana as the school/club team?", "schema": "CREATE TABLE table_name_16 (years_in_orlando VARCHAR, nationality VARCHAR, school_club_team VARCHAR)", "sql": "SELECT years_in_orlando FROM table_name_16 WHERE nationality = 'united states' AND school_club_team = 'montana';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 112, "num_statements": 1} {"question": "List the departments with no research grants.", "schema": "CREATE TABLE Departments (DepartmentID INT, DepartmentName VARCHAR(50), ResearchGrants INT);", "sql": "SELECT DepartmentName FROM Departments WHERE ResearchGrants = 0;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 64, "num_statements": 1} {"question": "Calculate the percentage of products sold for each brand in the 'ethical_labor2' table, ordered by the percentage in descending order.", "schema": "CREATE TABLE ethical_labor2 (product_id INT, brand VARCHAR(255), quantity_sold INT); INSERT INTO ethical_labor2 (product_id, brand, quantity_sold) VALUES (4, 'BrandV', 500), (5, 'BrandW', 800), (6, 'BrandX', 700);", "sql": "SELECT brand, (SUM(quantity_sold) OVER (PARTITION BY brand) * 100.0 / SUM(quantity_sold) OVER ()) AS percentage_sold FROM ethical_labor2 GROUP BY brand ORDER BY percentage_sold DESC;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "query_window_function", "is_postgresql_specific": true, "sql_length": 182, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How much did Jerry Barber score to come in at T9?", "schema": "CREATE TABLE table_name_79 (score VARCHAR, place VARCHAR, player VARCHAR)", "sql": "SELECT score FROM table_name_79 WHERE place = 't9' AND player = 'jerry barber';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 79, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What's the termini of the route with junctions of i-35 fm 3338 sh 255?", "schema": "CREATE TABLE table_name_2 (termini VARCHAR, junctions VARCHAR)", "sql": "SELECT termini FROM table_name_2 WHERE junctions = 'i-35 fm 3338 sh 255';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 73, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What number value has the Other transliteration of muoy roy?", "schema": "CREATE TABLE table_name_90 (value VARCHAR, other VARCHAR)", "sql": "SELECT value FROM table_name_90 WHERE other = 'muoy roy';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 57, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the horizontal 0 for 明弦(ry日月)", "schema": "CREATE TABLE table_25519358_1 (horizontal_0_a VARCHAR, hypotenuse_0_c VARCHAR)", "sql": "SELECT horizontal_0_a FROM table_25519358_1 WHERE hypotenuse_0_c = '明弦(RY日月)';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 78, "num_statements": 1} {"question": "List all animals and their ages from the 'animal_population' table, ordered by age.", "schema": "CREATE TABLE animal_population (animal_id INT, animal_type VARCHAR(10), age INT); INSERT INTO animal_population (animal_id, animal_type, age) VALUES (1, 'hippo', 18); INSERT INTO animal_population (animal_id, animal_type, age) VALUES (2, 'rhino', 12); INSERT INTO animal_population (animal_id, animal_type, age) VALUES (3, 'hippo', 20);", "sql": "SELECT * FROM animal_population ORDER BY age;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 45, "num_statements": 1} {"question": "How many social impact investments were made in 'Africa' in 2020?", "schema": "CREATE TABLE investments (id INT, location VARCHAR(50), investment_year INT, investment_type VARCHAR(20)); INSERT INTO investments (id, location, investment_year, investment_type) VALUES (1, 'Africa', 2020, 'social impact'), (2, 'Europe', 2019, 'social impact'), (3, 'Africa', 2020, 'traditional'), (4, 'North America', 2021, 'social impact');", "sql": "SELECT COUNT(*) FROM investments WHERE location = 'Africa' AND investment_year = 2020 AND investment_type = 'social impact';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 124, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What year had an edition of 115th?", "schema": "CREATE TABLE table_name_21 (year INTEGER, edition VARCHAR)", "sql": "SELECT AVG(year) FROM table_name_21 WHERE edition = '115th';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 60, "num_statements": 1} {"question": "What is the minimum depth of all marine protected areas in the Arctic Ocean?", "schema": "CREATE TABLE marine_protected_areas (name VARCHAR(255), depth FLOAT, ocean VARCHAR(255));", "sql": "SELECT MIN(depth) FROM marine_protected_areas WHERE ocean = 'Arctic';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 69, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the final score when the Maple Leafs played the Columbus Blue Jackets?", "schema": "CREATE TABLE table_16864968_7 (score VARCHAR, opponent VARCHAR)", "sql": "SELECT score FROM table_16864968_7 WHERE opponent = 'Columbus Blue Jackets';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 76, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the call sign for ERP W of 27", "schema": "CREATE TABLE table_name_65 (call_sign VARCHAR, erp_w VARCHAR)", "sql": "SELECT call_sign FROM table_name_65 WHERE erp_w = 27;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 53, "num_statements": 1} {"question": "PostgreSQL regression test 'create_am': Write the SELECT query (example 87).", "schema": null, "sql": "SELECT relam FROM pg_class WHERE relname = 'am_partitioned';", "explanation": "Regression test for Create Am in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT relam FROM pg_class WHERE relname = 'am_partitioned') AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": true, "sql_length": 60, "num_statements": 1} {"question": "How many clinical trials were conducted by each organization in 2019?", "schema": "CREATE TABLE clinical_trials (trial_id INT, organization_id INT, country VARCHAR(255), approval_date DATE); INSERT INTO clinical_trials (trial_id, organization_id, country, approval_date) VALUES (1, 1, 'USA', '2019-01-01'), (2, 1, 'Canada', '2019-04-01'), (3, 2, 'Mexico', '2019-07-01');", "sql": "SELECT organization_id, COUNT(*) as num_trials FROM clinical_trials WHERE YEAR(approval_date) = 2019 GROUP BY organization_id;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 126, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the rate for 73 RYds?", "schema": "CREATE TABLE table_name_22 (rate VARCHAR, ryds VARCHAR)", "sql": "SELECT rate FROM table_name_22 WHERE ryds = '73';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 49, "num_statements": 1} {"question": "Drop the 'satellite_deployment' table", "schema": "CREATE TABLE satellite_deployment (id INT PRIMARY KEY, name VARCHAR(255), country VARCHAR(255), launch_date DATE);", "sql": "DROP TABLE satellite_deployment;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 32, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many female students have milk or egg allergies?", "schema": "CREATE TABLE Student (StuID VARCHAR, sex VARCHAR); CREATE TABLE has_allergy (StuID VARCHAR, allergy VARCHAR)", "sql": "SELECT COUNT(*) FROM has_allergy AS T1 JOIN Student AS T2 ON T1.StuID = T2.StuID WHERE T2.sex = 'F' AND T1.allergy = 'Milk' OR T1.allergy = 'Eggs';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 147, "num_statements": 1} {"question": "Insert a new menu item 'Veggie Burger' into the 'Dinner' menu with a price of 13.99", "schema": "CREATE TABLE Menu (menu_name VARCHAR(20), item_name VARCHAR(30), price DECIMAL(5,2));", "sql": "INSERT INTO Menu (menu_name, item_name, price) VALUES ('Dinner', 'Veggie Burger', 13.99);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 89, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'create_index' (example 13).", "schema": null, "sql": "CREATE INDEX tenk2_hundred ON tenk2 USING btree(hundred int4_ops);", "explanation": "DDL from PostgreSQL core regression test for Create Index.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "ddl_index", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "Which country had the largest decrease in Yttrium production from 2020 to 2021?", "schema": "CREATE TABLE YttriumProduction (country VARCHAR(50), year INT, production INT); INSERT INTO YttriumProduction (country, year, production) VALUES ('China', 2020, 1200), ('China', 2021, 1100), ('USA', 2020, 1000), ('USA', 2021, 1100), ('Australia', 2020, 800), ('Australia', 2021, 700);", "sql": "SELECT country, MIN(production_change) FROM (SELECT country, (production - LAG(production) OVER (PARTITION BY country ORDER BY year)) AS production_change FROM YttriumProduction) AS subquery WHERE production_change IS NOT NULL GROUP BY country;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "query_window_function", "is_postgresql_specific": true, "sql_length": 244, "num_statements": 1} {"question": "How many art pieces were created by artists from Africa in the 'sculpture' medium?", "schema": "CREATE TABLE art_pieces (id INT, title TEXT, artist_name TEXT, medium TEXT, region TEXT); INSERT INTO art_pieces (id, title, artist_name, medium, region) VALUES (1, 'African Mask', 'Unknown', 'sculpture', 'Africa');", "sql": "SELECT COUNT(*) FROM art_pieces WHERE medium = 'sculpture' AND region = 'Africa';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 81, "num_statements": 1} {"question": "What is the total revenue for music artists by genre?", "schema": "CREATE TABLE Music_Artists_Genre (id INT, name VARCHAR(100), genre VARCHAR(50), revenue DECIMAL(10,2)); INSERT INTO Music_Artists_Genre (id, name, genre, revenue) VALUES (1, 'Adele', 'Pop', 1000000.00), (2, 'Eminem', 'Rap', 800000.00), (3, 'Metallica', 'Rock', 1200000.00);", "sql": "SELECT genre, SUM(revenue) FROM Music_Artists_Genre GROUP BY genre;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 67, "num_statements": 1} {"question": "PostgreSQL regression test 'horology': Write the SELECT query (example 29).", "schema": null, "sql": "SELECT timestamp with time zone 'J2452271T040506.789-08';", "explanation": "Regression test for Horology in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT timestamp with time zone 'J2452271T040506.789-08') AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 57, "num_statements": 1} {"question": "What is the total revenue generated by attorneys from the 'Boston' region in the 'Criminal' practice area?", "schema": "CREATE TABLE Attorneys (AttorneyID INT, Name TEXT, Region TEXT, Practice TEXT, Revenue FLOAT); INSERT INTO Attorneys (AttorneyID, Name, Region, Practice, Revenue) VALUES (1, 'John Doe', 'Boston', 'Criminal', 50000.00); INSERT INTO Attorneys (AttorneyID, Name, Region, Practice, Revenue) VALUES (2, 'Jane Smith', 'New York', 'Civil', 75000.00);", "sql": "SELECT SUM(Revenue) FROM Attorneys WHERE Region = 'Boston' AND Practice = 'Criminal';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 85, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which Mascot has an Enrollment of 640?", "schema": "CREATE TABLE table_name_63 (mascot VARCHAR, enrollment VARCHAR)", "sql": "SELECT mascot FROM table_name_63 WHERE enrollment = 640;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 56, "num_statements": 1} {"question": "How many users have interacted with content related to the hashtag \"#climatechange\" in the last year?", "schema": "CREATE TABLE users (id INT); CREATE TABLE posts (id INT, user_id INT, hashtags TEXT);", "sql": "SELECT COUNT(DISTINCT users.id) FROM users INNER JOIN posts ON users.id = posts.user_id WHERE FIND_IN_SET('climatechange', posts.hashtags) > 0 AND posts.created_at >= DATE_SUB(NOW(), INTERVAL 1 YEAR);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 200, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many episodes are numbered 12x03?", "schema": "CREATE TABLE table_23292220_13 (first_broadcast VARCHAR, episode VARCHAR)", "sql": "SELECT COUNT(first_broadcast) FROM table_23292220_13 WHERE episode = '12x03';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 77, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is Norway's least ends lost?", "schema": "CREATE TABLE table_16922657_2 (Ends INTEGER, country VARCHAR, Norway VARCHAR)", "sql": "SELECT MIN(Ends) AS lost FROM table_16922657_2 WHERE country = Norway;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 70, "num_statements": 1} {"question": "What is the rank of each mediator based on the number of cases handled, with ties broken by age?", "schema": "CREATE TABLE Mediators (MediatorID INT, Name VARCHAR(50), Age INT, Experience INT); INSERT INTO Mediators (MediatorID, Name, Age, Experience) VALUES (1, 'John Doe', 45, 12), (2, 'Jane Smith', 38, 7), (3, 'Alice Johnson', 42, 18), (4, 'Bob Brown', 50, 25); CREATE TABLE Cases (CaseID INT, MediatorID INT, Date DATE); INSERT INTO Cases (CaseID, MediatorID, Date) VALUES (1, 1, '2021-01-01'), (2, 1, '2021-02-01'), (3, 2, '2021-03-01'), (4, 3, '2021-04-01'), (5, 3, '2021-05-01'), (6, 4, '2021-06-01');", "sql": "SELECT MediatorID, Name, RANK() OVER (ORDER BY COUNT(*) DESC, Age) as Rank FROM Mediators JOIN Cases ON Mediators.MediatorID = Cases.MediatorID GROUP BY MediatorID, Name;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_window_function", "is_postgresql_specific": true, "sql_length": 170, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the date for pescara", "schema": "CREATE TABLE table_name_76 (date VARCHAR, circuit VARCHAR)", "sql": "SELECT date FROM table_name_76 WHERE circuit = 'pescara';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 57, "num_statements": 1} {"question": "What are the total sales figures per region for the 'sales_by_region' table?", "schema": "CREATE TABLE sales_by_region (region TEXT, drug_name TEXT, sales_q1 INT, sales_q2 INT, sales_q3 INT, sales_q4 INT); INSERT INTO sales_by_region (region, drug_name, sales_q1, sales_q2, sales_q3, sales_q4) VALUES ('North', 'DrugA', 400, 500, 600, 800), ('South', 'DrugA', 300, 350, 400, 500), ('East', 'DrugA', 500, 600, 700, 900), ('West', 'DrugA', 600, 700, 800, 1000), ('North', 'DrugB', 500, 600, 700, 800), ('South', 'DrugB', 400, 450, 500, 600), ('East', 'DrugB', 600, 700, 800, 900), ('West', 'DrugB', 700, 800, 900, 1000);", "sql": "SELECT region, SUM(sales_q1 + sales_q2 + sales_q3 + sales_q4) as total_sales FROM sales_by_region GROUP BY region;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 114, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the To par of the Player with a Score of 70-71=141?", "schema": "CREATE TABLE table_name_58 (to_par INTEGER, score VARCHAR)", "sql": "SELECT AVG(to_par) FROM table_name_58 WHERE score = 70 - 71 = 141;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the date for the home detroit and visitor was chicago?", "schema": "CREATE TABLE table_name_82 (date VARCHAR, home VARCHAR, visitor VARCHAR)", "sql": "SELECT date FROM table_name_82 WHERE home = 'detroit' AND visitor = 'chicago';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 78, "num_statements": 1} {"question": "Find the maximum number of artworks created by an individual artist", "schema": "CREATE TABLE Artists (ArtistID INT PRIMARY KEY, ArtistName VARCHAR(255), NumberOfArtworks INT); INSERT INTO Artists (ArtistID, ArtistName, NumberOfArtworks) VALUES (1, 'Vincent van Gogh', 2100), (2, 'Pablo Picasso', 1347), (3, 'Claude Monet', 1643), (4, 'Jackson Pollock', 287);", "sql": "SELECT MAX(NumberOfArtworks) AS MaxArtworks FROM Artists;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 57, "num_statements": 1} {"question": "What is the difference in the number of flights operated by 'Intergalactic' and 'UniversalAirlines'?", "schema": "CREATE TABLE flights (id INT, airline VARCHAR(255)); INSERT INTO flights (id, airline) VALUES (1, 'Intergalactic'), (2, 'UniversalAirlines'), (3, 'Intergalactic'), (4, 'UniversalAirlines'), (5, 'Intergalactic');", "sql": "SELECT COUNT(*) FILTER (WHERE airline = 'Intergalactic') - COUNT(*) FILTER (WHERE airline = 'UniversalAirlines') as difference;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": true, "sql_length": 127, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the reunion weight of the contestant who lost 74.4 lbs at the finale?", "schema": "CREATE TABLE table_28654454_5 (reunion_weight VARCHAR, lbs_lost_finale VARCHAR)", "sql": "SELECT reunion_weight FROM table_28654454_5 WHERE lbs_lost_finale = '74.4';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 75, "num_statements": 1} {"question": "Write the PL/pgSQL object from PostgreSQL regression test 'plpgsql' (example 453).", "schema": null, "sql": "end $$;\n\n-- fail because cursor has no query bound to it\n\ncreate or replace function forc_bad() returns void as $$\ndeclare\n c refcursor;", "explanation": "PL/pgSQL object from PostgreSQL core test for Plpgsql.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "plpgsql_function", "is_postgresql_specific": true, "sql_length": 137, "num_statements": 2} {"question": "What is the total amount spent on raw materials for each product line, including the cost of sub-components?", "schema": "CREATE TABLE raw_materials (id INT, product_line VARCHAR(50), amount INT, sub_components VARCHAR(50)); INSERT INTO raw_materials (id, product_line, amount, sub_components) VALUES (1, 'product1', 10000, 'component1,component2'); INSERT INTO raw_materials (id, product_line, amount, sub_components) VALUES (2, 'product2', 15000, 'component3,component4');", "sql": "SELECT product_line, SUM(amount + (SELECT SUM(amount) FROM raw_materials WHERE sub_components LIKE CONCAT('%', product_line, '%'))) FROM raw_materials GROUP BY product_line;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 173, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'join' (example 323).", "schema": null, "sql": "insert into nt2 values (1,1,true,true);", "explanation": "DML from PostgreSQL core regression test for Join.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 39, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many players were drafted by the Edmonton Oilers?", "schema": "CREATE TABLE table_2840500_2 (player VARCHAR, nhl_team VARCHAR)", "sql": "SELECT COUNT(player) FROM table_2840500_2 WHERE nhl_team = 'Edmonton Oilers';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 77, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'cube' (example 203).", "schema": null, "sql": "SELECT '(1,1)'::cube <=> '(4,5)'::cube as d_c;", "explanation": "Example query from the 'cube' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "query_select", "is_postgresql_specific": true, "sql_length": 46, "num_statements": 1} {"question": "Delete policies that have been expired for more than 2 years for policyholders in California.", "schema": "CREATE TABLE policies (id INT, policyholder_id INT, policy_type TEXT, issue_date DATE, expiry_date DATE); INSERT INTO policies (id, policyholder_id, policy_type, issue_date, expiry_date) VALUES (1, 3, 'Life', '2020-01-01', '2022-01-01'), (2, 4, 'Health', '2021-02-01', '2023-02-01'), (3, 5, 'Auto', '2021-03-01', '2024-03-01'); CREATE TABLE policyholders (id INT, name TEXT, state TEXT); INSERT INTO policyholders (id, name, state) VALUES (3, 'Sophia Garcia', 'California'), (4, 'Daniel Kim', 'Nevada'), (5, 'Claire Williams', 'Texas');", "sql": "DELETE FROM policies WHERE policies.id IN (SELECT policies.id FROM policies JOIN policyholders ON policies.policyholder_id = policyholders.id WHERE policyholders.state = 'California' AND policies.expiry_date < DATE_SUB(CURRENT_DATE, INTERVAL 2 YEAR));", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "dml_delete", "is_postgresql_specific": false, "sql_length": 251, "num_statements": 1} {"question": "List all climate adaptation projects and their respective completion years.", "schema": "CREATE TABLE climate_adaptation_projects (project_id INT, project_name TEXT, completion_year INT, project_type TEXT); INSERT INTO climate_adaptation_projects (project_id, project_name, completion_year, project_type) VALUES (16, 'Coastal Erosion Protection P', 2018, 'adaptation'), (17, 'Water Management Q', 2019, 'adaptation'), (18, 'Disaster Risk Reduction R', 2020, 'adaptation'), (19, 'Climate Resilient Agriculture S', 2021, 'adaptation');", "sql": "SELECT project_name, completion_year FROM climate_adaptation_projects WHERE project_type = 'adaptation';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 104, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What caused the collapse of the Mausoleum at Halicarnassus?", "schema": "CREATE TABLE table_19342760_1 (cause_of_destruction VARCHAR, name VARCHAR)", "sql": "SELECT cause_of_destruction FROM table_19342760_1 WHERE name = 'Mausoleum at Halicarnassus';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 92, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is every high score for a strike rate of 84.88?", "schema": "CREATE TABLE table_2985664_8 (high_score VARCHAR, strike_rate VARCHAR)", "sql": "SELECT high_score FROM table_2985664_8 WHERE strike_rate = '84.88';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 67, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the lowest pole with a Flap larger than 5, and a before race 155?", "schema": "CREATE TABLE table_name_49 (pole INTEGER, flap VARCHAR, race VARCHAR)", "sql": "SELECT MIN(pole) FROM table_name_49 WHERE flap > 5 AND race < 155;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the 2nd leg with ldu quito as team 1?", "schema": "CREATE TABLE table_name_62 (team__number1 VARCHAR)", "sql": "SELECT 2 AS nd_leg FROM table_name_62 WHERE team__number1 = 'ldu quito';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 72, "num_statements": 1} {"question": "What is the minimum temperature (°C) for fish farms located in the Southern Ocean?", "schema": "CREATE TABLE fish_farms (id INT, name TEXT, location TEXT, temperature FLOAT); INSERT INTO fish_farms (id, name, location, temperature) VALUES (1, 'Farm A', 'Southern Ocean', 10.0), (2, 'Farm B', 'Southern Ocean', 8.0), (3, 'Farm C', 'Indian Ocean', 12.0);", "sql": "SELECT MIN(temperature) FROM fish_farms WHERE location = 'Southern Ocean';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 74, "num_statements": 1} {"question": "List all players who have participated in ESports events for a single platform, along with their demographic information.", "schema": "CREATE TABLE Players (PlayerID INT, Age INT, Gender VARCHAR(10));CREATE VIEW ESportsEventsView (PlayerID, EventCount, Platform) AS SELECT PlayerID, COUNT(*), GamePlatform FROM ESportsEvents JOIN Games ON ESportsEvents.GameID = Games.GameID GROUP BY PlayerID, GamePlatform;", "sql": "SELECT p.PlayerID, p.Age, p.Gender FROM Players p INNER JOIN ESportsEventsView ev ON p.PlayerID = ev.PlayerID GROUP BY p.PlayerID HAVING COUNT(DISTINCT ev.Platform) = 1;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 169, "num_statements": 1} {"question": "What is the percentage of dispensaries in each state that sell a particular strain, such as Blue Dream?", "schema": "CREATE TABLE DispensaryStrains (dispensary VARCHAR(255), state VARCHAR(255), strain VARCHAR(255)); INSERT INTO DispensaryStrains (dispensary, state, strain) VALUES ('Dispensary A', 'CA', 'Blue Dream'), ('Dispensary A', 'CO', 'Sour Diesel'), ('Dispensary B', 'CA', 'Blue Dream'), ('Dispensary B', 'CO', 'Durban Poison');", "sql": "SELECT state, COUNT(*) * 100.0 / (SELECT COUNT(*) FROM DispensaryStrains WHERE strain = 'Blue Dream') as percentage FROM DispensaryStrains WHERE strain = 'Blue Dream' GROUP BY state;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 182, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the average FAC/LC apps with a CL G greater than 0, less than 33 PL apps, a FAC/LC G of 0, and less than 2 FA YC?", "schema": "CREATE TABLE table_name_46 (fac___lc_apps INTEGER, fa_yc VARCHAR, fac___lc_g VARCHAR, cl_g VARCHAR, pl_apps VARCHAR)", "sql": "SELECT AVG(fac___lc_apps) FROM table_name_46 WHERE cl_g > 0 AND pl_apps < 33 AND fac___lc_g = 0 AND fa_yc < 2;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 110, "num_statements": 1} {"question": "Count the number of songs by artists from Canada.", "schema": "CREATE TABLE Songs (song_id INT, artist_id INT, title VARCHAR(100), release_year INT); CREATE TABLE Artists (artist_id INT, name VARCHAR(100), country VARCHAR(50));", "sql": "SELECT COUNT(s.song_id) FROM Songs s INNER JOIN Artists a ON s.artist_id = a.artist_id WHERE a.country = 'Canada';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 114, "num_statements": 1} {"question": "Which defense contractors have negotiated contracts for missile defense systems in Europe?", "schema": "CREATE TABLE ContractNegotiations (contractID INT, company VARCHAR(255), systemType VARCHAR(255), region VARCHAR(255)); INSERT INTO ContractNegotiations (contractID, company, systemType, region) VALUES (1, 'Raytheon', 'Missile Defense System', 'Europe'); INSERT INTO ContractNegotiations (contractID, company, systemType, region) VALUES (2, 'Lockheed Martin', 'Missile Defense System', 'Europe');", "sql": "SELECT DISTINCT company FROM ContractNegotiations WHERE systemType = 'Missile Defense System' AND region = 'Europe';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 116, "num_statements": 1} {"question": "What is the average cultural competency score for community health workers by gender?", "schema": "CREATE TABLE community_health_workers (worker_id INT, age INT, gender VARCHAR(255), cultural_competency_score INT); INSERT INTO community_health_workers (worker_id, age, gender, cultural_competency_score) VALUES (1, 35, 'Male', 80), (2, 40, 'Female', 85), (3, 45, 'Non-binary', 90);", "sql": "SELECT gender, AVG(cultural_competency_score) FROM community_health_workers GROUP BY gender;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 92, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many wins for bruce fleisher with over 31 events?", "schema": "CREATE TABLE table_name_16 (wins INTEGER, player VARCHAR, events VARCHAR)", "sql": "SELECT AVG(wins) FROM table_name_16 WHERE player = 'bruce fleisher' AND events > 31;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 84, "num_statements": 1} {"question": "Which causes received donations from donors in a specific region?", "schema": "CREATE TABLE Donors (DonorID INT, Region VARCHAR(50)); CREATE TABLE Donations (DonationID INT, DonorID INT, Cause VARCHAR(50), Amount DECIMAL(10,2)); INSERT INTO Donors (DonorID, Region) VALUES (1, 'North America'), (2, 'South America'), (3, 'North America'), (4, 'Europe'), (5, 'Asia'); INSERT INTO Donations (DonationID, DonorID, Cause, Amount) VALUES (1, 1, 'Education', 2000), (2, 2, 'Health', 3000), (3, 1, 'Education', 1000), (4, 4, 'Environment', 4000);", "sql": "SELECT Cause FROM Donations D JOIN Donors R ON D.DonorID = R.DonorID WHERE R.Region = 'North America';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 102, "num_statements": 1} {"question": "PostgreSQL regression test 'jsonb': Write the SELECT query (example 705).", "schema": null, "sql": "select '{\"a\":1 , \"b\":2, \"c\":3}'::jsonb - 'c';", "explanation": "Regression test for Jsonb in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select '{\"a\":1 , \"b\":2, \"c\":3}'::jsonb - 'c') AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": true, "sql_length": 45, "num_statements": 1} {"question": "What are the mental health conditions with a patient count greater than 50000 in Europe?", "schema": "CREATE TABLE european_health_conditions (id INT, name VARCHAR(255), patients INT); INSERT INTO european_health_conditions (id, name, patients) VALUES (1, 'Stress Disorder', 45000); INSERT INTO european_health_conditions (id, name, patients) VALUES (2, 'PTSD', 70000); INSERT INTO european_health_conditions (id, name, patients) VALUES (3, 'ADHD', 80000);", "sql": "SELECT name FROM european_health_conditions WHERE patients > 50000;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 67, "num_statements": 1} {"question": "PL/pgSQL test: Pltcl Setup (example 13).", "schema": null, "sql": "create function tcl_int4ge(int4,int4) returns bool as '\n if {$1 >= $2} {\n return t\n }\n return f\n' language pltcl;", "explanation": "PL/pgSQL example from PostgreSQL source test for Pltcl Setup.", "validation_query": null, "source": "plpgsql_source", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 129, "num_statements": 1} {"question": "How many water sources were offline for more than two weeks in the 'WaterSources' table?", "schema": "CREATE TABLE WaterSources (ID INT, SourceID INT, Status VARCHAR(10), LastOnline DATE); INSERT INTO WaterSources (ID, SourceID, Status, LastOnline) VALUES (1, 1, 'Online', '2022-01-01'); INSERT INTO WaterSources (ID, SourceID, Status, LastOnline) VALUES (2, 2, 'Offline', '2022-06-15');", "sql": "SELECT COUNT(*) FROM WaterSources WHERE Status = 'Offline' AND DATEDIFF(day, LastOnline, GETDATE()) > 14;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 105, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What's the value for utah when texas is humphrey?", "schema": "CREATE TABLE table_name_80 (utah VARCHAR, texas VARCHAR)", "sql": "SELECT utah FROM table_name_80 WHERE texas = 'humphrey';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 56, "num_statements": 1} {"question": "List the names and genres of games designed by developers from Latin America in the last 2 years.", "schema": "CREATE TABLE game_designers (id INT, name VARCHAR(50), gender VARCHAR(50), country VARCHAR(50)); INSERT INTO game_designers (id, name, gender, country) VALUES (1, 'María Rodríguez', 'Female', 'Argentina'); INSERT INTO game_designers (id, name, gender, country) VALUES (2, 'Carlos Alvarez', 'Male', 'Brazil'); CREATE TABLE game_sales (id INT, game_name VARCHAR(50), genre VARCHAR(50), sale_date DATE); INSERT INTO game_sales (id, game_name, genre, sale_date) VALUES (1, 'Game C', 'RPG', '2022-02-01'); INSERT INTO game_sales (id, game_name, genre, sale_date) VALUES (2, 'Game D', 'Strategy', '2022-03-15');", "sql": "SELECT game_sales.game_name, game_sales.genre FROM game_sales INNER JOIN game_designers ON game_sales.game_name = game_designers.name WHERE game_designers.country IN ('Argentina', 'Brazil', 'Colombia') AND game_sales.sale_date >= DATEADD(year, -2, GETDATE());", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 259, "num_statements": 1} {"question": "Identify all marine species that have been impacted by ocean acidification and their conservation status.", "schema": "CREATE TABLE marine_species (id INT, species_name TEXT, conservation_status TEXT); CREATE TABLE ocean_acidification_impact (id INT, species_id INT, PRIMARY KEY (id, species_id), FOREIGN KEY (species_id) REFERENCES marine_species(id)); INSERT INTO marine_species (id, species_name, conservation_status) VALUES (1, 'Coral', 'Vulnerable'), (2, 'Salmon', 'Least Concern'), (3, 'Sea Turtle', 'Endangered'); INSERT INTO ocean_acidification_impact (id, species_id) VALUES (1, 1), (2, 3);", "sql": "SELECT marine_species.species_name, marine_species.conservation_status FROM marine_species INNER JOIN ocean_acidification_impact ON marine_species.id = ocean_acidification_impact.species_id;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 190, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What category was Scream nominated for at the International Horror Guild?", "schema": "CREATE TABLE table_name_52 (category VARCHAR, work VARCHAR, award VARCHAR)", "sql": "SELECT category FROM table_name_52 WHERE work = 'scream' AND award = 'international horror guild';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 98, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the birth date for taavi sadam", "schema": "CREATE TABLE table_25058562_2 (birth_date VARCHAR, player VARCHAR)", "sql": "SELECT birth_date FROM table_25058562_2 WHERE player = 'Taavi Sadam';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 69, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What engine scored 17 points?", "schema": "CREATE TABLE table_name_86 (engine VARCHAR, points VARCHAR)", "sql": "SELECT engine FROM table_name_86 WHERE points = '17';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 53, "num_statements": 1} {"question": "What is the total funding amount for startups founded by individuals who identify as Native American in the agriculture sector?", "schema": "CREATE TABLE startup (id INT, name VARCHAR(100), industry VARCHAR(50), founder_native_american VARCHAR(3), funding FLOAT); INSERT INTO startup VALUES (1, 'StartupA', 'Agriculture', 'Yes', 1000000); INSERT INTO startup VALUES (2, 'StartupB', 'Tech', 'No', 7000000); INSERT INTO startup VALUES (3, 'StartupC', 'Agriculture', 'Yes', 1200000);", "sql": "SELECT SUM(funding) FROM startup WHERE founder_native_american = 'Yes' AND industry = 'Agriculture';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 100, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the D 41 which has a D 43 of r 18", "schema": "CREATE TABLE table_name_45 (d_41 VARCHAR, d_43 VARCHAR)", "sql": "SELECT d_41 FROM table_name_45 WHERE d_43 = 'r 18';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 51, "num_statements": 1} {"question": "PostgreSQL regression test 'polymorphism': Write the SELECT query (example 24).", "schema": null, "sql": "select polyf(multirange(int4range(42, 49)), 11, 2::smallint) as int, polyf(multirange(float8range(4.5, 7.8)), 7.8, 11::real) as num;", "explanation": "Regression test for Polymorphism in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select polyf(multirange(int4range(42, 49)), 11, 2::smallint) as int, polyf(multirange(float8range(4.5, 7.8)), 7.8, 11::real) as num) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 132, "num_statements": 1} {"question": "What is the earliest release date of a game in the 'VirtualRealityGames' table?", "schema": "CREATE TABLE VirtualRealityGames (GameID INT, GameName TEXT, ReleaseDate DATE); INSERT INTO VirtualRealityGames (GameID, GameName, ReleaseDate) VALUES (1, 'Game1', '2021-01-01'), (2, 'Game2', '2022-02-02'), (3, 'Game3', '2020-03-03');", "sql": "SELECT MIN(ReleaseDate) FROM VirtualRealityGames;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 49, "num_statements": 1} {"question": "PostgreSQL regression test 'jsonb_jsonpath': Write the SELECT query (example 451).", "schema": null, "sql": "select jsonb_path_query('-0.00123456', '$.decimal(2,-4)');", "explanation": "Regression test for Jsonb Jsonpath in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select jsonb_path_query('-0.00123456', '$.decimal(2,-4)')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "advanced", "category": "query_select", "is_postgresql_specific": false, "sql_length": 58, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many times did drinking games win the poll?", "schema": "CREATE TABLE table_15781170_3 (_number VARCHAR, poll_winner VARCHAR)", "sql": "SELECT COUNT(_number) FROM table_15781170_3 WHERE poll_winner = 'Drinking Games';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 81, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'rules' (example 130).", "schema": null, "sql": "insert into rtest_empmass values ('maier', '5000.00');", "explanation": "DML from PostgreSQL core regression test for Rules.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 54, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the highest Lane used by a racer from Malta?", "schema": "CREATE TABLE table_name_67 (lane INTEGER, nationality VARCHAR)", "sql": "SELECT MAX(lane) FROM table_name_67 WHERE nationality = 'malta';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 64, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the total number of Wins, when Losses is less than 10, when Against is less than 1253, and when Byes is less than 0?", "schema": "CREATE TABLE table_name_42 (wins VARCHAR, byes VARCHAR, losses VARCHAR, against VARCHAR)", "sql": "SELECT COUNT(wins) FROM table_name_42 WHERE losses < 10 AND against < 1253 AND byes < 0;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 88, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: After the year 2011, when the Canada - USA Rank was 46th, what was the League Rank?", "schema": "CREATE TABLE table_name_54 (league VARCHAR, year VARCHAR, canada___usa_rank VARCHAR)", "sql": "SELECT league AS Rank FROM table_name_54 WHERE year > 2011 AND canada___usa_rank = '46th';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 90, "num_statements": 1} {"question": "What is the total water consumption by each sector in the most recent year?", "schema": "CREATE TABLE sector_year_consumption (year INT, sector INT, consumption FLOAT, PRIMARY KEY(year, sector)); INSERT INTO sector_year_consumption (year, sector, consumption) VALUES (2015, 1, 15000), (2015, 2, 20000), (2015, 3, 30000), (2016, 1, 16000), (2016, 2, 22000), (2016, 3, 32000), (2017, 1, 17000), (2017, 2, 24000), (2017, 3, 34000);", "sql": "SELECT syc.sector, SUM(syc.consumption) as total_consumption FROM sector_year_consumption syc JOIN (SELECT MAX(year) as most_recent_year FROM sector_year_consumption) max_year ON 1=1 WHERE syc.year = max_year.most_recent_year GROUP BY syc.sector;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 246, "num_statements": 1} {"question": "How many climate finance projects are there in total?", "schema": "CREATE TABLE climate_finance (project_name TEXT, location TEXT, amount INTEGER); INSERT INTO climate_finance (project_name, location, amount) VALUES ('Project A', 'Asia', 500000), ('Project B', 'Europe', 300000);", "sql": "SELECT COUNT(*) FROM climate_finance;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 37, "num_statements": 1} {"question": "How many visitors have attended events at multiple museum locations?", "schema": "CREATE TABLE MuseumLocations (LocationID INT, LocationName VARCHAR(255)); INSERT INTO MuseumLocations (LocationID, LocationName) VALUES (1, 'Location 1'); INSERT INTO MuseumLocations (LocationID, LocationName) VALUES (2, 'Location 2'); CREATE TABLE Events (EventID INT, LocationID INT); INSERT INTO Events (EventID, LocationID) VALUES (1, 1); INSERT INTO Events (EventID, LocationID) VALUES (2, 1); INSERT INTO Events (EventID, LocationID) VALUES (3, 2); CREATE TABLE Visitors (VisitorID INT, VisitedMultipleLocations BOOLEAN); INSERT INTO Visitors (VisitorID, VisitedMultipleLocations) VALUES (1, true); INSERT INTO Visitors (VisitorID, VisitedMultipleLocations) VALUES (2, false); INSERT INTO Visitors (VisitorID, VisitedMultipleLocations) VALUES (3, true); CREATE TABLE VisitorEvents (VisitorID INT, EventID INT); INSERT INTO VisitorEvents (VisitorID, EventID) VALUES (1, 1); INSERT INTO VisitorEvents (VisitorID, EventID) VALUES (1, 2); INSERT INTO VisitorEvents (VisitorID, EventID) VALUES (3, 3);", "sql": "SELECT COUNT(V.VisitorID) as TotalVisitors FROM Visitors V INNER JOIN VisitorEventsVE ON V.VisitorID = VE.VisitorID INNER JOIN Events E ON VE.EventID = E.EventID INNER JOIN MuseumLocations ML ON E.LocationID = ML.LocationID GROUP BY V.VisitorID HAVING COUNT(DISTINCT ML.LocationID) > 1;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 286, "num_statements": 1} {"question": "What is the minimum capacity of renewable energy projects in a given state?", "schema": "CREATE TABLE State (state_id INT, state_name VARCHAR(50)); CREATE TABLE Project (project_id INT, project_name VARCHAR(50), project_capacity INT, state_id INT);", "sql": "SELECT State.state_name, MIN(Project.project_capacity) as min_capacity FROM State JOIN Project ON State.state_id = Project.state_id GROUP BY State.state_name;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 158, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What are the id and the amount of refund of the booking that incurred the most times of payments?", "schema": "CREATE TABLE Payments (booking_id VARCHAR); CREATE TABLE Bookings (booking_id VARCHAR, amount_of_refund VARCHAR)", "sql": "SELECT T1.booking_id, T1.amount_of_refund FROM Bookings AS T1 JOIN Payments AS T2 ON T1.booking_id = T2.booking_id GROUP BY T1.booking_id ORDER BY COUNT(*) DESC LIMIT 1;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 169, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the highest grid that has gregorio lavilla as the rider?", "schema": "CREATE TABLE table_name_89 (grid INTEGER, rider VARCHAR)", "sql": "SELECT MAX(grid) FROM table_name_89 WHERE rider = 'gregorio lavilla';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 69, "num_statements": 1} {"question": "Identify users who have posted more than once per day on average.", "schema": "CREATE TABLE users (user_id INT, user_name VARCHAR(50)); INSERT INTO users (user_id, user_name) VALUES (1001, 'user1'), (1002, 'user2'), (1003, 'user3'), (1004, 'user4');", "sql": "SELECT user_id, user_name, AVG(post_count) as avg_posts_per_day FROM (SELECT user_id, user_name, post_date, COUNT(*) as post_count FROM posts JOIN users ON posts.user_id = users.user_id GROUP BY post_date, user_id) as subquery GROUP BY user_id, user_name HAVING AVG(post_count) > 1;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 282, "num_statements": 1} {"question": "What is the minimum number of community education programs conducted per year for each type of program?", "schema": "CREATE TABLE Education_Programs (id INT, year INT, program_type VARCHAR(50), number_of_programs INT);", "sql": "SELECT program_type, MIN(number_of_programs) FROM Education_Programs GROUP BY program_type;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 91, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which Rank has a Lost smaller than 12, and a Played larger than 20, and an Avg Points larger than 1.73?", "schema": "CREATE TABLE table_name_95 (rank INTEGER, lost VARCHAR, played VARCHAR)", "sql": "SELECT MAX(rank) FROM table_name_95 WHERE lost < 12 AND played > 20 AND Avg.points > 1.73;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 90, "num_statements": 1} {"question": "PostgreSQL regression test 'strings': Write the SELECT query (example 511).", "schema": null, "sql": "SELECT encode(overlay(E'Th\\\\000omas'::bytea placing E'\\\\002\\\\003'::bytea from 8),'escape');", "explanation": "Regression test for Strings in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT encode(overlay(E'Th\\\\000omas'::bytea placing E'\\\\002\\\\003'::bytea from 8),'escape')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 91, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many series had a production code of 202?", "schema": "CREATE TABLE table_23799417_1 (no_in_series VARCHAR, production_code VARCHAR)", "sql": "SELECT COUNT(no_in_series) FROM table_23799417_1 WHERE production_code = 202;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 77, "num_statements": 1} {"question": "What is the average number of wins per season for a specific football team?", "schema": "CREATE TABLE team_wins (id INT, team VARCHAR(50), sport VARCHAR(20), season VARCHAR(10), wins INT);", "sql": "SELECT AVG(wins) FROM team_wins WHERE team = 'Manchester United' AND sport = 'Football' GROUP BY season;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 104, "num_statements": 1} {"question": "Delete the record with id 2 in the providers table.", "schema": "CREATE TABLE providers (id INT, name VARCHAR(50), specialty VARCHAR(50), PRIMARY KEY(id)); INSERT INTO providers (id, name, specialty) VALUES (1, 'Dr. Ava Jones', 'Psychiatry'), (2, 'Dr. Mohamed Ahmed', 'Psychology');", "sql": "DELETE FROM providers WHERE id = 2;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_delete", "is_postgresql_specific": false, "sql_length": 35, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'rowsecurity' (example 602).", "schema": null, "sql": "UPDATE x1 SET b = b || '_updt' WHERE f_leak(b) RETURNING *;", "explanation": "DML from PostgreSQL core regression test for Rowsecurity.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_update", "is_postgresql_specific": true, "sql_length": 59, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What Club has Fiba European Champion's Cup and an Italian Cup?", "schema": "CREATE TABLE table_name_27 (club VARCHAR, european_cup VARCHAR, national_cup VARCHAR)", "sql": "SELECT club FROM table_name_27 WHERE european_cup = 'fiba european champion's cup' AND national_cup = 'italian cup';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 116, "num_statements": 1} {"question": "What is the average age of patients diagnosed with Hepatitis in Canada?", "schema": "CREATE TABLE PatientData (PatientID INT, Age INT, Gender VARCHAR(10), Diagnosis VARCHAR(20), Country VARCHAR(20)); INSERT INTO PatientData (PatientID, Age, Gender, Diagnosis, Country) VALUES (1, 34, 'Male', 'Hepatitis', 'Canada'); INSERT INTO PatientData (PatientID, Age, Gender, Diagnosis, Country) VALUES (2, 42, 'Female', 'Flu', 'USA');", "sql": "SELECT AVG(Age) FROM PatientData WHERE Diagnosis = 'Hepatitis' AND Country = 'Canada';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 86, "num_statements": 1} {"question": "What is the number of properties in each property type category?", "schema": "CREATE TABLE Properties (id INT, price INT, property_type TEXT); INSERT INTO Properties (id, price, property_type) VALUES (1, 500000, 'House'), (2, 400000, 'Condo'), (3, 700000, 'Townhouse');", "sql": "SELECT property_type, COUNT(*) AS property_count FROM Properties GROUP BY property_type;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 88, "num_statements": 1} {"question": "Which countries have the highest and lowest average product prices for circular supply chain products?", "schema": "CREATE TABLE products (product_id INT, price DECIMAL, circular_supply BOOLEAN, country VARCHAR(50)); INSERT INTO products (product_id, price, circular_supply, country) VALUES (1, 15.99, true, 'USA'), (2, 25.49, false, 'USA'), (3, 12.99, true, 'Canada'), (4, 18.99, true, 'Mexico'), (5, 9.99, false, 'Mexico');", "sql": "SELECT country, AVG(price) FROM products WHERE circular_supply = true GROUP BY country ORDER BY AVG(price) ASC, country;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 120, "num_statements": 1} {"question": "What is the average daily return for all stocks in the 'America' region, ordered by return in descending order?", "schema": "CREATE TABLE stocks (id INT, symbol VARCHAR(10), region VARCHAR(20), return DECIMAL(5,4)); INSERT INTO stocks (id, symbol, region, return) VALUES (1, 'AAPL', 'Asia', 0.0234); INSERT INTO stocks (id, symbol, region, return) VALUES (2, 'GOOG', 'America', 0.0187); INSERT INTO stocks (id, symbol, region, return) VALUES (3, 'BABA', 'Asia', 0.0156); INSERT INTO stocks (id, symbol, region, return) VALUES (4, 'TSLA', 'America', 0.0125);", "sql": "SELECT region, AVG(return) as avg_return FROM stocks WHERE region = 'America' GROUP BY region ORDER BY avg_return DESC;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 119, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the score for place 4?", "schema": "CREATE TABLE table_name_21 (score VARCHAR, place VARCHAR)", "sql": "SELECT score FROM table_name_21 WHERE place = '4';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 50, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the capacity for the institution of university of alberta?", "schema": "CREATE TABLE table_27599216_6 (capacity VARCHAR, institution VARCHAR)", "sql": "SELECT capacity FROM table_27599216_6 WHERE institution = 'University of Alberta';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 82, "num_statements": 1} {"question": "What is the total energy production (in MWh) for each country from the renewable source 'wind'?", "schema": "CREATE TABLE energy_production (country TEXT, source TEXT, energy_generated FLOAT); INSERT INTO energy_production (country, source, energy_generated) VALUES ('USA', 'wind', 12000), ('Canada', 'wind', 8000), ('Mexico', 'wind', 6000);", "sql": "SELECT country, SUM(energy_generated) FROM energy_production WHERE source = 'wind' GROUP BY country;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 100, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: with a record of 73-82 what was the date?", "schema": "CREATE TABLE table_name_58 (date VARCHAR, record VARCHAR)", "sql": "SELECT date FROM table_name_58 WHERE record = '73-82';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 54, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'citext' (example 417).", "schema": null, "sql": "SELECT to_number('12,454.8-'::citext, '99G999D9S'::citext)\n = to_number('12,454.8-', '99G999D9S') AS t;", "explanation": "Example query from the 'citext' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 115, "num_statements": 1} {"question": "What is the total number of network infrastructure investments for a specific country in the last year?", "schema": "CREATE TABLE network_investments (investment_id INT, investment_date DATE, country VARCHAR(50), investment_amount INT);", "sql": "SELECT country, SUM(investment_amount) FROM network_investments WHERE country = 'CountryName' AND investment_date >= DATE_SUB(CURRENT_DATE, INTERVAL 1 YEAR) GROUP BY country;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 174, "num_statements": 1} {"question": "Update the genre of the artist with id 2 to 'R&B'.", "schema": "CREATE TABLE artists (id INT, name TEXT, genre TEXT); INSERT INTO artists (id, name, genre) VALUES (1, 'Beyoncé', 'Pop'), (2, 'Rihanna', 'Hip Hop');", "sql": "UPDATE artists SET genre = 'R&B' WHERE id = 2;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 46, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the highest decile of Tarras school, which had a state authority?", "schema": "CREATE TABLE table_name_58 (decile INTEGER, authority VARCHAR, name VARCHAR)", "sql": "SELECT MAX(decile) FROM table_name_58 WHERE authority = 'state' AND name = 'tarras school';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 91, "num_statements": 1} {"question": "What is the lowest dissolved oxygen level in the Pacific Ocean for tuna farms?", "schema": "CREATE TABLE Pacific_Ocean (id INT, dissolved_oxygen DECIMAL(5,2)); INSERT INTO Pacific_Ocean (id, dissolved_oxygen) VALUES (1, 6.5), (2, 7.2), (3, 5.9); CREATE TABLE Tuna_Farms (id INT, ocean VARCHAR(20)); INSERT INTO Tuna_Farms (id, ocean) VALUES (1, 'Pacific'), (2, 'Indian'), (3, 'Pacific');", "sql": "SELECT MIN(Pacific_Ocean.dissolved_oxygen) FROM Pacific_Ocean INNER JOIN Tuna_Farms ON Pacific_Ocean.id = Tuna_Farms.id WHERE Tuna_Farms.ocean = 'Pacific';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 155, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the Orangemen record during game 3?", "schema": "CREATE TABLE table_23346983_1 (record VARCHAR, game VARCHAR)", "sql": "SELECT record FROM table_23346983_1 WHERE game = 3;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 51, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the average number of goals for players ranked above 9 and playing more than 205 matches?", "schema": "CREATE TABLE table_name_91 (goals INTEGER, matches VARCHAR, rank VARCHAR)", "sql": "SELECT AVG(goals) FROM table_name_91 WHERE matches = 205 AND rank > 9;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 70, "num_statements": 1} {"question": "What is the total balance of Shariah-compliant savings accounts for customers in California, grouped by city?", "schema": "CREATE TABLE savings_accounts (id INT, customer_id INT, account_type VARCHAR(20), balance DECIMAL(10, 2), state VARCHAR(2)); INSERT INTO savings_accounts (id, customer_id, account_type, balance, state) VALUES (1, 101, 'Shariah', 5000, 'California'); CREATE TABLE customers (id INT, first_name VARCHAR(20), last_name VARCHAR(20), city VARCHAR(20)); INSERT INTO customers (id, first_name, last_name, city) VALUES (101, 'Ahmad', 'Ali', 'San Francisco');", "sql": "SELECT savings_accounts.state, customers.city, SUM(savings_accounts.balance) FROM savings_accounts INNER JOIN customers ON savings_accounts.customer_id = customers.id WHERE savings_accounts.account_type = 'Shariah' AND savings_accounts.state = 'California' GROUP BY customers.city;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 281, "num_statements": 1} {"question": "List all transactions made by 'Mateo Davis' and their details.", "schema": "CREATE TABLE transactions (id INT PRIMARY KEY, account_id INT, type VARCHAR(255), amount DECIMAL(10,2), date DATE, client_id INT); INSERT INTO transactions (id, account_id, type, amount, date, client_id) VALUES (1, 1, 'Deposit', 2000.00, '2021-01-01', 1001), (2, 2, 'Withdrawal', 1500.00, '2021-02-10', 1002), (3, 3, 'Transfer', 500.00, '2021-03-20', 1003), (4, 1003, 'Withdrawal', 1000.00, '2021-04-01', 1005), (5, 1002, 'Withdrawal', 500.00, '2021-05-15', 1006), (6, 5, 'Deposit', 1000.00, '2021-06-01', 1004), (7, 4, 'Payment', 500.00, '2021-06-15', 1004);", "sql": "SELECT * FROM transactions WHERE client_id = (SELECT id FROM clients WHERE name = 'Mateo Davis');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 97, "num_statements": 1} {"question": "What is the number of financially capable individuals in each country?", "schema": "CREATE TABLE if not exists individuals (id INT, country VARCHAR(50), is_financially_capable BOOLEAN, age INT, gender VARCHAR(10));", "sql": "SELECT country, COUNT(*) FROM individuals WHERE is_financially_capable = TRUE GROUP BY country;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 95, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the frequency for katherine?", "schema": "CREATE TABLE table_name_99 (frequency VARCHAR, area_served VARCHAR)", "sql": "SELECT frequency FROM table_name_99 WHERE area_served = 'katherine';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 68, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Find the number and averaged salary of all instructors who are in the department with the highest budget.", "schema": "CREATE TABLE department (dept_name VARCHAR, budget VARCHAR); CREATE TABLE instructor (salary INTEGER, dept_name VARCHAR)", "sql": "SELECT AVG(T1.salary), COUNT(*) FROM instructor AS T1 JOIN department AS T2 ON T1.dept_name = T2.dept_name ORDER BY T2.budget DESC LIMIT 1;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 139, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which Class has a Year(s) of Manufacture of 1899?", "schema": "CREATE TABLE table_name_32 (class VARCHAR, year_s__of_manufacture VARCHAR)", "sql": "SELECT class FROM table_name_32 WHERE year_s__of_manufacture = '1899';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 70, "num_statements": 1} {"question": "Write the SQL definition from PostgreSQL contrib extension 'contrib' (item 7).", "schema": null, "sql": "CREATE FUNCTION pg_stat_statements_reset(IN userid Oid DEFAULT 0,\n\tIN dbid Oid DEFAULT 0,\n\tIN queryid bigint DEFAULT 0,\n\tIN minmax_only boolean DEFAULT false\n)\nRETURNS timestamp with time zone\nAS 'MODULE_PATHNAME', 'pg_stat_statements_reset_1_11'\nLANGUAGE C STRICT PARALLEL SAFE;", "explanation": "SQL definition from the 'contrib' PostgreSQL contrib extension.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 279, "num_statements": 1} {"question": "How many regulatory frameworks have been implemented in Africa per year?", "schema": "CREATE TABLE regulatory_frameworks (framework_id INT, framework_name VARCHAR(50), framework_jurisdiction VARCHAR(50), enforcement_agency VARCHAR(50), implementation_year INT);", "sql": "CREATE VIEW africa_frameworks_per_year AS SELECT TO_CHAR(implementation_year, 'YYYY') AS implementation_year, COUNT(framework_id) AS frameworks_implemented FROM regulatory_frameworks WHERE framework_jurisdiction = 'Africa' GROUP BY implementation_year ORDER BY implementation_year;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "ddl_view", "is_postgresql_specific": false, "sql_length": 281, "num_statements": 1} {"question": "What is the total amount donated by the donor with the id 2?", "schema": "CREATE TABLE donations (id INT, donor_id INT, amount DECIMAL(10,2)); INSERT INTO donations (id, donor_id, amount) VALUES (1, 2, 50.00), (2, 2, 30.00), (3, 2, 100.00);", "sql": "SELECT SUM(amount) FROM donations WHERE donor_id = 2;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 53, "num_statements": 1} {"question": "What is the distribution of carbon pricing schemes by region in the carbon_pricing table?", "schema": "CREATE TABLE carbon_pricing (id INT, name VARCHAR(50), type VARCHAR(50), region VARCHAR(50), start_date DATE, end_date DATE);", "sql": "SELECT region, COUNT(*) as num_pricing_schemes FROM carbon_pricing GROUP BY region ORDER BY num_pricing_schemes DESC;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 117, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the % similarity to C7orf38 of the animal whose % identity to C7orf38 is 81?", "schema": "CREATE TABLE table_26957063_3 (_percentage_similarity_to_c7orf38 INTEGER, _percentage_identity_to_c7orf38 VARCHAR)", "sql": "SELECT MAX(_percentage_similarity_to_c7orf38) FROM table_26957063_3 WHERE _percentage_identity_to_c7orf38 = 81;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 111, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the height of Josh Tabb?", "schema": "CREATE TABLE table_name_28 (height VARCHAR, name VARCHAR)", "sql": "SELECT height FROM table_name_28 WHERE name = 'josh tabb';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 58, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who won gold in the games where Pan Cheng-Tsung won silver?", "schema": "CREATE TABLE table_name_76 (gold VARCHAR, silver VARCHAR)", "sql": "SELECT gold FROM table_name_76 WHERE silver = 'pan cheng-tsung';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 64, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many laps did ben spies do on the grid less than 7?", "schema": "CREATE TABLE table_name_86 (laps VARCHAR, grid VARCHAR, rider VARCHAR)", "sql": "SELECT COUNT(laps) FROM table_name_86 WHERE grid < 7 AND rider = 'ben spies';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 77, "num_statements": 1} {"question": "PostgreSQL regression test 'horology': Write the SELECT query (example 81).", "schema": null, "sql": "SELECT timestamp with time zone 'Y2001M12D27H04MM05S06.789-08';", "explanation": "Regression test for Horology in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT timestamp with time zone 'Y2001M12D27H04MM05S06.789-08') AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 63, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the most goals for total club and apps less than 120", "schema": "CREATE TABLE table_name_34 (goals INTEGER, club VARCHAR, apps VARCHAR)", "sql": "SELECT MAX(goals) FROM table_name_34 WHERE club = 'total' AND apps < 120;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 73, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many different series numbers are there for the episode seen by 7.84 million people in the US?", "schema": "CREATE TABLE table_23242933_2 (no_in_series VARCHAR, us_viewers__millions_ VARCHAR)", "sql": "SELECT COUNT(no_in_series) FROM table_23242933_2 WHERE us_viewers__millions_ = '7.84';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 86, "num_statements": 1} {"question": "Insert a new record for a tennis match in France with 1500 tickets sold.", "schema": "CREATE TABLE matches (match_id INT, sport VARCHAR(50), location VARCHAR(50), tickets_sold INT);", "sql": "INSERT INTO matches (match_id, sport, location, tickets_sold) VALUES (3, 'Tennis', 'France', 1500);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 99, "num_statements": 1} {"question": "Identify the daily sales trend for the past year, including the current day, by calculating the moving average of units sold per day.", "schema": "CREATE TABLE daily_sales (sale_date DATE, units_sold INT); INSERT INTO daily_sales (sale_date, units_sold) VALUES ('2021-04-01', 500), ('2021-04-02', 600), ('2021-04-03', 700), ('2021-04-04', 800), ('2021-04-05', 900), ('2022-04-01', 1000);", "sql": "SELECT sale_date, AVG(units_sold) OVER (ORDER BY sale_date ROWS BETWEEN 29 PRECEDING AND CURRENT ROW) AS moving_average FROM daily_sales WHERE sale_date >= DATE_TRUNC('day', CURRENT_DATE - INTERVAL '365 day') ORDER BY sale_date;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_window_function", "is_postgresql_specific": true, "sql_length": 228, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'plpgsql' (example 32).", "schema": null, "sql": "create trigger tg_wslot_biu before insert or update\n on WSlot for each row execute procedure tg_wslot_biu();", "explanation": "DDL from PostgreSQL core regression test for Plpgsql.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "dml_insert", "is_postgresql_specific": true, "sql_length": 111, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: what is the maximum 1st prize( $ ) where score is 193 (-17)", "schema": "CREATE TABLE table_11622255_1 (score VARCHAR)", "sql": "SELECT MAX(1 AS st_prize__) AS $__ FROM table_11622255_1 WHERE score = '193 (-17)';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 83, "num_statements": 1} {"question": "What is the total number of cruelty-free products with natural ingredients?", "schema": "CREATE TABLE Products (product_id INT, is_cruelty_free BOOLEAN, has_natural_ingredients BOOLEAN);", "sql": "SELECT COUNT(*) FROM Products WHERE is_cruelty_free = TRUE AND has_natural_ingredients = TRUE;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 94, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is Episodes (TV+extra) 2, when Broadcasts (TV) 1 is less than 13, and when Directors is \"Shigehito Takayanagi\"?", "schema": "CREATE TABLE table_name_26 (episodes__tv VARCHAR, extra__2 VARCHAR, broadcasts__tv__1 VARCHAR, directors VARCHAR)", "sql": "SELECT episodes__tv + extra__2 FROM table_name_26 WHERE broadcasts__tv__1 < 13 AND directors = 'shigehito takayanagi';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 118, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Where did the player in Pos mf, move to until 30 june 2009 ?", "schema": "CREATE TABLE table_name_18 (moving_to VARCHAR, date_to VARCHAR, pos VARCHAR)", "sql": "SELECT moving_to FROM table_name_18 WHERE date_to = '30 june 2009' AND pos = 'mf';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 82, "num_statements": 1} {"question": "PostgreSQL regression test 'multirangetypes': Write the SELECT query (example 285).", "schema": null, "sql": "select numrange(1,2) << nummultirange();", "explanation": "Regression test for Multirangetypes in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select numrange(1,2) << nummultirange()) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 40, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which IATA has a ICAO of slk?", "schema": "CREATE TABLE table_name_14 (iata VARCHAR, icao VARCHAR)", "sql": "SELECT iata FROM table_name_14 WHERE icao = 'slk';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 50, "num_statements": 1} {"question": "How many military bases are there in total for each country, ordered by the number of bases in descending order?", "schema": "CREATE TABLE BasesByCountry (Country varchar(50), BaseID int); INSERT INTO BasesByCountry (Country, BaseID) VALUES ('USA', 1), ('USA', 2), ('UK', 3), ('Iraq', 4), ('Iraq', 5);", "sql": "SELECT Country, COUNT(*) as TotalBases FROM BasesByCountry GROUP BY Country ORDER BY TotalBases DESC;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 101, "num_statements": 1} {"question": "What is the total number of artworks donated by each artist, ordered by the total count in descending order?", "schema": "CREATE TABLE Artists (ArtistID int, ArtistName varchar(50), NumberOfArtworks int);INSERT INTO Artists (ArtistID, ArtistName, NumberOfArtworks) VALUES (1, 'Pablo Picasso', 500), (2, 'Vincent Van Gogh', 450), (3, 'Claude Monet', 350);", "sql": "SELECT ArtistName, SUM(NumberOfArtworks) OVER (PARTITION BY ArtistID ORDER BY ArtistID) as TotalArtworksDonated FROM Artists ORDER BY TotalArtworksDonated DESC;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "query_window_function", "is_postgresql_specific": true, "sql_length": 160, "num_statements": 1} {"question": "What is the average water consumption per person in the city of San Francisco, calculated monthly?", "schema": "CREATE TABLE people (person_id INT, city VARCHAR(20), water_consumption FLOAT, consumption_date DATE); INSERT INTO people (person_id, city, water_consumption, consumption_date) VALUES (1, 'San Francisco', 50.0, '2022-01-01'), (2, 'San Francisco', 60.0, '2022-02-01'), (3, 'San Francisco', 55.0, '2022-03-01');", "sql": "SELECT city, AVG(water_consumption) FROM (SELECT city, person_id, AVG(water_consumption) AS water_consumption FROM people GROUP BY city, PERIOD_DIFF(consumption_date, DATE_FORMAT(consumption_date, '%Y%m')) * 100) AS monthly_consumption GROUP BY city;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 250, "num_statements": 1} {"question": "Rank dishes by popularity, considering only orders from the last month, for each category in Chinese restaurants?", "schema": "CREATE TABLE Restaurants (id INT, name TEXT, cuisine TEXT); INSERT INTO Restaurants (id, name, cuisine) VALUES (1, 'Golden Palace', 'Chinese'), (2, 'La Trattoria', 'Italian'); CREATE TABLE Menu (id INT, name TEXT, category TEXT, price FLOAT, restaurant_id INT); CREATE TABLE Orders (id INT, menu_id INT, order_date DATE, quantity INT);", "sql": "SELECT menu.name, menu.category, SUM(orders.quantity) as total_quantity, ROW_NUMBER() OVER (PARTITION BY menu.category ORDER BY SUM(orders.quantity) DESC) as rank FROM Orders JOIN Menu ON Orders.menu_id = Menu.id JOIN Restaurants ON Menu.restaurant_id = Restaurants.id WHERE Restaurants.cuisine = 'Chinese' AND Orders.order_date >= CURRENT_DATE - INTERVAL '1 month' GROUP BY menu.name, menu.category ORDER BY menu.category, rank;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "query_window_function", "is_postgresql_specific": true, "sql_length": 429, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the average score of submissions?", "schema": "CREATE TABLE submission (Scores INTEGER)", "sql": "SELECT AVG(Scores) FROM submission;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 35, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the Tally of an Opposition of westmeath?", "schema": "CREATE TABLE table_name_76 (tally VARCHAR, opposition VARCHAR)", "sql": "SELECT tally FROM table_name_76 WHERE opposition = 'westmeath';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 63, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many joined Manchester University?", "schema": "CREATE TABLE table_255205_1 (joined VARCHAR, institution VARCHAR)", "sql": "SELECT joined FROM table_255205_1 WHERE institution = 'Manchester University';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 78, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the highest number of bronze for the United States with more than 1 silver?", "schema": "CREATE TABLE table_name_13 (bronze INTEGER, nation VARCHAR, silver VARCHAR)", "sql": "SELECT MAX(bronze) FROM table_name_13 WHERE nation = 'united states' AND silver > 1;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 84, "num_statements": 1} {"question": "Retrieve all sites from the Cultural_Heritage table that were established after 1600.", "schema": "CREATE TABLE Cultural_Heritage (Site VARCHAR(50), Year_Established INT, Historical_Significance TEXT); INSERT INTO Cultural_Heritage (Site, Year_Established, Historical_Significance) VALUES ('Machu Picchu', 1450, 'An Incan citadel set high in the Andes Mountains, above the Sacred Valley. It is renowned for its sophisticated dry-stone walls that fuse huge blocks without the use of mortar.'), ('Angkor Wat', 1113, 'A temple complex in Cambodia and one of the largest religious monuments in the world.'), ('Petra', 312, 'An archaeological city in southern Jordan, known for its rock-cut architecture and water conduit system.'), ('Taj Mahal', 1632, 'A white marble mausoleum located in Agra, India, built by Mughal Emperor Shah Jahan in memory of his wife Mumtaz Mahal.');", "sql": "SELECT Site FROM Cultural_Heritage WHERE Year_Established > 1600;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 65, "num_statements": 1} {"question": "What is the total amount of funds raised by each sector in 'disaster_response' schema?", "schema": "CREATE TABLE funds (fund_id INT, sector VARCHAR(255), amount DECIMAL(10, 2), donation_date DATE); INSERT INTO funds (fund_id, sector, amount, donation_date) VALUES (1, 'Education', 5000.00, '2021-01-01');", "sql": "SELECT sector, SUM(amount) as total_funds_raised FROM funds GROUP BY sector;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "plpgsql", "is_postgresql_specific": false, "sql_length": 76, "num_statements": 1} {"question": "What is the total number of patients who have been diagnosed with HIV and hepatitis C in the state of New York?", "schema": "CREATE TABLE patients (patient_id INT, age INT, gender VARCHAR(10), state VARCHAR(20), disease VARCHAR(20)); INSERT INTO patients (patient_id, age, gender, state, disease) VALUES (1, 45, 'Male', 'New York', 'HIV'); INSERT INTO patients (patient_id, age, gender, state, disease) VALUES (2, 34, 'Female', 'California', 'Hepatitis C');", "sql": "SELECT COUNT(*) FROM patients WHERE state = 'New York' AND (disease = 'HIV' OR disease = 'Hepatitis C');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 104, "num_statements": 1} {"question": "Display the names of all workers who have not completed any workforce development training in the circular economy.", "schema": "CREATE TABLE workers (id INT, name VARCHAR(50), training VARCHAR(30)); CREATE TABLE circular_economy (id INT, training VARCHAR(30)); INSERT INTO workers (id, name, training) VALUES (1, 'John Smith', 'Recycling'), (2, 'Jane Doe', 'Upcycling'), (3, 'Bob Johnson', 'Waste Reduction'), (4, 'Alice Williams', 'Composting'), (5, 'Jim Brown', NULL), (6, 'Karen Green', NULL); INSERT INTO circular_economy (id, training) VALUES (1, 'Recycling'), (2, 'Upcycling');", "sql": "SELECT w.name FROM workers w LEFT JOIN circular_economy c ON w.training = c.training WHERE c.training IS NULL;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 110, "num_statements": 1} {"question": "What is the maximum fare for a trip on the London underground?", "schema": "CREATE TABLE underground (id INT, line VARCHAR(20), fare DECIMAL(5,2)); INSERT INTO underground (id, line, fare) VALUES (1, 'Circle', 4.90), (2, 'District', 5.10), (3, 'Hammersmith', 6.00);", "sql": "SELECT MAX(fare) FROM underground;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 34, "num_statements": 1} {"question": "Get the total water usage for mining in 'Baotou'", "schema": "CREATE TABLE mining_impact (id INT PRIMARY KEY, location VARCHAR(255), water_usage INT, air_pollution INT, land_degradation INT);", "sql": "SELECT SUM(water_usage) FROM mining_impact WHERE location = 'Baotou';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 69, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Can you tell me the Miss Pilipinas that has the Second runner-up of maria penson?", "schema": "CREATE TABLE table_name_55 (miss_maja_pilipinas VARCHAR, second_runner_up VARCHAR)", "sql": "SELECT miss_maja_pilipinas FROM table_name_55 WHERE second_runner_up = 'maria penson';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 86, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the time when ss12 is stage?", "schema": "CREATE TABLE table_21578303_2 (time VARCHAR, stage VARCHAR)", "sql": "SELECT time FROM table_21578303_2 WHERE stage = 'SS12';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 55, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: what is the least silver for germany when gold is more than 4?", "schema": "CREATE TABLE table_name_94 (silver INTEGER, nation VARCHAR, gold VARCHAR)", "sql": "SELECT MIN(silver) FROM table_name_94 WHERE nation = 'germany' AND gold > 4;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 76, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which League Cup has a Scottish Cup larger than 69?", "schema": "CREATE TABLE table_name_42 (league INTEGER, scottish_cup INTEGER)", "sql": "SELECT SUM(league) AS Cup FROM table_name_42 WHERE scottish_cup > 69;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 69, "num_statements": 1} {"question": "How many marine species have a population of less than 1000 in the Pacific Ocean?", "schema": "CREATE TABLE marine_life (id INT PRIMARY KEY, species VARCHAR(255), population INT, habitat VARCHAR(255));", "sql": "SELECT COUNT(*) FROM marine_life WHERE population < 1000 AND habitat LIKE '%Pacific%';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 86, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: in which year was available the rubber as kickdrum pad", "schema": "CREATE TABLE table_2889300_6 (years_available VARCHAR, kickdrum_pad VARCHAR)", "sql": "SELECT years_available FROM table_2889300_6 WHERE kickdrum_pad = 'Rubber';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 74, "num_statements": 1} {"question": "Insert a new record into the 'crypto_exchanges' table with 'exchange_name' 'Kraken', 'exchange_location' 'USA', and 'year_founded' 2011", "schema": "CREATE TABLE crypto_exchanges (exchange_name VARCHAR(50), exchange_location VARCHAR(50), year_founded INT, regulatory_status VARCHAR(20));", "sql": "INSERT INTO crypto_exchanges (exchange_name, exchange_location, year_founded, regulatory_status) VALUES ('Kraken', 'USA', 2011, 'Registered');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 142, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'postgres_fdw' (example 167).", "schema": null, "sql": "SELECT t1.c1, t2.c1 FROM ft5 t1 RIGHT JOIN ft4 t2 ON (t1.c1 = t2.c1) ORDER BY t2.c1, t1.c1 OFFSET 10 LIMIT 10;", "explanation": "Example query from the 'postgres_fdw' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 110, "num_statements": 1} {"question": "What percentage of products are certified cruelty-free in the UK?", "schema": "CREATE TABLE cosmetics.product_safety (product_id INT, country VARCHAR(50), is_cruelty_free BOOLEAN); INSERT INTO cosmetics.product_safety (product_id, country, is_cruelty_free) VALUES (1, 'UK', true), (2, 'France', false), (3, 'Germany', true), (4, 'Italy', true), (5, 'Spain', false);", "sql": "SELECT (SUM(is_cruelty_free) * 100.0 / COUNT(*)) as cruelty_free_percentage FROM cosmetics.product_safety WHERE country = 'UK';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 127, "num_statements": 1} {"question": "What is the average temperature of the Mediterranean Sea?", "schema": "CREATE TABLE Seas (id INT, name VARCHAR(50), temperature DECIMAL(3,1)); INSERT INTO Seas (id, name, temperature) VALUES (1, 'Mediterranean', 21.5), (2, 'Caribbean', 27.5), (3, 'Baltic', 16.8);", "sql": "SELECT AVG(Seas.temperature) FROM Seas WHERE Seas.name = 'Mediterranean';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 73, "num_statements": 1} {"question": "pgTAP test for Pgtap--1.1.0--1.2.0 (assertion 99).", "schema": null, "sql": "-- isnt_procedure( schema, function )\nCREATE OR REPLACE FUNCTION isnt_procedure( NAME, NAME )\nRETURNS TEXT AS $$\n SELECT _func_compare(\n $1, $2, NOT _type_func('p', $1, $2),\n 'Function ' || quote_ident($1) || '.' || quote_ident($2) || '() should not be a procedure'\n );\n$$ LANGUAGE sql;", "explanation": "SQL assertion from pgTAP test suite for Pgtap--1.1.0--1.2.0.", "validation_query": null, "source": "pgtap_tests", "difficulty": "advanced", "category": "plpgsql_function", "is_postgresql_specific": true, "sql_length": 307, "num_statements": 2} {"question": "Show a query using PostgreSQL contrib extension 'pgcrypto' (example 12).", "schema": null, "sql": "select pgp_sym_decrypt(dearmor('\n-----BEGIN PGP MESSAGE-----\nComment: dat1.aes192.sha1.mdc.s2k0.z0\n\njAQECAAC0kQBBDnQWkgsx9YFaqDfWmpsiyAJ6y2xG/sBvap1dySYEMuZ+wJTXQ9E\nCr3i2M7TgVZ0M4jp4QL0adG1lpN5iK7aQeOwMw==\n=cg+i\n-----END PGP MESSAGE-----\n'), 'foobar');", "explanation": "Example query from the 'pgcrypto' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "plpgsql", "is_postgresql_specific": false, "sql_length": 252, "num_statements": 1} {"question": "What is the total number of community events in Chicago this year?", "schema": "CREATE TABLE CommunityEvents (id INT, city VARCHAR(50), event_date DATE, event_type VARCHAR(50));", "sql": "SELECT COUNT(*) FROM CommunityEvents WHERE city = 'Chicago' AND event_date >= DATE_SUB(CURRENT_DATE, INTERVAL 1 YEAR) AND event_type = 'community';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 147, "num_statements": 1} {"question": "What is the maximum number of visitors at a cultural event in Sydney?", "schema": "CREATE TABLE Cultural_Events (id INT, city VARCHAR(50), attendance INT); CREATE VIEW Sydney_Events AS SELECT * FROM Cultural_Events WHERE city = 'Sydney';", "sql": "SELECT MAX(attendance) FROM Sydney_Events;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 42, "num_statements": 1} {"question": "What is the number of international visitors to Japan in 2021 and their average expenditures?", "schema": "CREATE TABLE Visitors_Japan (id INT, year INT, country VARCHAR(50), expenditure FLOAT); INSERT INTO Visitors_Japan (id, year, country, expenditure) VALUES (1, 2021, 'Japan', 2000), (2, 2021, 'Japan', 2100), (3, 2021, 'Japan', 2200);", "sql": "SELECT AVG(Visitors_Japan.expenditure) FROM Visitors_Japan WHERE Visitors_Japan.country = 'Japan' AND Visitors_Japan.year = 2021;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 129, "num_statements": 1} {"question": "What is the total number of permits issued by month and type?", "schema": "CREATE TABLE Permits (ID INT, PermitType VARCHAR(50), IssueMonth VARCHAR(50)); INSERT INTO Permits VALUES (1, 'Building', 'January'), (2, 'Plumbing', 'February'), (3, 'Building', 'March');", "sql": "SELECT IssueMonth, PermitType, COUNT(*) OVER (PARTITION BY IssueMonth, PermitType) AS PermitCount FROM Permits;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "query_window_function", "is_postgresql_specific": true, "sql_length": 111, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Show name of all students who have some friends and also are liked by someone else.", "schema": "CREATE TABLE Highschooler (name VARCHAR, id VARCHAR); CREATE TABLE Likes (student_id VARCHAR, liked_id VARCHAR); CREATE TABLE Friend (student_id VARCHAR, liked_id VARCHAR)", "sql": "SELECT T2.name FROM Friend AS T1 JOIN Highschooler AS T2 ON T1.student_id = T2.id INTERSECT SELECT T2.name FROM Likes AS T1 JOIN Highschooler AS T2 ON T1.liked_id = T2.id;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 171, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How may times did a player that attended Iowa state appear on the all time roster?", "schema": "CREATE TABLE table_11734041_1 (years_for_rockets VARCHAR, school_club_team_country VARCHAR)", "sql": "SELECT COUNT(years_for_rockets) FROM table_11734041_1 WHERE school_club_team_country = 'Iowa State';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 100, "num_statements": 1} {"question": "Find the average ESG score for each sector, only showing sectors with more than 2 investments.", "schema": "CREATE TABLE investments(id INT, sector VARCHAR(20), esg_score INT); INSERT INTO investments VALUES(1, 'Tech', 85), (2, 'Healthcare', 75), (3, 'Tech', 82);", "sql": "SELECT sector, AVG(esg_score) as avg_esg_score FROM investments GROUP BY sector HAVING COUNT(*) > 2;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 100, "num_statements": 1} {"question": "pgTAP test for Hastap (assertion 257).", "schema": null, "sql": "SELECT * FROM check_test(\n domain_type_is( 'public'::name, 'us_postal_code', 'text'),\n true,\n 'domain_type_is(schema, domain, type)',\n 'Domain public.us_postal_code should extend type text',\n ''\n);", "explanation": "SQL assertion from pgTAP test suite for Hastap.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 212, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the loss with record of 14-19", "schema": "CREATE TABLE table_name_68 (loss VARCHAR, record VARCHAR)", "sql": "SELECT loss FROM table_name_68 WHERE record = '14-19';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 54, "num_statements": 1} {"question": "What is the average ticket price for each concert in the 'music_festivals' table?", "schema": "CREATE TABLE music_festivals (festival_name VARCHAR(255), location VARCHAR(255), date DATE, tier_1_price INT, tier_2_price INT);", "sql": "SELECT festival_name, (tier_1_price + tier_2_price)/2 as avg_ticket_price FROM music_festivals;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 95, "num_statements": 1} {"question": "What is the number of hospitals and clinics in urban areas of California, by city?", "schema": "CREATE TABLE hospitals (id INT, name VARCHAR(255), location VARCHAR(255), num_beds INT, is_urban BOOLEAN);CREATE TABLE clinics (id INT, name VARCHAR(255), location VARCHAR(255), is_urban BOOLEAN);", "sql": "SELECT c.city, COUNT(h.name) AS num_hospitals, COUNT(cl.name) AS num_clinics FROM (SELECT location AS city FROM hospitals WHERE is_urban = TRUE UNION SELECT location AS city FROM clinics WHERE is_urban = TRUE) AS c LEFT JOIN hospitals h ON c.city = h.location AND h.is_urban = TRUE LEFT JOIN clinics cl ON c.city = cl.location AND cl.is_urban = TRUE GROUP BY c.city;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 366, "num_statements": 1} {"question": "How many workers in each role have been employed in mines for more than 5 years?", "schema": "CREATE TABLE worker (id INT, name TEXT, role TEXT, mine_id INT, employment_year INT);", "sql": "SELECT worker.role, COUNT(worker.id) as workers_count FROM worker WHERE worker.employment_year < (CURRENT_DATE - INTERVAL 5 YEAR) GROUP BY worker.role;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 151, "num_statements": 1} {"question": "Identify customers who have had an increasing balance for the past three consecutive transactions, for all accounts.", "schema": "CREATE TABLE accounts (customer_id INT, account_type VARCHAR(20), balance DECIMAL(10, 2), transaction_date DATE);", "sql": "SELECT customer_id, account_type, balance FROM (SELECT customer_id, account_type, balance, transaction_date, LAG(balance, 2) OVER (PARTITION BY customer_id ORDER BY transaction_date) AS lag_balance_2 FROM accounts) AS lagged_accounts WHERE balance > lag_balance_2;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "query_window_function", "is_postgresql_specific": true, "sql_length": 264, "num_statements": 1} {"question": "What are the names of UNESCO heritage sites in Europe and their types?", "schema": "CREATE TABLE UNESCO_SITES (id INT PRIMARY KEY, name VARCHAR(255), region VARCHAR(255), type VARCHAR(255)); INSERT INTO UNESCO_SITES (id, name, region, type) VALUES (1, 'Colosseum', 'Europe', 'Cultural');", "sql": "SELECT name, type FROM UNESCO_SITES WHERE region = 'Europe';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 60, "num_statements": 1} {"question": "List the community health workers and their cultural competency scores.", "schema": "CREATE TABLE community_health_workers (worker_id INT, name VARCHAR(50), cultural_competency_score INT);", "sql": "SELECT worker_id, name, cultural_competency_score FROM community_health_workers;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 80, "num_statements": 1} {"question": "PostgreSQL regression test 'create_view': Write the SELECT query (example 284).", "schema": null, "sql": "select pg_get_ruledef(oid, true) from pg_rewrite\n where ev_class = 'tt23v'::regclass and ev_type = '1';", "explanation": "Regression test for Create View in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select pg_get_ruledef(oid, true) from pg_rewrite\n where ev_class = 'tt23v'::regclass and ev_type = '1') AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 104, "num_statements": 1} {"question": "What is the maximum rating of any attraction?", "schema": "CREATE TABLE attractions (id INT, name VARCHAR(50), city VARCHAR(20), rating FLOAT); INSERT INTO attractions (id, name, city, rating) VALUES (1, 'Opera House', 'Sydney', 4.6), (2, 'Bridge', 'Sydney', 3.8), (3, 'Tower', 'New York', 4.8);", "sql": "SELECT MAX(rating) FROM attractions;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 36, "num_statements": 1} {"question": "What is the average budget of biotech startups in California?", "schema": "CREATE TABLE biotech_startups (id INT, name TEXT, location TEXT, budget FLOAT); INSERT INTO biotech_startups (id, name, location, budget) VALUES (1, 'Genetix', 'California', 15000000.0); INSERT INTO biotech_startups (id, name, location, budget) VALUES (2, 'BioEngineer', 'California', 20000000.0);", "sql": "SELECT AVG(budget) FROM biotech_startups WHERE location = 'California';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 71, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the maximum ITV1 weekly ranking?", "schema": "CREATE TABLE table_25664518_3 (itv1_weekly_ranking INTEGER)", "sql": "SELECT MAX(itv1_weekly_ranking) FROM table_25664518_3;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 54, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the largest number of assists for the second rank when there were less than 2 games?", "schema": "CREATE TABLE table_name_71 (assists INTEGER, rank VARCHAR, games VARCHAR)", "sql": "SELECT MAX(assists) FROM table_name_71 WHERE rank = 2 AND games < 2;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 68, "num_statements": 1} {"question": "What is the average number of accommodations requested per student in the Midwest?", "schema": "CREATE TABLE Accommodations (student_id INT, accommodation_type VARCHAR(50), request_date DATE); CREATE TABLE Students (student_id INT, state VARCHAR(50));", "sql": "SELECT AVG(accommodation_count) FROM (SELECT student_id, COUNT(*) accommodation_count FROM Accommodations A INNER JOIN Students S ON A.student_id = S.student_id WHERE S.state IN ('Indiana', 'Illinois', 'Iowa', 'Kansas', 'Michigan', 'Minnesota', 'Missouri', 'Nebraska', 'North Dakota', 'Ohio', 'South Dakota', 'Wisconsin') GROUP BY student_id) T;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 345, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the Format(s) of the release on September 23, 2008?", "schema": "CREATE TABLE table_name_40 (format_s_ VARCHAR, date VARCHAR)", "sql": "SELECT format_s_ FROM table_name_40 WHERE date = 'september 23, 2008';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 70, "num_statements": 1} {"question": "Find the number of environmental impact assessments conducted in the last 6 months for each facility.", "schema": "CREATE TABLE environmental_assessments (id INT, facility TEXT, assessment_date DATE); INSERT INTO environmental_assessments (id, facility, assessment_date) VALUES (1, 'Facility1', '2022-01-01'), (2, 'Facility1', '2022-03-15'), (3, 'Facility2', '2022-02-01');", "sql": "SELECT facility, COUNT(*) FROM environmental_assessments WHERE assessment_date >= DATEADD(month, -6, CURRENT_DATE) GROUP BY facility;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 133, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the British for the Australian ɔ?", "schema": "CREATE TABLE table_name_93 (british VARCHAR, australian VARCHAR)", "sql": "SELECT british FROM table_name_93 WHERE australian = 'ɔ';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 57, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'btree_gist' (example 6).", "schema": null, "sql": "SELECT count(*) FROM macaddrtmp WHERE a >= '22:00:5c:e5:9b:0d';", "explanation": "Example query from the 'btree_gist' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 63, "num_statements": 1} {"question": "Write the PL/pgSQL object from PostgreSQL regression test 'temp' (example 164).", "schema": null, "sql": "-- Don't want cursor names and plpgsql function lines in the error messages\n\\set VERBOSITY terse\n\n/* helper function to create cursors for each page in [p_start, p_end] */\nCREATE FUNCTION test_temp_pin(p_start int, p_end int)\nRETURNS void\nLANGUAGE plpgsql\nAS $f$\n DECLARE\n cursorname text;", "explanation": "PL/pgSQL object from PostgreSQL core test for Temp.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "plpgsql_function", "is_postgresql_specific": true, "sql_length": 295, "num_statements": 1} {"question": "What is the number of patients and their respective genders in remote areas of Arizona?", "schema": "CREATE TABLE patients(id INT, name TEXT, location TEXT, gender TEXT); INSERT INTO patients(id, name, location, gender) VALUES (1, 'Patient A', 'Arizona Remote', 'Female'), (2, 'Patient B', 'Arizona Remote', 'Male'), (3, 'Patient C', 'Arizona Urban', 'Female'), (4, 'Patient D', 'Arizona Urban', 'Non-binary');", "sql": "SELECT COUNT(*) as patient_count, gender FROM patients WHERE location LIKE '%Arizona Remote%' GROUP BY gender;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 110, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the result of the game with a score of 3-2?", "schema": "CREATE TABLE table_name_99 (result VARCHAR, score VARCHAR)", "sql": "SELECT result FROM table_name_99 WHERE score = '3-2';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 53, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: When 6.00 is the land area in kilometers squared what is the highest population of 2011?", "schema": "CREATE TABLE table_189598_7 (population__2011_ INTEGER, land_area__km²_ VARCHAR)", "sql": "SELECT MAX(population__2011_) FROM table_189598_7 WHERE land_area__km²_ = '6.00';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 81, "num_statements": 1} {"question": "Find the warehouse with the highest average package weight in the 'northeast' region.", "schema": "CREATE TABLE warehouses (id INT, name TEXT, region TEXT); INSERT INTO warehouses (id, name, region) VALUES (1, 'Boston Warehouse', 'northeast'), (2, 'New York Warehouse', 'northeast'); CREATE TABLE packages (id INT, warehouse_id INT, weight FLOAT, state TEXT); INSERT INTO packages (id, warehouse_id, weight, state) VALUES (1, 1, 28.5, 'Massachusetts'), (2, 1, 23.3, 'New York'), (3, 2, 30.8, 'New York');", "sql": "SELECT w.name FROM warehouses w JOIN (SELECT warehouse_id, MAX(avg_weight) as avg_weight FROM (SELECT warehouse_id, AVG(weight) as avg_weight FROM packages GROUP BY warehouse_id) sub) max_weight ON w.id = max_weight.warehouse_id WHERE w.region = 'northeast';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 258, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which continent has the most diverse languages?", "schema": "CREATE TABLE countrylanguage (CountryCode VARCHAR); CREATE TABLE country (Continent VARCHAR, Code VARCHAR)", "sql": "SELECT T1.Continent FROM country AS T1 JOIN countrylanguage AS T2 ON T1.Code = T2.CountryCode GROUP BY T1.Continent ORDER BY COUNT(*) DESC LIMIT 1;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 147, "num_statements": 1} {"question": "List all military equipment sales in the European Union", "schema": "CREATE TABLE military_equipment_sales (id INT, country VARCHAR(255), sale_value FLOAT); INSERT INTO military_equipment_sales (id, country, sale_value) VALUES (1, 'Country A', 2000000); INSERT INTO military_equipment_sales (id, country, sale_value) VALUES (2, 'Country B', 3000000);", "sql": "SELECT sale_value FROM military_equipment_sales WHERE country IN ('European Union');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 84, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the total number of value for revenue more than 193 and rank less than 12 for manchester united", "schema": "CREATE TABLE table_name_21 (value__ VARCHAR, team VARCHAR, revenue__$m_ VARCHAR, rank VARCHAR)", "sql": "SELECT COUNT(value__) AS $m_ FROM table_name_21 WHERE revenue__$m_ > 193 AND rank < 12 AND team = 'manchester united';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 118, "num_statements": 1} {"question": "List all employees who have not completed the required safety training for their department.", "schema": "CREATE TABLE Employees (id INT, name VARCHAR(255), department VARCHAR(255)); CREATE TABLE Training (id INT, employee INT, completed BOOLEAN); INSERT INTO Employees (id, name, department) VALUES (1, 'John Doe', 'DeptA'), (2, 'Jane Smith', 'DeptB'); INSERT INTO Training (id, employee, completed) VALUES (1, 1, TRUE), (2, 2, FALSE);", "sql": "SELECT e.name, e.department FROM Employees e LEFT JOIN Training t ON e.id = t.employee WHERE t.completed IS NULL;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 113, "num_statements": 1} {"question": "What is the total water usage in Cairo and Istanbul?", "schema": "CREATE TABLE water_usage_ME (city VARCHAR(50), usage INT); INSERT INTO water_usage_ME (city, usage) VALUES ('Cairo', 12000), ('Istanbul', 8000);", "sql": "SELECT SUM(usage) FROM water_usage_ME WHERE city IN ('Cairo', 'Istanbul');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 74, "num_statements": 1} {"question": "PostgreSQL regression test 'xmlmap': Write the SELECT query (example 14).", "schema": null, "sql": "SELECT table_to_xmlschema('testxmlschema.test1', true, false, '');", "explanation": "Regression test for Xmlmap in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT table_to_xmlschema('testxmlschema.test1', true, false, '')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many people were in attendance on may 17?", "schema": "CREATE TABLE table_name_7 (attendance INTEGER, date VARCHAR)", "sql": "SELECT MIN(attendance) FROM table_name_7 WHERE date = 'may 17';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 63, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'numeric' (example 841).", "schema": null, "sql": "INSERT INTO num_typemod_test (millionths) VALUES (0.0009995);", "explanation": "DML from PostgreSQL core regression test for Numeric.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 61, "num_statements": 1} {"question": "Show a SQL definition from the pg_partman project (pg_partman--3.2.1--4.0.0, item 1).", "schema": null, "sql": "-- IMPORTANT POSTGRESQL 11 UPGRADE STEPS\n -- All foreign keys, any non-unique indexes, and any unique indexes that include the partition key must be created on the actual parent table and removed from the template table for those property inheritances to continue working. Foreign key inheritance from the template table no longer works at all.\n -- Any unique indexes or primary keys that do not include the partition column must still use the template table in order to be inherited to child tables.\n -- Indexes defined on the parent will take precidence over those defined on the template table. Duplicate indexes should not happen. If they do, please report this as a bug.\n\n-- Note that if this version is installed on PG10 and older, the procedures will not be created. A migration script to update pg_partman to include those will be made available once PG11 stable has been released.\n\n-- Major change in privilege/security of this extension. SECURITY DEFINER is no longer in use to cause most of pg_partman's functions to run as the owner of the called function. This is mostly due to the requirements to support PROCEDUREs in PG11, but it's also generally a better idea for security in the long run.\n -- It is recommended to create a dedicated role for pg_partman maintenance runs and give it the privileges outlined in the setup section of the top level README.\n -- The role running maintenance must now have permissions to create tables in the target schema. If you start seeing permission errors after upgrading, this is most likely what needs to be fixed.\n -- The role running maintenance must now always be the owner of the tables it is managing. This shouldn't be too drastic of a change since previously it was making the maintenace function owner the owner of the partitioned tables. However, if you've changed partition table ownership after initial creation, you may run into issues until you set the pg_partman role as the owner.\n -- If you are using logical replication, the partman maintenance role must be the owner of the publication in use.\n -- As a bonus, the ALTER DEFAULT PRIVILEGES feature should now be much easier to use since you have a dedicated role for maintaining partitions. Check the documentation for this feature so that you can set any read/write privileges you want new partition sets to have at the time they are initially created.\n -- Two functions retain their SECURITY DEFINER status:\n -- check_name_length() - Used inside the trigger function and I don't want to break existing installations. It's an IMMUTABLE function so the security implications aren't that great.\n -- apply_privileges() - Requires access to pg_authid & also potentially changes object ownership.\n -- If you're using pg_jobmon, privileges for that extension are managed independently of pg_partman. See that extension's documentation for privileges it requires.\n\n-- For PostgreSQL 11+, started conversion of some python scripts into PROCEDUREs that are now installed within the database. Scripts are still available for pre-11 versions, but will be phased out as soon as PG10 is no longer under support.\n -- Note that the user that calls these procedures must have permissions to write to the partition set and create/drop partitions in the set.\n -- partition_data.py script is now the partition_data_proc() PROCEDURE.\n -- undo_partition.py script is now the undo_partition_proc() PROCEDURE.\n -- reapply_constraints.py is now the reapply_constraints_proc() PROCEDURE.\n -- reapply_foreign_keys.py not converted since it's not needed for PG11+. Foreign keys can be applied to the parent and will automatically be inherited.\n -- reapply_indexes.py was not converted yet because concurrent index creation is not yet possible inside a PROCEDURE.\n -- Unable to turn autovacuum off/on reliably in PROCEDURE as was previously done with python scripts. If autovac is an issue when (un)partitioning large tables, recommend manually disabling it.\n\n-- Consolidated undo_partition_time(), undo_partition_id(), and undo_partition_native() functions into a single undo_partition() function.\n-- The old undo_partition() function that just copied data from child tables to the parent has been removed. The only utility this really had was a possible way to support undoing partition sets not maintained by pg_partman. With native partitioning now in place, its continued usability is now questionable and it was also causing confusion since people were using it and not the previous ones specialized for pg_partman. Can incorporate back in if someone can provide a further beneficial use case.\n\n-- For PG11+, a DEFAULT partition is automatically created as part of the partition set to handle any data that does not have a matching child table.\n -- It's just given a _default suffix on the existing parent table name.\n -- Support within pg_partman for moving data out of the default partition is not included in 4.0.0, but will be included in a future release.\n -- New parameter to show_partitions() can include the DEFAULT in its output, but it's not output unless specifically requested since it's not commonly needed in internal code where it's mostly used.\n\n-- For native partitioning, added option to give a \"source\" table to the partition_data*() functions that is different from the parent table of the partition set. This assists with partitioning an existing table to a natively partitioned table.\n\n-- For PostgreSQL 11+, run_maintenance_proc() has been added and should cause much less contention when running against many large partition sets in a single call.\n -- For PG11+, this is the preferred maintenance method to run over the old run_maintenance() function. The old one will continue to work for the next few major releases, but please update all pg_partman maintenence calls in your environment to use the new procedure.\n\n-- Simplified --type argument for undo_partition.py script for PG versions 10 and below. Similar to create_parent(), values are now 'partman' or 'native'.\n\n-- Maintenance that creates new partitions on PG11+ and is using native partitioning will no longer analyze the partition sets by default. This should greatly cut down on maintenance run time and contention issues. If you're noticing any odd query plans, especially if you're using additionally configured constraints, you may need to schedule some manual analyzes or you can set the p_analyze parameter in either the function or procedure call to \"true\".\n\n-- Fixed bug in run_maintenance() with subpartitioning where it would throw an error if retention dropped one of the parent tables undergoing maintenance.\n\n-- Fixed template table inheritance for PG10 not working on minor version PostgreSQL 10.0 (but you really shouldn't be running this version anymore).\n\n-- Fixed compilation errors for background worker against PostgreSQL 11.\n\n\n-- ######################## START POSTGRESQL 11 ONLY SECTION ##############################\nDO $pg11only$\nDECLARE\n\nv_partition_data_sql text;", "explanation": "SQL definition from the open-source pg_partman PostgreSQL project.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "plpgsql_trigger", "is_postgresql_specific": true, "sql_length": 7060, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What were the highest losses for a byes larger than 2?", "schema": "CREATE TABLE table_name_99 (losses INTEGER, byes INTEGER)", "sql": "SELECT MAX(losses) FROM table_name_99 WHERE byes > 2;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 53, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What position does allister brown play.", "schema": "CREATE TABLE table_2850912_12 (position VARCHAR, player VARCHAR)", "sql": "SELECT position FROM table_2850912_12 WHERE player = 'Allister Brown';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 70, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What Player has less than 10 Touchdowns and 0 Extra points and more than 5 Points?", "schema": "CREATE TABLE table_name_77 (player VARCHAR, points VARCHAR, touchdowns VARCHAR, extra_points VARCHAR)", "sql": "SELECT player FROM table_name_77 WHERE touchdowns < 10 AND extra_points = 0 AND points > 5;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 91, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the total number of Maidens when E.R. is less than 9.5, and a Overs Bowled larger than 57, and a Wickets of 9?", "schema": "CREATE TABLE table_name_95 (maidens VARCHAR, wickets VARCHAR, er VARCHAR, overs_bowled VARCHAR)", "sql": "SELECT COUNT(maidens) FROM table_name_95 WHERE er < 9.5 AND overs_bowled > 57 AND wickets = 9;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 94, "num_statements": 1} {"question": "Update the 'start_date' of the 'Unmanned Aerial Vehicle' project for 'Brown Security' to 2024-01-01 if the current start_date is before 2024-01-01.", "schema": "CREATE TABLE BrownSecurityProjects(id INT, contractor VARCHAR(255), project VARCHAR(255), start_date DATE, end_date DATE);INSERT INTO BrownSecurityProjects(id, contractor, project, start_date, end_date) VALUES (1, 'Brown Security', 'Unmanned Aerial Vehicle', '2023-01-01', '2025-12-31');", "sql": "UPDATE BrownSecurityProjects SET start_date = '2024-01-01' WHERE contractor = 'Brown Security' AND project = 'Unmanned Aerial Vehicle' AND start_date < '2024-01-01';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 165, "num_statements": 1} {"question": "Delete all player records from the 'Players to Delete' list.", "schema": "CREATE TABLE players (id INT, name VARCHAR(255), age INT, country VARCHAR(255)); CREATE TABLE players_to_delete (id INT); INSERT INTO players (id, name, age, country) VALUES (1, 'John Doe', 25, 'USA'), (2, 'Jane Doe', 30, 'Canada'); INSERT INTO players_to_delete (id) VALUES (1), (3);", "sql": "DELETE FROM players WHERE id IN (SELECT id FROM players_to_delete);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "dml_delete", "is_postgresql_specific": false, "sql_length": 67, "num_statements": 1} {"question": "PL/pgSQL test: Plpgsql Domain (example 49).", "schema": null, "sql": "--\n-- Domains within composite\n--\n\nCREATE TYPE nnint_container AS (f1 int, f2 nnint);", "explanation": "PL/pgSQL example from PostgreSQL source test for Plpgsql Domain.", "validation_query": null, "source": "plpgsql_source", "difficulty": "basic", "category": "ddl_advanced", "is_postgresql_specific": true, "sql_length": 85, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who are enrolled in 2 degree programs in one semester? List the first name, middle name and last name and the id.", "schema": "CREATE TABLE Students (first_name VARCHAR, middle_name VARCHAR, last_name VARCHAR, student_id VARCHAR); CREATE TABLE Student_Enrolment (student_id VARCHAR)", "sql": "SELECT T1.first_name, T1.middle_name, T1.last_name, T1.student_id FROM Students AS T1 JOIN Student_Enrolment AS T2 ON T1.student_id = T2.student_id GROUP BY T1.student_id HAVING COUNT(*) = 2;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 191, "num_statements": 1} {"question": "Delete all records from the Volunteers table where the VolunteerActivity is 'Clothing Drive'.", "schema": "CREATE TABLE Volunteers (VolunteerID INT, VolunteerName VARCHAR(50), HoursSpent INT, VolunteerActivity VARCHAR(50), VolunteerDate DATE);", "sql": "DELETE FROM Volunteers WHERE VolunteerActivity = 'Clothing Drive';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_delete", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "get the number of unique customers in the 'Eco-friendly' size range", "schema": "CREATE TABLE customers (id INT, name VARCHAR(50), size VARCHAR(20), sustainable_range BOOLEAN); INSERT INTO customers (id, name, size, sustainable_range) VALUES (1, 'Alice', 'M', true); INSERT INTO customers (id, name, size, sustainable_range) VALUES (2, 'Bob', 'XL', true); INSERT INTO customers (id, name, size, sustainable_range) VALUES (3, 'Charlie', 'S', false);", "sql": "SELECT COUNT(DISTINCT id) FROM customers WHERE sustainable_range = true AND size IN ('XS', 'S', 'M', 'L', 'XL');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 112, "num_statements": 1} {"question": "What is the maximum weight of a penguin for each species?", "schema": "CREATE TABLE penguins (id INT, species VARCHAR(20), weight FLOAT);", "sql": "SELECT species, MAX(weight) FROM penguins GROUP BY species;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 59, "num_statements": 1} {"question": "What is the veteran employment rate in the state of New York?", "schema": "CREATE TABLE veteran_employment (veteran_id INT, state VARCHAR(255), employed BOOLEAN); INSERT INTO veteran_employment (veteran_id, state, employed) VALUES (7, 'New York', TRUE), (8, 'New York', FALSE), (9, 'New York', TRUE);", "sql": "SELECT (COUNT(*) - SUM(employed)) * 100.0 / COUNT(*) as employment_rate FROM veteran_employment WHERE state = 'New York';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 121, "num_statements": 1} {"question": "Retrieve the names and maintenance dates of structures in Texas with a resilience score greater than 85", "schema": "CREATE TABLE Infrastructure (id INT, name VARCHAR(255), type VARCHAR(255), location VARCHAR(255), resilience_score INT); CREATE TABLE Maintenance (id INT, infrastructure_id INT, maintenance_date DATE); INSERT INTO Infrastructure (id, name, type, location, resilience_score) VALUES (1, 'Road A', 'Road', 'Texas', 88); INSERT INTO Infrastructure (id, name, type, location, resilience_score) VALUES (2, 'Bridge B', 'Bridge', 'California', 70); INSERT INTO Maintenance (id, infrastructure_id, maintenance_date) VALUES (1, 1, '2022-01-01'); INSERT INTO Maintenance (id, infrastructure_id, maintenance_date) VALUES (2, 2, '2022-02-01');", "sql": "SELECT Infrastructure.name, Maintenance.maintenance_date FROM Infrastructure INNER JOIN Maintenance ON Infrastructure.id = Maintenance.infrastructure_id WHERE Infrastructure.location = 'Texas' AND Infrastructure.resilience_score > 85;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 234, "num_statements": 1} {"question": "PostgreSQL regression test 'jsonpath': Write the SELECT query (example 114).", "schema": null, "sql": "select '$ ? (@.a < -0.1)'::jsonpath;", "explanation": "Regression test for Jsonpath in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select '$ ? (@.a < -0.1)'::jsonpath) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 36, "num_statements": 1} {"question": "What is the maximum range of Level 2 EV chargers?", "schema": "CREATE TABLE Chargers (Id INT, Type VARCHAR(255), Manufacturer VARCHAR(255), Range INT); INSERT INTO Chargers (Id, Type, Manufacturer, Range) VALUES (1, 'Level 2', 'Blink', 25), (2, 'Level 2', 'ChargePoint', 30), (3, 'Level 2', 'EVgo', 28), (4, 'Level 2', 'SemaConnect', 32);", "sql": "SELECT MAX(Range) FROM Chargers WHERE Type = 'Level 2';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 55, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is 2nd Member, when Assembled is \"30 March 1298\"?", "schema": "CREATE TABLE table_name_96 (assembled VARCHAR)", "sql": "SELECT 2 AS nd_member FROM table_name_96 WHERE assembled = '30 march 1298';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 75, "num_statements": 1} {"question": "Show an example of PostgreSQL CREATE CAST (example 4).", "schema": null, "sql": "SELECT CAST ( 2 AS numeric ) + 4.0;", "explanation": "PostgreSQL CREATE CAST command.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 35, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: With a house edge of 3.53% and a Non-Suited Matched of 3:1, name the Double Non-Suited Match.", "schema": "CREATE TABLE table_name_99 (Double VARCHAR, non_suited_match VARCHAR, house_edge VARCHAR)", "sql": "SELECT Double AS non_suited_match FROM table_name_99 WHERE non_suited_match = '3:1' AND house_edge = '3.53%';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 109, "num_statements": 1} {"question": "How many employees are there in each department?", "schema": "CREATE TABLE Employees (EmployeeID INT, Name VARCHAR(50), Department VARCHAR(50), Position VARCHAR(50), Salary FLOAT); INSERT INTO Employees (EmployeeID, Name, Department, Position, Salary) VALUES (1, 'John Doe', 'IT', 'Developer', 75000.00), (2, 'Jane Smith', 'IT', 'Developer', 80000.00), (3, 'Alice Johnson', 'Marketing', 'Marketing Specialist', 60000.00), (4, 'Bob Brown', 'HR', 'HR Specialist', 65000.00);", "sql": "SELECT Department, COUNT(*) FROM Employees GROUP BY Department;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 63, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which players have a pick number of 27?", "schema": "CREATE TABLE table_10975034_4 (player VARCHAR, pick__number VARCHAR)", "sql": "SELECT player FROM table_10975034_4 WHERE pick__number = 27;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 60, "num_statements": 1} {"question": "How many passengers traveled on each route on a specific date?", "schema": "CREATE TABLE trips (route_id INT, trip_date DATE); INSERT INTO trips (route_id, trip_date) VALUES (1, '2022-05-01'), (1, '2022-05-01'), (2, '2022-05-01'), (3, '2022-05-01'), (3, '2022-05-01');", "sql": "SELECT r.route_name, t.trip_date, COUNT(t.route_id) AS passengers FROM trips t JOIN routes r ON t.route_id = r.route_id GROUP BY r.route_name, t.trip_date;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 155, "num_statements": 1} {"question": "How many unique volunteers worked on each project in 2022?", "schema": "CREATE TABLE projects (id INT, name VARCHAR(50), start_date DATE, end_date DATE); CREATE TABLE volunteers (id INT, name VARCHAR(50), project_id INT, volunteer_date DATE); INSERT INTO projects (id, name, start_date, end_date) VALUES (1, 'Project A', '2022-01-01', '2022-12-31'), (2, 'Project B', '2022-07-01', '2022-12-31'); INSERT INTO volunteers (id, name, project_id, volunteer_date) VALUES (1, 'Volunteer 1', 1, '2022-02-01'), (2, 'Volunteer 2', 1, '2022-03-01'), (3, 'Volunteer 3', 2, '2022-08-01');", "sql": "SELECT p.name, COUNT(DISTINCT v.id) AS num_volunteers FROM projects p JOIN volunteers v ON p.id = v.project_id WHERE YEAR(v.volunteer_date) = 2022 GROUP BY p.id;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 161, "num_statements": 1} {"question": "List all sustainable building practices in the state of Washington", "schema": "CREATE TABLE sustainable_practices (practice_id INT, building_type VARCHAR(20), state VARCHAR(20), description TEXT); INSERT INTO sustainable_practices (practice_id, building_type, state, description) VALUES (1, 'Residential', 'WA', 'Use of recycled materials');", "sql": "SELECT * FROM sustainable_practices WHERE state = 'WA';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 55, "num_statements": 1} {"question": "What was the total budget for community development initiatives in Africa in the year 2020?", "schema": "CREATE TABLE community_development (id INT, initiative_name VARCHAR(50), location VARCHAR(50), budget FLOAT, implementation_date DATE); INSERT INTO community_development (id, initiative_name, location, budget, implementation_date) VALUES (1, 'Village Technology Center', 'Kenya', 35000.00, '2020-03-15');", "sql": "SELECT SUM(budget) FROM community_development WHERE location LIKE '%Africa%' AND implementation_date >= '2020-01-01' AND implementation_date <= '2020-12-31';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 157, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who is the director of the best foreign film?", "schema": "CREATE TABLE table_name_14 (director VARCHAR, original_title VARCHAR)", "sql": "SELECT director FROM table_name_14 WHERE original_title = 'best foreign film';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 78, "num_statements": 1} {"question": "What is the total revenue from cultural heritage experiences in Indonesia?", "schema": "CREATE TABLE Countries (id INT, name VARCHAR(50)); INSERT INTO Countries (id, name) VALUES (1, 'Indonesia'); CREATE TABLE Transactions (id INT, country_id INT, experience_type VARCHAR(50), revenue INT); INSERT INTO Transactions (id, country_id, experience_type, revenue) VALUES (1, 1, 'Cultural Heritage', 800);", "sql": "SELECT SUM(t.revenue) as total_revenue FROM Transactions t JOIN Countries c ON t.country_id = c.id WHERE t.experience_type = 'Cultural Heritage';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 145, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is Date, when Against is \"22\"?", "schema": "CREATE TABLE table_name_87 (date VARCHAR, against VARCHAR)", "sql": "SELECT date FROM table_name_87 WHERE against = 22;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 50, "num_statements": 1} {"question": "PostgreSQL regression test 'sqljson_queryfuncs': Write the SELECT query (example 51).", "schema": null, "sql": "SELECT JSON_VALUE(jsonb '\"123\"', '$' RETURNING int) + 234;", "explanation": "Regression test for Sqljson Queryfuncs in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT JSON_VALUE(jsonb '\"123\"', '$' RETURNING int) + 234) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": true, "sql_length": 58, "num_statements": 1} {"question": "Add a new entry to the 'virtual_reality_headsets' table with ID 5, name 'Oculus Rift S', and price 399", "schema": "CREATE TABLE virtual_reality_headsets (id INT, name VARCHAR(255), price INT);", "sql": "INSERT INTO virtual_reality_headsets (id, name, price) VALUES (5, 'Oculus Rift S', 399);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 88, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which British has Examples of exit?", "schema": "CREATE TABLE table_name_88 (british VARCHAR, examples VARCHAR)", "sql": "SELECT british FROM table_name_88 WHERE examples = 'exit';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 58, "num_statements": 1} {"question": "What is the average engagement for virtual tours in the 'Europe' region?", "schema": "CREATE TABLE virtual_tours_engagement (tour_id INT, name TEXT, region TEXT, engagement INT); INSERT INTO virtual_tours_engagement (tour_id, name, region, engagement) VALUES (1, 'Tour A', 'Americas', 1000), (2, 'Tour B', 'Europe', 800), (3, 'Tour C', 'Asia', 1200);", "sql": "SELECT AVG(engagement) FROM virtual_tours_engagement WHERE region = 'Europe';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 77, "num_statements": 1} {"question": "What is the total weight of organic apples and bananas shipped from Costa Rica to New York in the first quarter of 2022?", "schema": "CREATE TABLE shipments(id INT, product VARCHAR(20), weight FLOAT, country VARCHAR(20), date DATE); INSERT INTO shipments(id, product, weight, country, date) VALUES (1, 'apples', 500, 'Costa Rica', '2022-01-05'); INSERT INTO shipments(id, product, weight, country, date) VALUES (2, 'bananas', 800, 'Costa Rica', '2022-01-07');", "sql": "SELECT SUM(weight) FROM shipments WHERE product IN ('apples', 'bananas') AND country = 'Costa Rica' AND date BETWEEN '2022-01-01' AND '2022-03-31' AND product LIKE 'organic%';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 175, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the score of the game when the record was 28–19?", "schema": "CREATE TABLE table_name_62 (score VARCHAR, record VARCHAR)", "sql": "SELECT score FROM table_name_62 WHERE record = '28–19';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 55, "num_statements": 1} {"question": "How many defense diplomacy events took place between India and Pakistan from 2015 to 2019?", "schema": "CREATE TABLE DiplomacyEvents(Country1 NVARCHAR(50), Country2 NVARCHAR(50), EventType VARCHAR(50), Year INT);INSERT INTO DiplomacyEvents(Country1, Country2, EventType, Year) VALUES ('India', 'Pakistan', 'Defense Talks', 2015), ('Pakistan', 'India', 'Military Exercise', 2016), ('India', 'Pakistan', 'Joint Military Training', 2017), ('Pakistan', 'India', 'Defense Talks', 2018), ('India', 'Pakistan', 'Military Exercise', 2019);", "sql": "SELECT COUNT(*) AS Total_Events FROM DiplomacyEvents WHERE (Country1 = 'India' AND Country2 = 'Pakistan') OR (Country1 = 'Pakistan' AND Country2 = 'India') AND Year BETWEEN 2015 AND 2019;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 187, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the attendance for the game after game 4?", "schema": "CREATE TABLE table_name_96 (attendance VARCHAR, game INTEGER)", "sql": "SELECT attendance FROM table_name_96 WHERE game > 4;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 52, "num_statements": 1} {"question": "How many steps did members take in total, in the past week, separated by membership type?", "schema": "CREATE TABLE membership (member_id INT, membership_type VARCHAR(20), gender VARCHAR(10)); INSERT INTO membership (member_id, membership_type, gender) VALUES (1, 'Platinum', 'Female'), (2, 'Gold', 'Male'), (3, 'Platinum', 'Non-binary'); CREATE TABLE activity_data (member_id INT, steps INT, timestamp TIMESTAMP); INSERT INTO activity_data (member_id, steps, timestamp) VALUES (1, 5000, '2022-01-01 10:00:00'), (1, 7000, '2022-01-01 11:00:00'), (2, 8000, '2022-01-01 10:00:00'), (2, 9000, '2022-01-01 11:00:00'), (3, 4000, '2022-01-01 10:00:00'), (3, 6000, '2022-01-01 11:00:00');", "sql": "SELECT membership_type, SUM(steps) as total_steps FROM activity_data a JOIN membership m ON a.member_id = m.member_id WHERE timestamp BETWEEN '2022-01-01 00:00:00' AND '2022-01-08 23:59:59' GROUP BY membership_type;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 215, "num_statements": 1} {"question": "Write the SQL definition from PostgreSQL contrib extension 'contrib' (item 60).", "schema": null, "sql": "CREATE FUNCTION gbt_int2_fetch(internal)\nRETURNS internal\nAS 'MODULE_PATHNAME'\nLANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;", "explanation": "SQL definition from the 'contrib' PostgreSQL contrib extension.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 121, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'ltree' (example 183).", "schema": null, "sql": "SELECT 'a.b.c.d.e'::ltree ~ 'a.!b.*{1}.!c.*';", "explanation": "Example query from the 'ltree' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 45, "num_statements": 1} {"question": "What is the maximum duration of a single workout session for each member?", "schema": "CREATE TABLE members (member_id INT, name VARCHAR(50), gender VARCHAR(10), dob DATE); INSERT INTO members (member_id, name, gender, dob) VALUES (1, 'Leila Alvarez', 'Female', '2002-04-02'); INSERT INTO members (member_id, name, gender, dob) VALUES (2, 'Mohammed Ibrahim', 'Male', '2000-11-28'); CREATE TABLE workout_sessions (session_id INT, member_id INT, session_date DATE, duration INT); INSERT INTO workout_sessions (session_id, member_id, session_date, duration) VALUES (1, 1, '2023-02-02', 45); INSERT INTO workout_sessions (session_id, member_id, session_date, duration) VALUES (2, 1, '2023-02-05', 60); INSERT INTO workout_sessions (session_id, member_id, session_date, duration) VALUES (3, 2, '2023-02-07', 75); INSERT INTO workout_sessions (session_id, member_id, session_date, duration) VALUES (4, 1, '2023-02-13', 30);", "sql": "SELECT member_id, MAX(duration) AS max_duration FROM workout_sessions GROUP BY member_id;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 89, "num_statements": 1} {"question": "How many virtual tours were conducted in Japan in the past month?", "schema": "CREATE TABLE virtual_tours (tour_id INT, tour_name TEXT, country TEXT, tour_date DATE); INSERT INTO virtual_tours (tour_id, tour_name, country, tour_date) VALUES (1, 'Mt. Fuji Tour', 'Japan', '2022-03-05'), (2, 'Tokyo City Tour', 'Japan', '2022-03-10');", "sql": "SELECT COUNT(*) FROM virtual_tours WHERE country = 'Japan' AND tour_date >= DATEADD(day, -30, CURRENT_DATE);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 108, "num_statements": 1} {"question": "What is the total rainfall (in inches) for each region in Texas in the first half of 2022?", "schema": "CREATE TABLE regions (id INT, name VARCHAR(255)); INSERT INTO regions (id, name) VALUES (1, 'RegionA'), (2, 'RegionB'), (3, 'RegionC'); CREATE TABLE rainfall (region_id INT, rainfall DECIMAL(5,2), date DATE); INSERT INTO rainfall (region_id, rainfall, date) VALUES (1, 2.5, '2022-01-01'), (1, 3.0, '2022-01-02'), (2, 1.5, '2022-01-01'), (2, 2.0, '2022-01-02'), (3, 3.5, '2022-01-01'), (3, 4.0, '2022-01-02');", "sql": "SELECT region_id, SUM(rainfall) as total_rainfall FROM rainfall WHERE date BETWEEN '2022-01-01' AND '2022-06-30' GROUP BY region_id;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 132, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the result of the election of the incumbent harry r. sheppard?", "schema": "CREATE TABLE table_1341973_6 (result VARCHAR, incumbent VARCHAR)", "sql": "SELECT result FROM table_1341973_6 WHERE incumbent = 'Harry R. Sheppard';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 73, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'sqljson_queryfuncs' (example 244).", "schema": null, "sql": "CREATE INDEX ON test_jsonb_mutability (JSON_VALUE(js, '$' DEFAULT random()::int ON ERROR));", "explanation": "DDL from PostgreSQL core regression test for Sqljson Queryfuncs.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "ddl_index", "is_postgresql_specific": false, "sql_length": 91, "num_statements": 1} {"question": "Delete all shipments that were handled by the 'Sydney' warehouse.", "schema": "CREATE TABLE Warehouse (id INT, name VARCHAR(20), city VARCHAR(20)); INSERT INTO Warehouse (id, name, city) VALUES (1, 'Sydney Warehouse', 'Sydney'); CREATE TABLE Handling (id INT, shipment_id INT, warehouse_id INT); INSERT INTO Handling (id, shipment_id, warehouse_id) VALUES (1, 101, 1), (2, 102, 1), (3, 103, 2); CREATE TABLE Shipment (id INT, weight INT); INSERT INTO Shipment (id, weight) VALUES (101, 10000), (102, 15000), (103, 8000);", "sql": "DELETE FROM Shipment WHERE id IN (SELECT Handling.shipment_id FROM Handling JOIN Warehouse ON Handling.warehouse_id = Warehouse.id WHERE Warehouse.city = 'Sydney');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "dml_delete", "is_postgresql_specific": false, "sql_length": 164, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who directed the episode that had 6.76 million U.S. viewers?", "schema": "CREATE TABLE table_23242968_1 (directed_by VARCHAR, us_viewers__millions_ VARCHAR)", "sql": "SELECT directed_by FROM table_23242968_1 WHERE us_viewers__millions_ = '6.76';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 78, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the largest crowd size at arden street oval?", "schema": "CREATE TABLE table_name_58 (crowd INTEGER, venue VARCHAR)", "sql": "SELECT MAX(crowd) FROM table_name_58 WHERE venue = 'arden street oval';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 71, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What team does the player who went to Nebraska play for?", "schema": "CREATE TABLE table_21321804_3 (cfl_team VARCHAR, college VARCHAR)", "sql": "SELECT cfl_team FROM table_21321804_3 WHERE college = 'Nebraska';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 65, "num_statements": 1} {"question": "What is the average CO2 emissions reduction achieved per sustainable fabric type, grouped by textile supplier?", "schema": "CREATE TABLE TextileSuppliers (supplier_id INT, supplier_country VARCHAR(20), CO2_emissions_reduction FLOAT); CREATE TABLE Fabrics (fabric_id INT, fabric_type VARCHAR(20), supplier_id INT); INSERT INTO TextileSuppliers (supplier_id, supplier_country, CO2_emissions_reduction) VALUES (1, 'Italy', 15.2);", "sql": "SELECT Fabrics.fabric_type, AVG(TextileSuppliers.CO2_emissions_reduction) as avg_reduction FROM Fabrics JOIN TextileSuppliers ON Fabrics.supplier_id = TextileSuppliers.supplier_id GROUP BY Fabrics.fabric_type;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 209, "num_statements": 1} {"question": "What is the total budget spent on each project in 2022?", "schema": "CREATE TABLE Projects (id INT, project_name TEXT, budget_allocated FLOAT, start_date DATE);", "sql": "SELECT project_name, SUM(budget_allocated) as total_budget_spent FROM Projects WHERE YEAR(start_date) = 2022 GROUP BY project_name;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 131, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who wrote the episode with the production code of BN103?", "schema": "CREATE TABLE table_21304155_1 (written_by VARCHAR, production_code VARCHAR)", "sql": "SELECT written_by FROM table_21304155_1 WHERE production_code = 'BN103';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 72, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'numeric_big' (example 454).", "schema": null, "sql": "INSERT INTO num_exp_log10 VALUES (6,'-1.328484003982869642690619298690906747763234110040562640557173509402512757735587333095924652711056556491908059708986413635120656426593745303715671199761364516107844087845783714418487426723538440387069985879601248897538855843115404484229652166941838283489828419407478748732927617251897244190697443966424660881366993754577233476597163021768156814527570512834684713730559883782625870597080940193303268818336816535968869931456641949301731046034660616615392129109391145214470757259042172416816936479713743188047425796931722546185493217275537303458837771965375448968719169174136287532752370175863826715450565025635651343928205805494319778539652563499901671319955144823432132740582617949774638538594081514904904341299199113721131520557004571803778698005652464301037962272085633628653321081368256925971558076970172779715');", "explanation": "DML from PostgreSQL core regression test for Numeric Big.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 853, "num_statements": 1} {"question": "How many unique clothing items were produced in size XS and size XXL in the first quarter of 2021?", "schema": "CREATE TABLE ClothingItems (ItemID INT, ItemName TEXT, Size TEXT, ProductionDate DATE); INSERT INTO ClothingItems (ItemID, ItemName, Size, ProductionDate) VALUES (1, 'Organic Cotton T-Shirt', 'XS', '2021-01-05'), (2, 'Bamboo Viscose Blouse', 'XXL', '2021-01-10'), (3, 'Recycled Polyester Pants', 'XS', '2021-02-15'), (4, 'Tencel Jacket', 'XXL', '2021-03-01');", "sql": "SELECT COUNT(DISTINCT ItemID) as UniqueItems FROM ClothingItems WHERE Size IN ('XS', 'XXL') AND ProductionDate BETWEEN '2021-01-01' AND '2021-03-31';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 149, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which giant slalom had an Overall number of 25?", "schema": "CREATE TABLE table_name_34 (Giant VARCHAR, overall VARCHAR)", "sql": "SELECT Giant AS slalom FROM table_name_34 WHERE overall = 25;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 61, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the lowest pick number for the Russian Major League?", "schema": "CREATE TABLE table_name_70 (pick__number INTEGER, league_from VARCHAR)", "sql": "SELECT MIN(pick__number) FROM table_name_70 WHERE league_from = 'russian major league';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 87, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which Reason is given when 1103 is the date for Became heir?", "schema": "CREATE TABLE table_name_7 (reason VARCHAR, became_heir VARCHAR)", "sql": "SELECT reason FROM table_name_7 WHERE became_heir = '1103';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 59, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: If the Away team is essendon, what was the Date they played?", "schema": "CREATE TABLE table_name_62 (date VARCHAR, away_team VARCHAR)", "sql": "SELECT date FROM table_name_62 WHERE away_team = 'essendon';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 60, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What production code does episode 174 of Melrose place have?", "schema": "CREATE TABLE table_name_92 (production_code VARCHAR, no_in_series VARCHAR)", "sql": "SELECT production_code FROM table_name_92 WHERE no_in_series = 174;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 67, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is Alexandr Elke's Weight?", "schema": "CREATE TABLE table_name_59 (weight VARCHAR, name VARCHAR)", "sql": "SELECT weight FROM table_name_59 WHERE name = 'alexandr elke';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 62, "num_statements": 1} {"question": "Find carbon offset initiatives in Australia and their corresponding funded amounts.", "schema": "CREATE TABLE carbon_offsets (id INT, country VARCHAR(255), initiative VARCHAR(255), funded_amount INT); INSERT INTO carbon_offsets (id, country, initiative, funded_amount) VALUES (1, 'Australia', 'Tree planting', 50000), (2, 'Australia', 'Renewable energy', 100000), (3, 'Canada', 'Energy efficiency', 75000);", "sql": "SELECT initiative, funded_amount FROM carbon_offsets WHERE country = 'Australia';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 81, "num_statements": 1} {"question": "List all pollution control initiatives in the North Atlantic and their corresponding funding amounts.", "schema": "CREATE TABLE pollution_control_initiatives (id INT, initiative TEXT, region TEXT, funding FLOAT); INSERT INTO pollution_control_initiatives (id, initiative, region, funding) VALUES (1, 'Initiative X', 'North Atlantic', 500000), (2, 'Initiative Y', 'Arctic', 700000), (3, 'Initiative Z', 'North Atlantic', 600000);", "sql": "SELECT initiative, funding FROM pollution_control_initiatives WHERE region = 'North Atlantic';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 94, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'earthdistance' (example 11).", "schema": null, "sql": "SELECT sec_to_gc(100000)::numeric(20,5);", "explanation": "Example query from the 'earthdistance' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 40, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many categories for elapsed time exist for the $16.50 fixed-limit badugi game?", "schema": "CREATE TABLE table_22050544_3 (elapsed_time VARCHAR, event VARCHAR)", "sql": "SELECT COUNT(elapsed_time) FROM table_22050544_3 WHERE event = '$16.50 Fixed-Limit Badugi';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 91, "num_statements": 1} {"question": "Calculate the average daily water consumption in the state of Rajasthan, India for the month of May", "schema": "CREATE TABLE states (id INT, name VARCHAR(255)); INSERT INTO states (id, name) VALUES (1, 'Rajasthan'); CREATE TABLE water_meter_readings (id INT, state_id INT, consumption FLOAT, reading_date DATE); INSERT INTO water_meter_readings (id, state_id, consumption, reading_date) VALUES (1, 1, 100, '2022-05-01');", "sql": "SELECT AVG(water_meter_readings.consumption) as avg_daily_consumption FROM water_meter_readings WHERE water_meter_readings.reading_date >= '2022-05-01' AND water_meter_readings.reading_date <= '2022-05-31' AND water_meter_readings.state_id IN (SELECT id FROM states WHERE name = 'Rajasthan');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 292, "num_statements": 1} {"question": "What is the total budget for education in Ontario?", "schema": "CREATE TABLE budgets (id INT, category VARCHAR(50), amount INT, province VARCHAR(20)); INSERT INTO budgets (id, category, amount, province) VALUES (1, 'Education', 30000000, 'Ontario'); INSERT INTO budgets (id, category, amount, province) VALUES (2, 'Healthcare', 40000000, 'Ontario');", "sql": "SELECT SUM(amount) FROM budgets WHERE category = 'Education' AND province = 'Ontario';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 86, "num_statements": 1} {"question": "What is the average soil moisture level for crops in the 'South' region?", "schema": "CREATE TABLE IoTDevices (device_id INT, device_type VARCHAR(20), region VARCHAR(10), moisture FLOAT); INSERT INTO IoTDevices (device_id, device_type, region, moisture) VALUES (1, 'Soil Moisture Sensor', 'West', 50.5); INSERT INTO IoTDevices (device_id, device_type, region, moisture) VALUES (2, 'Soil Moisture Sensor', 'East', 55.3); INSERT INTO IoTDevices (device_id, device_type, region, moisture) VALUES (3, 'Soil Moisture Sensor', 'North', 60.1); INSERT INTO IoTDevices (device_id, device_type, region, moisture) VALUES (4, 'Soil Moisture Sensor', 'South', 65.7);", "sql": "SELECT AVG(moisture) FROM IoTDevices WHERE region = 'South';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 60, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Bhiwani district have what constituency number?", "schema": "CREATE TABLE table_name_37 (constituency_number VARCHAR, district VARCHAR)", "sql": "SELECT constituency_number FROM table_name_37 WHERE district = 'bhiwani';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 73, "num_statements": 1} {"question": "How many public hospitals are there in each location type?", "schema": "CREATE TABLE hospitals (name VARCHAR(50), type VARCHAR(50), beds INT, location VARCHAR(50)); INSERT INTO hospitals (name, type, beds, location) VALUES ('Hospital A', 'Public', 300, 'City'); INSERT INTO hospitals (name, type, beds, location) VALUES ('Hospital B', 'Private', 200, 'Suburban'); INSERT INTO hospitals (name, type, beds, location) VALUES ('Hospital C', 'Public', 400, 'Rural');", "sql": "SELECT location, COUNT(*) as NumPublicHospitals FROM hospitals WHERE type = 'Public' GROUP BY location;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 103, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the Score of a Record 24–23–3?", "schema": "CREATE TABLE table_name_69 (score VARCHAR, record VARCHAR)", "sql": "SELECT score FROM table_name_69 WHERE record = '24–23–3';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 57, "num_statements": 1} {"question": "pgTAP test for Proctap (assertion 16).", "schema": null, "sql": "SELECT * FROM check_test(\n isnt_procedure( 'procschema', 'argproc', ARRAY['integer', 'text'], 'whatever' ),\n false,\n 'isnt_procedure(schema, proc, args, desc)',\n 'whatever',\n ''\n);", "explanation": "SQL assertion from pgTAP test suite for Proctap.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 195, "num_statements": 1} {"question": "Which countries have the most organizations working on accessible technology?", "schema": "CREATE TABLE accessible_tech (organization_name TEXT, country TEXT); INSERT INTO accessible_tech (organization_name, country) VALUES ('AccessibleTech', 'Canada'), ('EqualTech', 'United States'), ('InclusiveTech', 'United Kingdom'), ('AccessTech', 'Canada'), ('EqualAI', 'United States');", "sql": "SELECT country, COUNT(*) as organization_count FROM accessible_tech GROUP BY country ORDER BY organization_count DESC;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 118, "num_statements": 1} {"question": "PostgreSQL Rangetypes: show example 6.", "schema": null, "sql": "CREATE TYPE floatrange AS RANGE ( subtype = float8, subtype_diff = float8mi ); SELECT '[1.234, 5.678]'::floatrange;", "explanation": "Example from PostgreSQL documentation on Rangetypes.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_advanced", "is_postgresql_specific": true, "sql_length": 115, "num_statements": 2} {"question": "Generate PostgreSQL SQL for: What is the crowd size of the game at Brunswick Street Oval?", "schema": "CREATE TABLE table_name_61 (crowd INTEGER, venue VARCHAR)", "sql": "SELECT SUM(crowd) FROM table_name_61 WHERE venue = 'brunswick street oval';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 75, "num_statements": 1} {"question": "What is the budget for each military technology program in the United Kingdom?", "schema": "CREATE TABLE MilitaryTechnology (id INT, country VARCHAR(255), program VARCHAR(255), budget FLOAT); INSERT INTO MilitaryTechnology (id, country, program, budget) VALUES (1, 'United Kingdom', 'F-35B Lightning II', 3500000); INSERT INTO MilitaryTechnology (id, country, program, budget) VALUES (2, 'United Kingdom', 'Type 45 Destroyer', 1200000);", "sql": "SELECT program, budget FROM MilitaryTechnology WHERE country = 'United Kingdom';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 80, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the save of the game that 23,394 people attended?", "schema": "CREATE TABLE table_name_49 (save VARCHAR, attendance VARCHAR)", "sql": "SELECT save FROM table_name_49 WHERE attendance = '23,394';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 59, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many points did Goole Town accumulate?", "schema": "CREATE TABLE table_17358515_1 (points_2 VARCHAR, team VARCHAR)", "sql": "SELECT COUNT(points_2) FROM table_17358515_1 WHERE team = 'Goole Town';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 71, "num_statements": 1} {"question": "PostgreSQL regression test 'interval': Write the SELECT query (example 108).", "schema": null, "sql": "SELECT interval '1 2' day to minute;", "explanation": "Regression test for Interval in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT interval '1 2' day to minute) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 36, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'ltree' (example 263).", "schema": null, "sql": "SELECT * FROM ltreetest WHERE t ? '{23.*.1,23.*.2}' order by t asc;", "explanation": "Example query from the 'ltree' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 67, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the average rank Finland, which has 1 bronze, more than 1 silver, and less than 0 gold, has?", "schema": "CREATE TABLE table_name_6 (rank INTEGER, gold VARCHAR, nation VARCHAR, bronze VARCHAR, silver VARCHAR)", "sql": "SELECT AVG(rank) FROM table_name_6 WHERE bronze = 1 AND silver > 1 AND nation = 'finland' AND gold < 0;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 103, "num_statements": 1} {"question": "What are the origins of the ethical clothing products?", "schema": "CREATE TABLE EthicalClothing (product_id INT, product_name TEXT, origin TEXT); INSERT INTO EthicalClothing (product_id, product_name, origin) VALUES (1, 'Organic Cotton T-Shirt', 'Nepal'), (2, 'Hemp Pants', 'China'), (3, 'Recycled Polyester Jacket', 'Haiti');", "sql": "SELECT DISTINCT origin FROM EthicalClothing;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 44, "num_statements": 1} {"question": "What is the total number of farms that have implemented agroforestry practices in the agroforestry_practices dataset?", "schema": "CREATE TABLE agroforestry_practices (id INT, farm_id INT, agroforestry_practice BOOLEAN);", "sql": "SELECT COUNT(*) FROM agroforestry_practices WHERE agroforestry_practice = TRUE;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 79, "num_statements": 1} {"question": "Find the total number of labor rights violations for unions that have a focus on worker safety.", "schema": "CREATE TABLE union_violation_safety (union_id INT, violation_count INT, safety_focus BOOLEAN); INSERT INTO union_violation_safety (union_id, violation_count, safety_focus) VALUES (1, 10, true), (2, 20, false), (3, 30, true), (4, 40, true);", "sql": "SELECT SUM(union_violation_safety.violation_count) FROM union_violation_safety WHERE union_violation_safety.safety_focus = true;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 128, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the total of overall values with a safety position in a round greater than 1?", "schema": "CREATE TABLE table_name_26 (overall VARCHAR, position VARCHAR, round VARCHAR)", "sql": "SELECT COUNT(overall) FROM table_name_26 WHERE position = 'safety' AND round > 1;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 81, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'btree_gist' (example 6).", "schema": null, "sql": "SELECT count(*) FROM vchartmp WHERE a >= '31b0'::varchar(32);", "explanation": "Example query from the 'btree_gist' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 62, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What Chassis has g Tyres before 1987?", "schema": "CREATE TABLE table_name_26 (chassis VARCHAR, tyres VARCHAR, year VARCHAR)", "sql": "SELECT chassis FROM table_name_26 WHERE tyres = 'g' AND year < 1987;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 68, "num_statements": 1} {"question": "PostgreSQL regression test 'bit': Write the SELECT query (example 25).", "schema": null, "sql": "SELECT SUBSTRING('01010101'::bit(8) FROM -10 FOR -2147483646) AS \"error\";", "explanation": "Regression test for Bit in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT SUBSTRING('01010101'::bit(8) FROM -10 FOR -2147483646) AS \"error\") AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 73, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the shooting preference of Matthew Myers, acquired in 2010?", "schema": "CREATE TABLE table_name_99 (shoots VARCHAR, acquired VARCHAR, player VARCHAR)", "sql": "SELECT shoots FROM table_name_99 WHERE acquired = 2010 AND player = 'matthew myers';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 84, "num_statements": 1} {"question": "PostgreSQL regression test 'regex': Write the SELECT query (example 11).", "schema": null, "sql": "select 'abc abc abd' ~ '^(.+)( \\1)+$' as f;", "explanation": "Regression test for Regex in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select 'abc abc abd' ~ '^(.+)( \\1)+$' as f) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 43, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What type of government does Kyrgyzstan have?", "schema": "CREATE TABLE table_1604579_2 (country VARCHAR)", "sql": "SELECT 2012 AS _democracy_index FROM table_1604579_2 WHERE country = 'Kyrgyzstan';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 82, "num_statements": 1} {"question": "How many legal cases were opened and closed in each borough of New York City in the last year?", "schema": "CREATE TABLE legal_cases (case_number INT, borough TEXT, open_date DATE, close_date DATE); INSERT INTO legal_cases (case_number, borough, open_date, close_date) VALUES (1, 'Manhattan', '2021-01-01', '2021-03-01'), (2, 'Brooklyn', '2021-02-01', '2021-04-01'), (3, 'Queens', '2021-03-01', '2021-06-01'), (4, 'Bronx', '2021-04-01', '2021-07-01');", "sql": "SELECT borough, COUNT(*) AS cases_opened FROM legal_cases WHERE open_date >= DATE_SUB(CURRENT_DATE, INTERVAL 1 YEAR) GROUP BY borough; SELECT borough, COUNT(*) AS cases_closed FROM legal_cases WHERE close_date >= DATE_SUB(CURRENT_DATE, INTERVAL 1 YEAR) GROUP BY borough;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 270, "num_statements": 2} {"question": "How many total attendees participated in events organized by the Cultural Center?", "schema": "CREATE TABLE Venues (id INT PRIMARY KEY, name VARCHAR(20)); CREATE TABLE Events (id INT PRIMARY KEY, name VARCHAR(20), venue VARCHAR(20), attendees INT); INSERT INTO Venues (id, name) VALUES (1, 'Cultural Center'); INSERT INTO Events (id, name, venue, attendees) VALUES (1, 'Dance Performance', 'Cultural Center', 100); INSERT INTO Events (id, name, venue, attendees) VALUES (2, 'Music Concert', 'Art Gallery', 50);", "sql": "SELECT SUM(attendees) FROM Events WHERE venue = 'Cultural Center';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "query_select", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "What is the maximum price per gram of flower sold by Grower B in Q1 2022?", "schema": "CREATE TABLE grow (id INT, grower VARCHAR(255), product VARCHAR(255), price FLOAT, gram_weight FLOAT); INSERT INTO grow (id, grower, product, price, gram_weight) VALUES (1, 'Grower B', 'Flower', 15.0, 3.5);", "sql": "SELECT MAX(price / gram_weight) FROM grow WHERE grower = 'Grower B' AND product = 'Flower' AND QUARTER(sale_date) = 1 AND YEAR(sale_date) = 2022;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 145, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the highest Total, when Season is \"1996-97\", and when Second is less than 33?", "schema": "CREATE TABLE table_name_23 (total INTEGER, season VARCHAR, second VARCHAR)", "sql": "SELECT MAX(total) FROM table_name_23 WHERE season = '1996-97' AND second < 33;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 78, "num_statements": 1} {"question": "Write the PL/pgSQL object from PostgreSQL regression test 'plpgsql' (example 434).", "schema": null, "sql": "$$ language plpgsql;\n\nselect forc01();\n\n-- try updating the cursor's current row\n\ncreate temp table forc_test as\n select n as i, n as j from generate_series(1,10) n;\n\ncreate or replace function forc01() returns void as $$\ndeclare\n c cursor for select * from forc_test;", "explanation": "PL/pgSQL object from PostgreSQL core test for Plpgsql.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "plpgsql_function", "is_postgresql_specific": true, "sql_length": 270, "num_statements": 4} {"question": "Write the SQL definition from PostgreSQL contrib extension 'contrib' (item 67).", "schema": null, "sql": "CREATE OPERATOR @ (\n LEFTARG = ltree,\n\tRIGHTARG = ltxtquery,\n\tPROCEDURE = ltxtq_exec,\n\tCOMMUTATOR = '@',\n\tRESTRICT = contsel,\n\tJOIN = contjoinsel\n);", "explanation": "SQL definition from the 'contrib' PostgreSQL contrib extension.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "ddl_advanced", "is_postgresql_specific": true, "sql_length": 155, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the score for the Toshiba Senior Classic?", "schema": "CREATE TABLE table_11621799_1 (score VARCHAR, tournament VARCHAR)", "sql": "SELECT score FROM table_11621799_1 WHERE tournament = 'Toshiba Senior Classic';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 79, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What average game has @ florida panthers as the opponent, 0-1-2 as the record, with an october greater than 8?", "schema": "CREATE TABLE table_name_17 (game INTEGER, october VARCHAR, opponent VARCHAR, record VARCHAR)", "sql": "SELECT AVG(game) FROM table_name_17 WHERE opponent = '@ florida panthers' AND record = '0-1-2' AND october > 8;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 111, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Where is the team that is managed by richard barnwell from?", "schema": "CREATE TABLE table_27409644_1 (location VARCHAR, manager VARCHAR)", "sql": "SELECT location FROM table_27409644_1 WHERE manager = 'Richard Barnwell';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 73, "num_statements": 1} {"question": "Find the total area of soybean fields in the 'field_sizes' table that are larger than 500 acres and are located in Brazil.", "schema": "CREATE TABLE field_sizes (crop_type VARCHAR(50), location VARCHAR(50), area_acres FLOAT); INSERT INTO field_sizes (crop_type, location, area_acres) VALUES ('Soybean', 'Brazil', 650); INSERT INTO field_sizes (crop_type, location, area_acres) VALUES ('Soybean', 'Brazil', 475);", "sql": "SELECT SUM(area_acres) FROM field_sizes WHERE crop_type = 'Soybean' AND location = 'Brazil' AND area_acres > 500;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 113, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many dfb-pokal did kevin-prince boateng have?", "schema": "CREATE TABLE table_22167196_1 (dfb_pokal VARCHAR, player VARCHAR)", "sql": "SELECT COUNT(dfb_pokal) FROM table_22167196_1 WHERE player = 'Kevin-Prince Boateng';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 84, "num_statements": 1} {"question": "What is the total number of animals in the 'community_education' view, grouped by continent?", "schema": "CREATE TABLE animal_population (animal VARCHAR(50), continent VARCHAR(50), population INT); INSERT INTO animal_population (animal, continent, population) VALUES ('Tiger', 'Asia', 500), ('Elephant', 'Africa', 300), ('Giraffe', 'Africa', 200), ('Penguin', 'Antarctica', 100); CREATE VIEW community_education AS SELECT animal, CONCAT('South ', continent) AS continent FROM animal_population WHERE continent IN ('America', 'Asia');", "sql": "SELECT continent, COUNT(*) FROM community_education GROUP BY continent;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 71, "num_statements": 1} {"question": "What is the minimum diversity score for content creators who identify as LGBTQ+ in Europe?", "schema": "CREATE TABLE content_creators (id INT, name VARCHAR(50), diversity_score INT, country VARCHAR(50), gender VARCHAR(10), sexual_orientation VARCHAR(20)); INSERT INTO content_creators (id, name, diversity_score, country, gender, sexual_orientation) VALUES (1, 'Creator1', 80, 'UK', 'Female', 'LGBTQ+'), (2, 'Creator2', 85, 'France', 'Male', 'Straight'), (3, 'Creator3', 90, 'Germany', 'Non-binary', 'LGBTQ+'), (4, 'Creator4', 75, 'Italy', 'Female', 'Straight');", "sql": "SELECT MIN(diversity_score) FROM content_creators WHERE country IN ('UK', 'France', 'Germany', 'Italy') AND sexual_orientation = 'LGBTQ+';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 138, "num_statements": 1} {"question": "What is the total revenue for events in Los Angeles, CA in the last quarter?", "schema": "CREATE TABLE Events (id INT, city VARCHAR(20), country VARCHAR(20), date DATE, price DECIMAL(5,2)); INSERT INTO Events (id, city, country, date, price) VALUES (1, 'Los Angeles', 'USA', '2023-01-01', 20.00), (2, 'Los Angeles', 'USA', '2023-03-15', 30.00);", "sql": "SELECT SUM(price) as total_revenue FROM Events WHERE city = 'Los Angeles' AND country = 'USA' AND date >= DATEADD(quarter, -1, GETDATE());", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 138, "num_statements": 1} {"question": "How many subscribers are there in each region for broadband services?", "schema": "CREATE TABLE broadband_subscribers (subscriber_id INT, subscriber_name VARCHAR(50), region VARCHAR(50), plan_type VARCHAR(50), monthly_charges DECIMAL(5,2));INSERT INTO broadband_subscribers (subscriber_id, subscriber_name, region, plan_type, monthly_charges) VALUES (1, 'John Doe', 'North', 'Basic', 34.99), (2, 'Jane Smith', 'South', 'Premium', 59.99), (3, 'Bob Johnson', 'East', 'Basic', 34.99), (4, 'Alice Williams', 'West', 'Premium', 59.99);", "sql": "SELECT region, COUNT(*) as subscriber_count FROM broadband_subscribers GROUP BY region;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 87, "num_statements": 1} {"question": "What is the total number of assistive technology items provided for students with visual or hearing impairments?", "schema": "CREATE TABLE Assistive_Tech (Student_ID INT, Student_Name TEXT, Disability_Type TEXT, Assistive_Tech_Item TEXT); INSERT INTO Assistive_Tech (Student_ID, Student_Name, Disability_Type, Assistive_Tech_Item) VALUES (7, 'Alex Thompson', 'Hearing Impairment', 'Hearing Aid'), (8, 'Jasmine Chen', 'Visual Impairment', 'Screen Reader'), (9, 'Ethan Nguyen', 'None', 'None');", "sql": "SELECT SUM(CASE WHEN Disability_Type IN ('Visual Impairment', 'Hearing Impairment') THEN 1 ELSE 0 END) FROM Assistive_Tech WHERE Assistive_Tech_Item IS NOT NULL;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 161, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which Total has a Rank of 5, and a Bronze smaller than 0?", "schema": "CREATE TABLE table_name_67 (total INTEGER, rank VARCHAR, bronze VARCHAR)", "sql": "SELECT AVG(total) FROM table_name_67 WHERE rank = '5' AND bronze < 0;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 69, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who is the Runner-up in Tournament of seiersberg", "schema": "CREATE TABLE table_name_95 (runner_up VARCHAR, tournament VARCHAR)", "sql": "SELECT runner_up FROM table_name_95 WHERE tournament = 'seiersberg';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 68, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who were all of the opponents in 1984?", "schema": "CREATE TABLE table_1399994_5 (opponents VARCHAR, year VARCHAR)", "sql": "SELECT opponents FROM table_1399994_5 WHERE year = '1984';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 58, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What high points did San Francisco have in a game later than 69?", "schema": "CREATE TABLE table_name_39 (high_points VARCHAR, game VARCHAR, team VARCHAR)", "sql": "SELECT high_points FROM table_name_39 WHERE game > 69 AND team = 'san francisco';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 81, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'postgres_fdw' (example 419).", "schema": null, "sql": "INSERT INTO parent_tbl (a) VALUES(1),(5);", "explanation": "Example query from the 'postgres_fdw' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 41, "num_statements": 1} {"question": "PostgreSQL regression test 'rangefuncs': Write the SELECT query (example 354).", "schema": null, "sql": "select * from rngfuncbar();", "explanation": "Regression test for Rangefuncs in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select * from rngfuncbar()) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 27, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the won promotion for kalmar ff", "schema": "CREATE TABLE table_2119448_3 (won_promotion VARCHAR, lost_promotion_playoffs VARCHAR)", "sql": "SELECT won_promotion FROM table_2119448_3 WHERE lost_promotion_playoffs = 'Kalmar FF';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 86, "num_statements": 1} {"question": "What is the total number of buses and trams in the 'south' region?", "schema": "CREATE TABLE vehicle_counts (region VARCHAR(10), vehicle_type VARCHAR(10), count INT); INSERT INTO vehicle_counts (region, vehicle_type, count) VALUES ('east', 'buses', 100), ('west', 'buses', 120), ('north', 'buses', 150), ('south', 'buses', 80), ('east', 'trams', 20), ('west', 'trams', 30), ('north', 'trams', 40), ('south', 'trams', 50);", "sql": "SELECT SUM(count) FROM vehicle_counts WHERE region = 'south' AND (vehicle_type = 'buses' OR vehicle_type = 'trams');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 116, "num_statements": 1} {"question": "Update the location of the virtual tour with an id of 1", "schema": "CREATE TABLE virtual_tours (id INT PRIMARY KEY, name VARCHAR(255), location VARCHAR(255), tour_type VARCHAR(255)); INSERT INTO virtual_tours (id, name, location, tour_type) VALUES (1, 'Paris Virtual Tour', 'Paris', 'museum_tour'); INSERT INTO virtual_tours (id, name, location, tour_type) VALUES (2, 'Rome Virtual Tour', 'Rome', 'city_tour');", "sql": "UPDATE virtual_tours SET location = 'France' WHERE id = 1;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 58, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the result of the 2011 US Open when the 2007 was F?", "schema": "CREATE TABLE table_name_59 (tournament VARCHAR)", "sql": "SELECT 2011 FROM table_name_59 WHERE 2007 = 'f' AND tournament = 'us open';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 75, "num_statements": 1} {"question": "What are the cybersecurity incidents reported in the last 6 months?", "schema": "CREATE TABLE cyber_incidents (id INT, incident_date DATE, description VARCHAR(255)); INSERT INTO cyber_incidents (id, incident_date, description) VALUES (1, '2022-01-01', 'Phishing attack'), (2, '2022-03-15', 'Ransomware attack'), (3, '2022-04-20', 'Data breach'), (4, '2022-06-05', 'Malware attack');", "sql": "SELECT description, incident_date FROM cyber_incidents WHERE incident_date >= DATE(NOW()) - INTERVAL 6 MONTH;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 109, "num_statements": 1} {"question": "Find the average speed of electric vehicles in New York and London?", "schema": "CREATE TABLE vehicle_speed (id INT, type VARCHAR(20), city VARCHAR(20), speed INT); INSERT INTO vehicle_speed (id, type, city, speed) VALUES (1, 'electric', 'New York', 50), (2, 'electric', 'London', 60), (3, 'gasoline', 'New York', 40);", "sql": "SELECT AVG(speed) FROM vehicle_speed WHERE type = 'electric' AND city IN ('New York', 'London');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 96, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'jsonb' (example 839).", "schema": null, "sql": "update test_jsonb_subscript set test_json = NULL where id = 3;", "explanation": "DML from PostgreSQL core regression test for Jsonb.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 62, "num_statements": 1} {"question": "Display the average monthly maintenance cost for each equipment type in the 'EquipmentMaintenance' table", "schema": "CREATE TABLE EquipmentMaintenance (id INT, type VARCHAR(255), cost FLOAT, date DATE);", "sql": "SELECT type, AVG(cost) as avg_monthly_cost FROM EquipmentMaintenance WHERE date >= '2021-01-01' AND date <= '2021-12-31' GROUP BY type;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 135, "num_statements": 1} {"question": "Which medical conditions were diagnosed for astronauts from Egypt?", "schema": "CREATE TABLE Astronaut (id INT PRIMARY KEY, name VARCHAR(50), age INT, gender VARCHAR(10), nationality VARCHAR(50)); CREATE TABLE Medical (id INT PRIMARY KEY, astronaut_id INT, medical_condition VARCHAR(50), examination_date DATE, FOREIGN KEY (astronaut_id) REFERENCES Astronaut(id));", "sql": "SELECT Medical.medical_condition FROM Astronaut INNER JOIN Medical ON Astronaut.id = Medical.astronaut_id WHERE Astronaut.nationality = 'Egypt';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 144, "num_statements": 1} {"question": "Which organic grocery stores in New York City sell locally sourced fruits?", "schema": "CREATE TABLE Store (id INT, name VARCHAR(50), city VARCHAR(50)); INSERT INTO Store (id, name, city) VALUES (1, 'Green Earth', 'New York'); INSERT INTO Store (id, name, city) VALUES (2, 'Healthy Harvest', 'Los Angeles'); CREATE TABLE Inventory (id INT, store_id INT, item VARCHAR(50), is_local BOOLEAN, is_organic BOOLEAN); INSERT INTO Inventory (id, store_id, item, is_local, is_organic) VALUES (1, 1, 'Apples', TRUE, TRUE); INSERT INTO Inventory (id, store_id, item, is_local, is_organic) VALUES (2, 1, 'Bananas', FALSE, TRUE); INSERT INTO Inventory (id, store_id, item, is_local, is_organic) VALUES (3, 2, 'Oranges', TRUE, TRUE); INSERT INTO Inventory (id, store_id, item, is_local, is_organic) VALUES (4, 2, 'Grapes', FALSE, TRUE);", "sql": "SELECT s.name FROM Store s JOIN Inventory i ON s.id = i.store_id WHERE s.city = 'New York' AND i.is_local = TRUE AND i.is_organic = TRUE AND i.item LIKE 'F%';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 158, "num_statements": 1} {"question": "Calculate the percentage of vegan customers in each city.", "schema": "CREATE TABLE Customers (CustomerID INT, Name TEXT, City TEXT, IsVegan BOOLEAN);", "sql": "SELECT City, IsVegan, COUNT(IsVegan) * 100.0 / (SELECT COUNT(*) FROM Customers) AS Percentage FROM Customers WHERE IsVegan = TRUE GROUP BY City;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 144, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What player has a no pick position in 1976?", "schema": "CREATE TABLE table_name_33 (player VARCHAR, position VARCHAR, year VARCHAR)", "sql": "SELECT player FROM table_name_33 WHERE position = 'no pick' AND year = 1976;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 76, "num_statements": 1} {"question": "Write the SQL definition from PostgreSQL contrib extension 'contrib' (item 18).", "schema": null, "sql": "CREATE FUNCTION _int_same(_int4, _int4)\nRETURNS bool\nAS 'MODULE_PATHNAME'\nLANGUAGE C STRICT IMMUTABLE PARALLEL SAFE;", "explanation": "SQL definition from the 'contrib' PostgreSQL contrib extension.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 116, "num_statements": 1} {"question": "What is the maximum horsepower of sports cars released in 2022?", "schema": "CREATE TABLE SportsCars (VIN VARCHAR(20), Model VARCHAR(20), Horsepower INT, ProductionYear INT); INSERT INTO SportsCars (VIN, Model, Horsepower, ProductionYear) VALUES ('AA11BB2233', 'ModelX', 500, 2022), ('CC22DD3344', 'ModelY', 600, 2022);", "sql": "SELECT MAX(Horsepower) FROM SportsCars WHERE ProductionYear = 2022 AND Model LIKE '%Sports Car%';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 97, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'vacuum' (example 316).", "schema": null, "sql": "INSERT INTO vac_rewrite_toast values (1, repeat('a', 7000));", "explanation": "DML from PostgreSQL core regression test for Vacuum.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 60, "num_statements": 1} {"question": "How many unique customers purchased sustainable clothing in the last 6 months?", "schema": "CREATE TABLE Customers (id INT, name VARCHAR(50), sustainable_purchase_date DATE); INSERT INTO Customers (id, name, sustainable_purchase_date) VALUES (1, 'Alice', '2022-01-01'), (2, 'Bob', '2022-02-15'), (3, 'Charlie', '2022-03-05'), (4, 'David', '2022-04-10'), (5, 'Eve', '2022-05-25'), (6, 'Frank', '2022-06-12');", "sql": "SELECT COUNT(DISTINCT id) FROM Customers WHERE sustainable_purchase_date >= DATEADD(MONTH, -6, GETDATE());", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 106, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which match was the final score 7–6 (7–0) , 6–7 (5–7) , 4–6, 6–2, 6–7 (5–7)?", "schema": "CREATE TABLE table_26202812_7 (no VARCHAR, score_in_the_final VARCHAR)", "sql": "SELECT no FROM table_26202812_7 WHERE score_in_the_final = '7–6 (7–0) , 6–7 (5–7) , 4–6, 6–2, 6–7 (5–7)';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 105, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: When was a competition held at Pietermaritzburg?", "schema": "CREATE TABLE table_name_83 (date VARCHAR, venue VARCHAR)", "sql": "SELECT date FROM table_name_83 WHERE venue = 'pietermaritzburg';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 64, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the overall pick number that Derrick Harvey was when he was picked in a round after round 1?", "schema": "CREATE TABLE table_name_26 (overall INTEGER, name VARCHAR, round VARCHAR)", "sql": "SELECT MIN(overall) FROM table_name_26 WHERE name = 'derrick harvey' AND round > 1;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 83, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the total number of laps during the race that had a time of +9.682?", "schema": "CREATE TABLE table_name_6 (laps VARCHAR, time_retired VARCHAR)", "sql": "SELECT COUNT(laps) FROM table_name_6 WHERE time_retired = '+9.682';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 67, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'update' (example 71).", "schema": null, "sql": "CREATE TABLE part_c_1_100 (e varchar, d int, c numeric, b bigint, a text);", "explanation": "DDL from PostgreSQL core regression test for Update.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_table", "is_postgresql_specific": false, "sql_length": 74, "num_statements": 1} {"question": "pgTAP test for Pgtap--0.91.0--0.92.0 (assertion 109).", "schema": null, "sql": "-- any_column_privs_are ( schema, table, user, privileges[], description )\nCREATE OR REPLACE FUNCTION any_column_privs_are ( NAME, NAME, NAME, NAME[], TEXT )\nRETURNS TEXT AS $$\nDECLARE\n grants TEXT[] := _get_ac_privs( $3, quote_ident($1) || '.' || quote_ident($2) );\nBEGIN\n IF grants[1] = 'undefined_table' THEN\n RETURN ok(FALSE, $5) || E'\\n' || diag(\n ' Table ' || quote_ident($1) || '.' || quote_ident($2) || ' does not exist'\n );\n ELSIF grants[1] = 'undefined_role' THEN\n RETURN ok(FALSE, $5) || E'\\n' || diag(\n ' Role ' || quote_ident($3) || ' does not exist'\n );\n END IF;\n RETURN _assets_are('privileges', grants, $4, $5);\nEND;\n$$ LANGUAGE plpgsql;", "explanation": "SQL assertion from pgTAP test suite for Pgtap--0.91.0--0.92.0.", "validation_query": null, "source": "pgtap_tests", "difficulty": "advanced", "category": "plpgsql_function", "is_postgresql_specific": true, "sql_length": 724, "num_statements": 7} {"question": "Generate PostgreSQL SQL for: What percentage of voters voted for a third party in the county that had 802 third party voters?", "schema": "CREATE TABLE table_20278716_2 (others__percentage VARCHAR, others__number VARCHAR)", "sql": "SELECT others__percentage FROM table_20278716_2 WHERE others__number = 802;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 75, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many poles had a moto2 class?", "schema": "CREATE TABLE table_name_75 (pole VARCHAR, class VARCHAR)", "sql": "SELECT pole FROM table_name_75 WHERE class = 'moto2';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 53, "num_statements": 1} {"question": "PostgreSQL regression test 'horology': Write the SELECT query (example 38).", "schema": null, "sql": "SELECT time without time zone 'T040506.789+08';", "explanation": "Regression test for Horology in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT time without time zone 'T040506.789+08') AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 47, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the result of the match that had a score of 58-6?", "schema": "CREATE TABLE table_name_92 (result VARCHAR, score VARCHAR)", "sql": "SELECT result FROM table_name_92 WHERE score = '58-6';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 54, "num_statements": 1} {"question": "PostgreSQL regression test 'jsonb_jsonpath': Write the SELECT query (example 552).", "schema": null, "sql": "select jsonb_path_query('\"2023-08-15\"', '$.time()');", "explanation": "Regression test for Jsonb Jsonpath in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select jsonb_path_query('\"2023-08-15\"', '$.time()')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "advanced", "category": "query_select", "is_postgresql_specific": false, "sql_length": 52, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'union' (example 159).", "schema": null, "sql": "INSERT INTO t2c VALUES ('vw'), ('cd'), ('mn'), ('ef');", "explanation": "DML from PostgreSQL core regression test for Union.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 54, "num_statements": 1} {"question": "Insert records for properties in New York with a green_certification of 'ENERGY STAR' into the GreenCertified view.", "schema": "CREATE TABLE GreenBuildings (id INT, building_id INT, address VARCHAR(100), green_certification VARCHAR(50)); CREATE VIEW GreenCertified AS SELECT building_id, COUNT(green_certification) as num_certified FROM GreenBuildings WHERE green_certification IS NOT NULL GROUP BY building_id;", "sql": "INSERT INTO GreenCertified (building_id, num_certified) SELECT building_id, 1 FROM GreenBuildings WHERE state = 'NY' AND green_certification = 'ENERGY STAR';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 157, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the production code that was written by matt robinson?", "schema": "CREATE TABLE table_2818164_7 (production_code VARCHAR, written_by VARCHAR)", "sql": "SELECT production_code FROM table_2818164_7 WHERE written_by = 'Matt Robinson';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 79, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What numbered game featured a High rebounds of radoslav nesterović (8), and a High assists of josé calderón (9)?", "schema": "CREATE TABLE table_name_12 (game VARCHAR, high_rebounds VARCHAR, high_assists VARCHAR)", "sql": "SELECT COUNT(game) FROM table_name_12 WHERE high_rebounds = 'radoslav nesterović (8)' AND high_assists = 'josé calderón (9)';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 125, "num_statements": 1} {"question": "List the names of rural healthcare centers in the US that serve more than 200 patients.", "schema": "CREATE TABLE healthcare_centers_us (name TEXT, location TEXT, patients_served INT); INSERT INTO healthcare_centers_us (name, location, patients_served) VALUES ('HC A', 'Rural Alabama', 250), ('HC B', 'Rural Alaska', 150), ('HC C', 'Rural California', 225);", "sql": "SELECT name FROM healthcare_centers_us WHERE location LIKE 'Rural%' AND patients_served > 200;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 94, "num_statements": 1} {"question": "What is the total number of publications by assistant professors in the Engineering college?", "schema": "CREATE TABLE faculty(id INT, rank TEXT, college TEXT); CREATE TABLE publications(id INT, faculty_id INT, year INT); INSERT INTO faculty(id, rank, college) VALUES (1, 'assistant professor', 'Engineering'), (2, 'associate professor', 'Liberal Arts'); INSERT INTO publications(id, faculty_id, year) VALUES (1, 1, 2020), (2, 1, 2021), (3, 2, 2019);", "sql": "SELECT COUNT(*) FROM publications JOIN faculty ON publications.id = faculty.id WHERE rank = 'assistant professor' AND college = 'Engineering';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 142, "num_statements": 1} {"question": "What is the total revenue for organic skincare products in the USA?", "schema": "CREATE TABLE skincare_sales (id INT, product VARCHAR(50), revenue DECIMAL(10,2), country VARCHAR(50)); INSERT INTO skincare_sales (id, product, revenue, country) VALUES (1, 'Organic Facial Cleanser', 500.00, 'USA');", "sql": "SELECT SUM(revenue) FROM skincare_sales WHERE product LIKE '%Organic%' AND country = 'USA';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 91, "num_statements": 1} {"question": "What is the average age of athletes in the 'Soccer' team in the 'athletes' table?", "schema": "CREATE TABLE athletes (athlete_id INT, name VARCHAR(50), age INT, team VARCHAR(20)); INSERT INTO athletes (athlete_id, name, age, team) VALUES (1, 'Jane Doe', 30, 'Soccer'); INSERT INTO athletes (athlete_id, name, age, team) VALUES (2, 'Jim Brown', 28, 'Basketball'); INSERT INTO athletes (athlete_id, name, age, team) VALUES (3, 'Marie Jones', 26, 'Soccer');", "sql": "SELECT AVG(age) FROM athletes WHERE team = 'Soccer';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 52, "num_statements": 1} {"question": "Create a table named 'events' with columns 'name', 'date', and 'attendance'", "schema": "CREATE TABLE events (name VARCHAR(255), date DATE, attendance INT);", "sql": "CREATE TABLE events (name VARCHAR(255), date DATE, attendance INT);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_table", "is_postgresql_specific": false, "sql_length": 67, "num_statements": 1} {"question": "What is the total number of marine life research sites in the Pacific and Atlantic oceans?", "schema": "CREATE TABLE marine_sites (site_id INT, site_name TEXT, ocean TEXT); INSERT INTO marine_sites (site_id, site_name, ocean) VALUES (1, 'Research Site A', 'Pacific'), (2, 'Research Site B', 'Atlantic'), (3, 'Research Site C', 'Pacific');", "sql": "SELECT COUNT(*) FROM marine_sites WHERE ocean IN ('Pacific', 'Atlantic');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 73, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'postgres_fdw' (example 739).", "schema": null, "sql": "insert into itrtest values (1, 'test1'), (2, 'test2') returning *;", "explanation": "Example query from the 'postgres_fdw' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": true, "sql_length": 66, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who won the South African Grand Prix?", "schema": "CREATE TABLE table_name_63 (race VARCHAR)", "sql": "SELECT race AS Winner FROM table_name_63 WHERE race = 'south african grand prix';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 81, "num_statements": 1} {"question": "What is the total transaction volume for all clients in the Asia-Pacific region, excluding transactions from the banking sector?", "schema": "CREATE TABLE client (client_id INT, client_name VARCHAR(50), region VARCHAR(50)); INSERT INTO client (client_id, client_name, region) VALUES (1, 'ABC Corp', 'Asia-Pacific'), (2, 'XYZ Bank', 'Americas'); CREATE TABLE transaction (transaction_id INT, client_id INT, sector VARCHAR(50), amount DECIMAL(10,2)); INSERT INTO transaction (transaction_id, client_id, sector, amount) VALUES (1, 1, 'Retail', 5000), (2, 1, 'Real Estate', 7000), (3, 2, 'Banking', 10000), (4, 1, 'Technology', 8000);", "sql": "SELECT SUM(amount) FROM transaction WHERE client_id IN (SELECT client_id FROM client WHERE region = 'Asia-Pacific') AND sector != 'Banking';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 140, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the FIS Nordic World Ski Championship years when the winter Olympics took place in 1968?", "schema": "CREATE TABLE table_name_93 (fis_nordic_world_ski_championships VARCHAR, winter_olympics VARCHAR)", "sql": "SELECT fis_nordic_world_ski_championships FROM table_name_93 WHERE winter_olympics = 1968;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 90, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What engine does driver james hunt have?", "schema": "CREATE TABLE table_name_94 (engine VARCHAR, driver VARCHAR)", "sql": "SELECT engine FROM table_name_94 WHERE driver = 'james hunt';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 61, "num_statements": 1} {"question": "Select all tanks from the military_equipment table", "schema": "CREATE TABLE military_equipment (equipment_id INT, name VARCHAR(255), type VARCHAR(255), country_of_origin VARCHAR(255), year INT); INSERT INTO military_equipment (equipment_id, name, type, country_of_origin, year) VALUES (1, 'M1 Abrams', 'Tank', 'USA', 1980), (2, 'Leopard 2', 'Tank', 'Germany', 1979), (3, 'F-16', 'Fighter Jet', 'USA', 1976);", "sql": "SELECT * FROM military_equipment WHERE type = 'Tank';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 53, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What's the name of the title for the Maserati A6GCS?", "schema": "CREATE TABLE table_name_81 (race_title VARCHAR, vehicle VARCHAR)", "sql": "SELECT race_title FROM table_name_81 WHERE vehicle = 'maserati a6gcs';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 70, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which status accompanies the score 21.5?", "schema": "CREATE TABLE table_name_94 (status VARCHAR, score VARCHAR)", "sql": "SELECT status FROM table_name_94 WHERE score = 21.5;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 52, "num_statements": 1} {"question": "Calculate the total volume of wastewater treated in 'Lima' for each month of the year 2020", "schema": "CREATE TABLE wastewater_treatment (region VARCHAR(50), date DATE, volume FLOAT); INSERT INTO wastewater_treatment (region, date, volume) VALUES ('Lima', '2020-01-01', 500), ('Lima', '2020-02-01', 550), ('Lima', '2020-03-01', 600);", "sql": "SELECT date, SUM(volume) FROM wastewater_treatment WHERE region = 'Lima' AND date BETWEEN '2020-01-01' AND '2020-12-31' GROUP BY date;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 134, "num_statements": 1} {"question": "What is the minimum ocean acidity level ever recorded in the Caribbean Sea, grouped by measurement month?", "schema": "CREATE TABLE ocean_acidity_records (record_id INTEGER, month INTEGER, acidity_level FLOAT, ocean TEXT);", "sql": "SELECT month, MIN(acidity_level) FROM ocean_acidity_records WHERE ocean = 'Caribbean Sea' GROUP BY month;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 105, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is John Edwards, when John Kerry is \"70%\"?", "schema": "CREATE TABLE table_name_41 (john_edwards VARCHAR, john_kerry VARCHAR)", "sql": "SELECT john_edwards FROM table_name_41 WHERE john_kerry = '70%';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 64, "num_statements": 1} {"question": "PostgreSQL regression test 'partition_split': Write the SELECT query (example 114).", "schema": null, "sql": "SELECT tableoid::regclass, * FROM salespeople ORDER BY tableoid::regclass::text COLLATE \"C\", salesperson_id;", "explanation": "Regression test for Partition Split in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT tableoid::regclass, * FROM salespeople ORDER BY tableoid::regclass::text COLLATE \"C\", salesperson_id) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 108, "num_statements": 1} {"question": "Write the PL/pgSQL object from PostgreSQL regression test 'alter_generic' (example 86).", "schema": null, "sql": "ALTER OPERATOR FAMILY alt_opf5 USING btree ADD OPERATOR 1 < (int4, int2), FUNCTION 1 btint42cmp(int4, int2);", "explanation": "PL/pgSQL object from PostgreSQL core test for Alter Generic.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 108, "num_statements": 1} {"question": "pgTAP test for Pgtap--Unpackaged--0.91.0 (assertion 572).", "schema": null, "sql": "ALTER EXTENSION pgtap ADD FUNCTION check_test( TEXT, BOOLEAN );", "explanation": "SQL assertion from pgTAP test suite for Pgtap--Unpackaged--0.91.0.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 63, "num_statements": 1} {"question": "Determine virtual reality games that have more than 800 players and released in 2019", "schema": "CREATE TABLE vr_games (game VARCHAR(20), players INT, release_year INT); INSERT INTO vr_games (game, players, release_year) VALUES ('Game1', 1000, 2019); INSERT INTO vr_games (game, players, release_year) VALUES ('Game2', 500, 2018);", "sql": "SELECT game FROM vr_games WHERE players > 800 AND release_year = 2019;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 70, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the minimum stations owned since kero-tv?", "schema": "CREATE TABLE table_1847523_2 (owned_since INTEGER, station VARCHAR)", "sql": "SELECT MIN(owned_since) FROM table_1847523_2 WHERE station = 'KERO-TV';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 71, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the average Bronze, when Rank is greater than 6, when Nation is Italy (ITA), and when Total is less than 1?", "schema": "CREATE TABLE table_name_78 (bronze INTEGER, total VARCHAR, rank VARCHAR, nation VARCHAR)", "sql": "SELECT AVG(bronze) FROM table_name_78 WHERE rank > 6 AND nation = 'italy (ita)' AND total < 1;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 94, "num_statements": 1} {"question": "PostgreSQL regression test 'timestamp': Write the SELECT query (example 14).", "schema": null, "sql": "SELECT pg_sleep(0.1);", "explanation": "Regression test for Timestamp in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT pg_sleep(0.1)) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 21, "num_statements": 1} {"question": "What is the total annual production of Terbium from all mines in 2020?", "schema": "CREATE TABLE mine (id INT, name TEXT, location TEXT, Terbium_annual_production FLOAT, timestamp TIMESTAMP); INSERT INTO mine (id, name, location, Terbium_annual_production, timestamp) VALUES (1, 'Australian Mine', 'Australia', 1500.5, '2020-01-01'), (2, 'Californian Mine', 'USA', 1700.3, '2020-01-01'), (3, 'Brazilian Mine', 'Brazil', 1000.0, '2020-01-01');", "sql": "SELECT SUM(Terbium_annual_production) FROM mine WHERE EXTRACT(YEAR FROM timestamp) = 2020;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 90, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the title of the episode that aired on december 12, 1953?", "schema": "CREATE TABLE table_15824796_3 (title VARCHAR, original_air_date VARCHAR)", "sql": "SELECT title FROM table_15824796_3 WHERE original_air_date = 'December 12, 1953';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 81, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the maximum folded value of the team whose stadium is Fraser Field?", "schema": "CREATE TABLE table_24334261_1 (folded INTEGER, stadium VARCHAR)", "sql": "SELECT MAX(folded) FROM table_24334261_1 WHERE stadium = 'Fraser Field';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 72, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many numbers were recorded under max speed for 1 USAF space flight and total flights 34?", "schema": "CREATE TABLE table_221315_3 (max_speed__mph_ VARCHAR, usaf_space_flights VARCHAR, total_flights VARCHAR)", "sql": "SELECT COUNT(max_speed__mph_) FROM table_221315_3 WHERE usaf_space_flights = 1 AND total_flights = 34;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 102, "num_statements": 1} {"question": "PostgreSQL regression test 'create_table_like': Write the SELECT query (example 19).", "schema": null, "sql": "SELECT * FROM test_like_id_2; -- identity was not copied\nCREATE TABLE test_like_id_3 (LIKE test_like_id_1 INCLUDING IDENTITY);", "explanation": "Regression test for Create Table Like in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT * FROM test_like_id_2; -- identity was not copied\nCREATE TABLE test_like_id_3 (LIKE test_like_id_1 INCLUDING IDENTITY)) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "ddl_table", "is_postgresql_specific": false, "sql_length": 127, "num_statements": 2} {"question": "Identify the total workout time (in minutes) for users in the \"East\" region who have exercised for over 120 minutes in a session, for each session type, in the last week.", "schema": "CREATE TABLE user_profile (user_id INT, region VARCHAR(20), PRIMARY KEY (user_id)); CREATE TABLE workout_sessions (session_date DATE, user_id INT, session_type VARCHAR(30), workout_mins INT, PRIMARY KEY (session_date, user_id)); INSERT INTO user_profile (user_id, region) VALUES (1, 'East'), (2, 'North'), (3, 'East'); INSERT INTO workout_sessions (session_date, user_id, session_type, workout_mins) VALUES ('2022-04-01', 1, 'Cardio', 150), ('2022-04-02', 2, 'Strength', 100), ('2022-04-03', 3, 'Cardio', 180), ('2022-04-04', 1, 'Yoga', 90), ('2022-04-04', 3, 'Strength', 130);", "sql": "SELECT session_type, SUM(workout_mins) as total_workout_mins FROM workout_sessions JOIN user_profile ON workout_sessions.user_id = user_profile.user_id WHERE user_profile.region = 'East' AND workout_mins > 120 AND session_date >= DATE(NOW()) - INTERVAL 7 DAY GROUP BY session_type;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 281, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the lowest Overall, when Player is \"Dan Jennings\", and when Round is greater than 9?", "schema": "CREATE TABLE table_name_23 (overall INTEGER, player VARCHAR, round VARCHAR)", "sql": "SELECT MIN(overall) FROM table_name_23 WHERE player = 'dan jennings' AND round > 9;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 83, "num_statements": 1} {"question": "What is the average age of inmates in Illinois prisons who have not participated in restorative justice programs?", "schema": "CREATE TABLE prisons (id INT, state VARCHAR(2)); INSERT INTO prisons (id, state) VALUES (1, 'Illinois'); CREATE TABLE inmates (id INT, age INT, prison_id INT, restorative_justice BOOLEAN);", "sql": "SELECT AVG(inmates.age) FROM inmates INNER JOIN prisons ON inmates.prison_id = prisons.id WHERE prisons.state = 'Illinois' AND inmates.restorative_justice = FALSE;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 163, "num_statements": 1} {"question": "What is the average word count of articles in 'reputable_news' table for each author who is over 40 years old?", "schema": "CREATE TABLE reputable_news (article_id INT, author_name VARCHAR(50), author_age INT, word_count INT, publication_date DATE);", "sql": "SELECT author_name, AVG(word_count) FROM reputable_news WHERE author_age > 40 GROUP BY author_name;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 99, "num_statements": 1} {"question": "What is the difference between the maximum and minimum expedition depths for the 'Deep Sea Divers' organization?", "schema": "CREATE TABLE expedition (org VARCHAR(20), depth INT); INSERT INTO expedition VALUES ('Ocean Explorer', 2500), ('Ocean Explorer', 3000), ('Sea Discoverers', 2000), ('Marine Investigators', 4000), ('Marine Investigators', 4500), ('Deep Sea Divers', 7000), ('Deep Sea Divers', 6500);", "sql": "SELECT MAX(depth) - MIN(depth) FROM expedition WHERE org = 'Deep Sea Divers';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 77, "num_statements": 1} {"question": "Which excavation site has the most total artifacts?", "schema": "CREATE TABLE excavation_sites (id INT, site_name VARCHAR(255)); CREATE TABLE artifacts (id INT, excavation_site_id INT, artifact_type VARCHAR(255));", "sql": "SELECT e.site_name, COUNT(a.id) AS total_artifacts FROM excavation_sites e JOIN artifacts a ON e.id = a.excavation_site_id GROUP BY e.site_name ORDER BY total_artifacts DESC LIMIT 1;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 182, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'citext' (example 250).", "schema": null, "sql": "INSERT INTO caster (text) VALUES ('04:05:06'::time);", "explanation": "Example query from the 'citext' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 61, "num_statements": 1} {"question": "Create a table to store product safety records", "schema": "CREATE TABLE product_safety (safety_record_id INT PRIMARY KEY, product_id INT, safety_test_date DATE, safety_result TEXT);", "sql": "CREATE TABLE product_safety (safety_record_id INT PRIMARY KEY, product_id INT, safety_test_date DATE, safety_result TEXT);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_table", "is_postgresql_specific": false, "sql_length": 122, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What date has 79-48 as the record?", "schema": "CREATE TABLE table_name_47 (date VARCHAR, record VARCHAR)", "sql": "SELECT date FROM table_name_47 WHERE record = '79-48';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 54, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the total for 3rd place on season 9?", "schema": "CREATE TABLE table_name_41 (total VARCHAR, place VARCHAR, season VARCHAR)", "sql": "SELECT total FROM table_name_41 WHERE place = '3rd' AND season = '9';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 69, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which GDP per capita (US$) (2004) has a Literacy (2003) of 90%, and an Area (km²) of 1247689.5?", "schema": "CREATE TABLE table_name_53 (gdp_per_capita__us INTEGER, literacy__2003_ VARCHAR, area__km²_ VARCHAR)", "sql": "SELECT AVG(gdp_per_capita__us) AS $___2004_ FROM table_name_53 WHERE literacy__2003_ = '90%' AND area__km²_ = 1247689.5;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 120, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What pick was the player from Apopka, FL in the 2002 MLB draft", "schema": "CREATE TABLE table_11677100_8 (mlb_draft VARCHAR, hometown VARCHAR)", "sql": "SELECT mlb_draft FROM table_11677100_8 WHERE hometown = 'Apopka, FL';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 69, "num_statements": 1} {"question": "PostgreSQL regression test 'xml': Write the SELECT query (example 92).", "schema": null, "sql": "SELECT xmlserialize(DOCUMENT '42text node73' AS text INDENT);", "explanation": "Regression test for Xml in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT xmlserialize(DOCUMENT '42text node73' AS text INDENT)) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 128, "num_statements": 1} {"question": "What is the maximum number of workers on a single project in the state of New York?", "schema": "CREATE TABLE Projects (project_id INT, state VARCHAR(255), num_workers INT); INSERT INTO Projects (project_id, state, num_workers) VALUES (1, 'New York', 10), (2, 'New York', 15);", "sql": "SELECT MAX(num_workers) FROM Projects WHERE state = 'New York';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 63, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the score for United States when the player was Mike Reid and the To par was e?", "schema": "CREATE TABLE table_name_30 (score VARCHAR, player VARCHAR, country VARCHAR, to_par VARCHAR)", "sql": "SELECT score FROM table_name_30 WHERE country = 'united states' AND to_par = 'e' AND player = 'mike reid';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 106, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Find all the male members of club \"Hopkins Student Enterprises\". Show the first name and last name.", "schema": "CREATE TABLE club (clubid VARCHAR, clubname VARCHAR); CREATE TABLE student (fname VARCHAR, lname VARCHAR, stuid VARCHAR, sex VARCHAR); CREATE TABLE member_of_club (clubid VARCHAR, stuid VARCHAR)", "sql": "SELECT t3.fname, t3.lname FROM club AS t1 JOIN member_of_club AS t2 ON t1.clubid = t2.clubid JOIN student AS t3 ON t2.stuid = t3.stuid WHERE t1.clubname = 'Hopkins Student Enterprises' AND t3.sex = 'M';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 202, "num_statements": 1} {"question": "What is the total number of volunteers and total number of hours contributed for the entire organization?", "schema": "CREATE TABLE organizations (org_id INT, org_name TEXT);CREATE TABLE volunteers (vol_id INT, org_id INT, hours_contributed INT, volunteer_name TEXT); INSERT INTO organizations VALUES (1, 'Habitat for Humanity'); INSERT INTO organizations VALUES (2, 'Red Cross'); INSERT INTO volunteers VALUES (1, 1, 10, 'John Doe'); INSERT INTO volunteers VALUES (2, 1, 15, 'Jane Smith'); INSERT INTO volunteers VALUES (3, 2, 20, 'Mary Johnson');", "sql": "SELECT organizations.org_name, COUNT(DISTINCT volunteers.vol_id) AS total_volunteers, SUM(volunteers.hours_contributed) AS total_hours FROM organizations INNER JOIN volunteers ON organizations.org_id = volunteers.org_id GROUP BY organizations.org_name;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 252, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'unaccent' (example 16).", "schema": null, "sql": "SELECT ts_lexize('unaccent', 'foobar');", "explanation": "Example query from the 'unaccent' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 39, "num_statements": 1} {"question": "What is the distribution of AI safety principles across different regions?", "schema": "CREATE TABLE ai_safety_principles (principle_name TEXT, region TEXT); INSERT INTO ai_safety_principles (principle_name, region) VALUES ('Beneficence', 'Asia'), ('Nonmaleficence', 'Europe'), ('Autonomy', 'Americas'), ('Justice', 'Africa');", "sql": "SELECT principle_name, region, COUNT(*) FROM ai_safety_principles GROUP BY principle_name, region;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 98, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which country has is Len Mattiace in T10 place?", "schema": "CREATE TABLE table_name_54 (country VARCHAR, place VARCHAR, player VARCHAR)", "sql": "SELECT country FROM table_name_54 WHERE place = 't10' AND player = 'len mattiace';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 82, "num_statements": 1} {"question": "What is the average cost of renewable energy infrastructure for projects in the 'renewable_energy' schema, grouped by infrastructure type?", "schema": "CREATE TABLE renewable_energy (project_id INT, infrastructure_type TEXT, cost FLOAT); INSERT INTO renewable_energy (project_id, infrastructure_type, cost) VALUES (1, 'solar farm', 500000.0), (1, 'wind farm', 700000.0), (2, 'geothermal plant', 900000.0), (2, 'hydroelectric dam', 1000000.0);", "sql": "SELECT infrastructure_type, AVG(cost) FROM renewable_energy WHERE project_id IN (SELECT project_id FROM projects WHERE schema_name = 'renewable_energy') GROUP BY infrastructure_type;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 182, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who was the Centerfold model in the issue in which the Interview subject was José Napoleón Duarte?", "schema": "CREATE TABLE table_name_96 (centerfold_model VARCHAR, interview_subject VARCHAR)", "sql": "SELECT centerfold_model FROM table_name_96 WHERE interview_subject = 'josé napoleón duarte';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 92, "num_statements": 1} {"question": "What is the total number of labor rights violations for unions in the manufacturing sector, grouped by union name?", "schema": "CREATE TABLE union_manufacturing (union_id INT, union_name TEXT, sector TEXT, violations INT); INSERT INTO union_manufacturing (union_id, union_name, sector, violations) VALUES (1, 'Union P', 'Manufacturing', 30), (2, 'Union Q', 'Manufacturing', 40), (3, 'Union R', 'Retail', 20);", "sql": "SELECT union_name, SUM(violations) FROM union_manufacturing WHERE sector = 'Manufacturing' GROUP BY union_name;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 111, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What 2009 has statistics by surface in 2012?", "schema": "CREATE TABLE table_name_66 (Id VARCHAR)", "sql": "SELECT 2009 FROM table_name_66 WHERE 2012 = 'statistics by surface';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 68, "num_statements": 1} {"question": "PostgreSQL regression test 'jsonb_jsonpath': Write the SELECT query (example 266).", "schema": null, "sql": "select jsonb_path_query('[]', 'strict $.double()');", "explanation": "Regression test for Jsonb Jsonpath in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select jsonb_path_query('[]', 'strict $.double()')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "advanced", "category": "query_select", "is_postgresql_specific": false, "sql_length": 51, "num_statements": 1} {"question": "Which causes received the highest donation amounts in a specific region?", "schema": "CREATE TABLE Donations (DonationID INT, Cause VARCHAR(50), Amount DECIMAL(10,2), Region VARCHAR(50)); INSERT INTO Donations (DonationID, Cause, Amount, Region) VALUES (1, 'Education', 2000, 'Africa'), (2, 'Health', 3000, 'Asia'), (3, 'Education', 1000, 'Africa'), (4, 'Environment', 4000, 'Europe');", "sql": "SELECT Cause, SUM(Amount) as TotalDonation FROM Donations WHERE Region = 'Asia' GROUP BY Cause ORDER BY TotalDonation DESC;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 123, "num_statements": 1} {"question": "Find the total number of smart contracts associated with digital assets having a value greater than 80?", "schema": "CREATE TABLE digital_assets (asset_id INT, asset_name VARCHAR(50), value DECIMAL(10,2)); INSERT INTO digital_assets (asset_id, asset_name, value) VALUES (1, 'Asset1', 50.5), (2, 'Asset2', 100.2), (3, 'Asset3', 75.0); CREATE TABLE smart_contracts (contract_id INT, asset_id INT, contract_name VARCHAR(50)); INSERT INTO smart_contracts (contract_id, asset_id, contract_name) VALUES (101, 1, 'Contract1'), (102, 2, 'Contract2'), (103, 3, 'Contract3');", "sql": "SELECT COUNT(*) FROM smart_contracts INNER JOIN digital_assets ON smart_contracts.asset_id = digital_assets.asset_id WHERE digital_assets.value > 80;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 149, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which Country has a Score of 69-66=135?", "schema": "CREATE TABLE table_name_76 (country VARCHAR, score VARCHAR)", "sql": "SELECT country FROM table_name_76 WHERE score = 69 - 66 = 135;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 62, "num_statements": 1} {"question": "What is the maximum salary of unionized workers in the 'Retail' industry?", "schema": "CREATE TABLE Workers (EmployeeID INT, Industry VARCHAR(20), UnionMember BOOLEAN, Salary FLOAT); INSERT INTO Workers (EmployeeID, Industry, UnionMember, Salary) VALUES (1, 'Retail', true, 35000.0), (2, 'Retail', true, 36000.0), (3, 'Retail', false, 33000.0);", "sql": "SELECT MAX(Salary) FROM Workers WHERE Industry = 'Retail' AND UnionMember = true;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 81, "num_statements": 1} {"question": "What is the maximum depth of any marine protected area in the Indian region?", "schema": "CREATE TABLE marine_protected_areas (name VARCHAR(255), location VARCHAR(255), depth FLOAT); INSERT INTO marine_protected_areas (name, location, depth) VALUES ('MPA 1', 'Indian', 150.7); INSERT INTO marine_protected_areas (name, location, depth) VALUES ('MPA 2', 'Atlantic', 200.3);", "sql": "SELECT MAX(depth) FROM marine_protected_areas WHERE location = 'Indian';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 72, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'alter_table' (example 348).", "schema": null, "sql": "CREATE TABLE tt8(a int);", "explanation": "DDL from PostgreSQL core regression test for Alter Table.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_table", "is_postgresql_specific": false, "sql_length": 24, "num_statements": 1} {"question": "List all arctic research stations in Russia and their respective altitudes.", "schema": "CREATE TABLE ResearchStations (name TEXT, country TEXT, altitude INTEGER);", "sql": "SELECT name, altitude FROM ResearchStations WHERE country = 'Russia';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 69, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the Opponent before Week 11 with an Attendance of 63,571?", "schema": "CREATE TABLE table_name_94 (opponent VARCHAR, week VARCHAR, attendance VARCHAR)", "sql": "SELECT opponent FROM table_name_94 WHERE week < 11 AND attendance = '63,571';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 77, "num_statements": 1} {"question": "How many properties are there in the 'affordable_housing' table for each city?", "schema": "CREATE TABLE affordable_housing (id INT, address VARCHAR(255), city VARCHAR(255), state VARCHAR(255)); INSERT INTO affordable_housing (id, address, city, state) VALUES (1, '789 Pine St', 'Seattle', 'WA'), (2, '321 Cedar Rd', 'Seattle', 'WA'), (3, '543 Elm Ave', 'Portland', 'OR');", "sql": "SELECT city, COUNT(*) FROM affordable_housing GROUP BY city;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 60, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the 2012 club for the Billy Miller Category:Articles with hcards (Name)?", "schema": "CREATE TABLE table_name_55 (name VARCHAR)", "sql": "SELECT 2012 AS _club FROM table_name_55 WHERE name = 'billy miller category:articles with hcards';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 98, "num_statements": 1} {"question": "How many alternative dispute resolution methods were used for civil cases in each county?", "schema": "CREATE TABLE civil_cases (case_id INT, case_county VARCHAR(20)); CREATE TABLE dispute_resolution (case_id INT, resolution_type VARCHAR(20));", "sql": "SELECT cc.case_county, COUNT(dr.resolution_type) FROM civil_cases cc INNER JOIN dispute_resolution dr ON cc.case_id = dr.case_id GROUP BY cc.case_county;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 153, "num_statements": 1} {"question": "What is the total mass of space debris grouped by the debris type in the space_debris table?", "schema": "CREATE TABLE space_debris (debris_type VARCHAR(30), mass FLOAT, debris_id INT); INSERT INTO space_debris VALUES ('Fuel Tank', 1500.20, 1), ('Upper Stage', 3000.50, 2), ('Payload Adapter', 700.30, 3), ('Instrument', 100.10, 4);", "sql": "SELECT debris_type, SUM(mass) OVER (PARTITION BY debris_type) FROM space_debris;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "query_window_function", "is_postgresql_specific": true, "sql_length": 80, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Can you tell me the Opponent that has the Date of october 11, 1953?", "schema": "CREATE TABLE table_name_62 (opponent VARCHAR, date VARCHAR)", "sql": "SELECT opponent FROM table_name_62 WHERE date = 'october 11, 1953';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 67, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: When was the game played where St Kilda was the home team?", "schema": "CREATE TABLE table_name_4 (date VARCHAR, home_team VARCHAR)", "sql": "SELECT date FROM table_name_4 WHERE home_team = 'st kilda';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 59, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What final score was there on October 8, 2006?", "schema": "CREATE TABLE table_name_96 (score_in_the_final VARCHAR, date VARCHAR)", "sql": "SELECT score_in_the_final FROM table_name_96 WHERE date = 'october 8, 2006';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 76, "num_statements": 1} {"question": "What is the average transaction amount for customers from the United States and Canada?", "schema": "CREATE TABLE transactions (id INT, customer_id INT, amount DECIMAL(10,2), country VARCHAR(50)); INSERT INTO transactions (id, customer_id, amount, country) VALUES (1, 101, 500.00, 'USA'), (2, 102, 350.00, 'Canada'), (3, 103, 700.00, 'USA'), (4, 104, 600.00, 'Canada');", "sql": "SELECT AVG(amount) FROM transactions WHERE country IN ('USA', 'Canada');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 72, "num_statements": 1} {"question": "Delete the record for the sale of 'Artillery Systems' to 'Asia' by 'Red Shield LLC' if the sale quantity is less than 50.", "schema": "CREATE TABLE RedShieldSales(id INT, contractor VARCHAR(255), region VARCHAR(255), equipment VARCHAR(255), quantity INT);INSERT INTO RedShieldSales(id, contractor, region, equipment, quantity) VALUES (1, 'Red Shield LLC', 'Asia', 'Artillery Systems', 45);", "sql": "DELETE FROM RedShieldSales WHERE contractor = 'Red Shield LLC' AND region = 'Asia' AND equipment = 'Artillery Systems' AND quantity < 50;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_delete", "is_postgresql_specific": false, "sql_length": 137, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the Score of the Pittsburgh Home game on March 3 with 61 Points?", "schema": "CREATE TABLE table_name_18 (score VARCHAR, date VARCHAR, home VARCHAR, points VARCHAR)", "sql": "SELECT score FROM table_name_18 WHERE home = 'pittsburgh' AND points = 61 AND date = 'march 3';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 95, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which Leading Scorer has an Opponent of @ indiana?", "schema": "CREATE TABLE table_name_10 (Leading VARCHAR, opponent VARCHAR)", "sql": "SELECT Leading AS scorer FROM table_name_10 WHERE opponent = '@ indiana';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 73, "num_statements": 1} {"question": "What is the total number of medical supply distributions in Haiti?", "schema": "CREATE TABLE medical_supplies (id INT, location VARCHAR(255), distribution_date DATE); INSERT INTO medical_supplies (id, location, distribution_date) VALUES (1, 'Haiti', '2022-01-01'), (2, 'Syria', '2022-01-02'), (3, 'Haiti', '2022-01-03');", "sql": "SELECT COUNT(*) FROM medical_supplies WHERE location = 'Haiti';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 63, "num_statements": 1} {"question": "What is the total labor cost for all building projects in Florida?", "schema": "CREATE TABLE labor_costs (id INT, project_state TEXT, project_type TEXT, labor_cost DECIMAL(10,2)); INSERT INTO labor_costs (id, project_state, project_type, labor_cost) VALUES (1, 'Florida', 'Residential', 15000.00), (2, 'Florida', 'Commercial', 25000.00), (3, 'California', 'Sustainable', 12000.00);", "sql": "SELECT SUM(labor_cost) FROM labor_costs WHERE project_state = 'Florida';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 72, "num_statements": 1} {"question": "What is the maximum number of therapy sessions attended by a patient in Oregon?", "schema": "CREATE TABLE therapy_attendance (patient_id INT, sessions_attended INT, location VARCHAR(50)); INSERT INTO therapy_attendance (patient_id, sessions_attended, location) VALUES (1, 12, 'Oregon'), (2, 10, 'Washington'), (3, 15, 'Oregon'), (4, 8, 'California'), (5, 20, 'Oregon');", "sql": "SELECT location, MAX(sessions_attended) FROM therapy_attendance GROUP BY location;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 82, "num_statements": 1} {"question": "What is the total revenue generated from concert ticket sales for artists from Australia?", "schema": "CREATE TABLE artists (artist_id INT, artist_name VARCHAR(255), country VARCHAR(255)); CREATE TABLE concerts (concert_id INT, artist_id INT, concert_name VARCHAR(255), country VARCHAR(255), revenue INT); INSERT INTO artists (artist_id, artist_name, country) VALUES (101, 'Taylor Swift', 'USA'), (102, 'BTS', 'South Korea'), (103, 'Olivia Newton-John', 'Australia'); INSERT INTO concerts (concert_id, artist_id, concert_name, country, revenue) VALUES (1, 101, 'Pop Music Festival', 'UK', 500000), (2, 102, 'K-Pop Music Festival', 'Japan', 700000), (3, 103, 'Country Music Festival', 'Australia', 600000), (4, 101, 'Rock Music Festival', 'USA', 800000);", "sql": "SELECT SUM(revenue) FROM concerts JOIN artists ON concerts.artist_id = artists.artist_id WHERE artists.country = 'Australia';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 125, "num_statements": 1} {"question": "How many new male members joined in Q1 2022 from the US?", "schema": "CREATE TABLE members (member_id INT, gender VARCHAR(10), join_date DATE, country VARCHAR(50)); INSERT INTO members (member_id, gender, join_date, country) VALUES (1, 'Female', '2021-01-15', 'Canada'), (2, 'Male', '2022-03-28', 'USA');", "sql": "SELECT COUNT(*) FROM members WHERE gender = 'Male' AND join_date >= '2022-01-01' AND join_date < '2022-04-01' AND country = 'USA';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 130, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the value in 2009 corresponding to Career Statistics in 2003?", "schema": "CREATE TABLE table_name_96 (Id VARCHAR)", "sql": "SELECT 2009 FROM table_name_96 WHERE 2003 = 'career statistics';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 64, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is Home, when Score is 103-93?", "schema": "CREATE TABLE table_name_95 (home VARCHAR, score VARCHAR)", "sql": "SELECT home FROM table_name_95 WHERE score = '103-93';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 54, "num_statements": 1} {"question": "Who are the female actors with more than 10 movies acted?", "schema": "CREATE TABLE actors (name VARCHAR(255), gender VARCHAR(10), movies INTEGER); INSERT INTO actors (name, gender, movies) VALUES ('ActorA', 'Female', 12), ('ActorB', 'Male', 15), ('ActorC', 'Female', 8), ('ActorD', 'Female', 20), ('ActorE', 'Male', 18), ('ActorF', 'Male', 10), ('ActorG', 'Female', 11);", "sql": "SELECT name FROM actors WHERE gender = 'Female' AND movies > 10;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 64, "num_statements": 1} {"question": "What is the total quantity of 'Dress' type garments produced by each manufacturer in the 'Spring 2022' season?", "schema": "CREATE TABLE Manufacturers (ManufacturerID INT, ManufacturerName VARCHAR(50)); INSERT INTO Manufacturers (ManufacturerID, ManufacturerName) VALUES (1, 'Manufacturer A'), (2, 'Manufacturer B'); CREATE TABLE Garments (GarmentID INT, GarmentType VARCHAR(20), ManufacturerID INT, ProductionDate DATE); INSERT INTO Garments (GarmentID, GarmentType, ManufacturerID, ProductionDate) VALUES (1, 'Dress', 1, '2022-03-15'), (2, 'Top', 1, '2022-03-16'), (3, 'Dress', 2, '2022-04-01');", "sql": "SELECT M.ManufacturerName, SUM(CASE WHEN G.GarmentType = 'Dress' THEN 1 ELSE 0 END) as TotalDresses FROM Manufacturers M JOIN Garments G ON M.ManufacturerID = G.ManufacturerID WHERE G.ProductionDate BETWEEN '2022-03-01' AND '2022-06-30' GROUP BY M.ManufacturerName;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 265, "num_statements": 1} {"question": "Count the number of users who prefer each brand, for products that do not include water as an ingredient, partitioned by preferred brand.", "schema": "CREATE TABLE ConsumerPreferences (UserID INT, PreferredBrand VARCHAR(50), PreferredProduct VARCHAR(50), PreferredIngredients VARCHAR(255)); INSERT INTO ConsumerPreferences (UserID, PreferredBrand, PreferredProduct, PreferredIngredients) VALUES (1, 'Origins', 'Ginzing Energy-Boosting Gel Moisturizer', 'Water, Glycerin'); INSERT INTO ConsumerPreferences (UserID, PreferredBrand, PreferredProduct, PreferredIngredients) VALUES (2, 'Lush', 'Tea Totaler Facial Oil', 'Water, Glycerin, Rosehip Oil');", "sql": "SELECT PreferredBrand, PreferredProduct, PreferredIngredients, COUNT(*) OVER (PARTITION BY PreferredBrand) as 'BrandPopularity' FROM ConsumerPreferences WHERE PreferredIngredients NOT LIKE '%Water%';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "query_window_function", "is_postgresql_specific": true, "sql_length": 199, "num_statements": 1} {"question": "What is the military spending by country and year?", "schema": "CREATE TABLE military_spending (id INT, country VARCHAR(255), amount FLOAT, year INT); INSERT INTO military_spending (id, country, amount, year) VALUES (1, 'Brazil', 25.6, 2018); INSERT INTO military_spending (id, country, amount, year) VALUES (2, 'South Africa', 30.8, 2019);", "sql": "SELECT country, year, SUM(amount) as total_spending FROM military_spending GROUP BY country, year;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 98, "num_statements": 1} {"question": "Identify the names and locations of all marine protected areas (MPAs) in the Arctic region.", "schema": "CREATE TABLE MARINE_PROTECTED_AREAS (NAME TEXT, LOCATION TEXT, REGION TEXT); INSERT INTO MARINE_PROTECTED_AREAS (NAME, LOCATION, REGION) VALUES ('Arctic National Wildlife Refuge', 'Alaska, USA', 'Arctic'), ('Norwegian Arctic Archipelago', 'Svalbard, Norway', 'Arctic'), ('Gulf of Bothnia National Park', 'Sweden', 'Arctic'), ('Franz Josef Land Nature Reserve', 'Russia', 'Arctic'), ('Arctic Bay Wildlife Sanctuary', 'Canada', 'Arctic');", "sql": "SELECT NAME, LOCATION FROM MARINE_PROTECTED_AREAS WHERE REGION = 'Arctic';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 74, "num_statements": 1} {"question": "What is the total number of marine species in the Atlantic Ocean that are affected by maritime safety issues?", "schema": "CREATE TABLE marine_species (id INT, name TEXT, ocean TEXT, affected_by_safety_issues BOOLEAN); INSERT INTO marine_species (id, name, ocean, affected_by_safety_issues) VALUES (1, 'Krill', 'Southern', TRUE), (2, 'Blue Whale', 'Atlantic', FALSE), (3, 'Penguin', 'Southern', TRUE), (4, 'Squid', 'Atlantic', TRUE);", "sql": "SELECT COUNT(*) FROM marine_species WHERE ocean = 'Atlantic' AND affected_by_safety_issues = TRUE;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 98, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is Fred Stolle's final year of competing in a championship?", "schema": "CREATE TABLE table_2201724_1 (year INTEGER)", "sql": "SELECT MAX(year) FROM table_2201724_1;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 38, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What are the dates of birth of entrepreneurs with investor \"Simon Woodroffe\" or \"Peter Jones\"?", "schema": "CREATE TABLE entrepreneur (People_ID VARCHAR, Investor VARCHAR); CREATE TABLE people (Date_of_Birth VARCHAR, People_ID VARCHAR)", "sql": "SELECT T2.Date_of_Birth FROM entrepreneur AS T1 JOIN people AS T2 ON T1.People_ID = T2.People_ID WHERE T1.Investor = 'Simon Woodroffe' OR T1.Investor = 'Peter Jones';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 166, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who is the visitor with a 14-10 record?", "schema": "CREATE TABLE table_name_71 (visitor VARCHAR, record VARCHAR)", "sql": "SELECT visitor FROM table_name_71 WHERE record = '14-10';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 57, "num_statements": 1} {"question": "Insert new records into the fashion_trends table with the latest trending garment types in the African market.", "schema": "CREATE TABLE fashion_trends (id INT, garment_type VARCHAR(255), region VARCHAR(255), popularity INT); INSERT INTO fashion_trends (id, garment_type, region, popularity) VALUES (1, 'Ankara Dress', 'Africa', 90), (2, 'Kente Cloth Pants', 'Africa', 80), (3, 'Dashiki Shirt', 'Africa', 70);", "sql": "INSERT INTO fashion_trends (id, garment_type, region, popularity) VALUES (4, 'Boubou', 'Africa', 85), (5, 'Kufi Hat', 'Africa', 95), (6, 'Fila Jacket', 'Africa', 88);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 166, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the Points which has a Team of sportivo luqueño, and Wins larger than 1?", "schema": "CREATE TABLE table_name_41 (points INTEGER, team VARCHAR, wins VARCHAR)", "sql": "SELECT MIN(points) FROM table_name_41 WHERE team = 'sportivo luqueño' AND wins > 1;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 83, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'postgres_fdw' (example 187).", "schema": null, "sql": "SELECT t1.c1, t2.c2, t3.c3 FROM ft2 t1 FULL JOIN ft2 t2 ON (t1.c1 = t2.c1) RIGHT JOIN ft4 t3 ON (t2.c1 = t3.c1) OFFSET 10 LIMIT 10;", "explanation": "Example query from the 'postgres_fdw' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 131, "num_statements": 1} {"question": "Display the number of unique ports visited by vessels, grouped by flag state.", "schema": "CREATE TABLE FLAG_STATES (ID INT, NAME VARCHAR(50), CONTINENT VARCHAR(50)); INSERT INTO FLAG_STATES VALUES (1, 'Panama', 'Americas'); INSERT INTO FLAG_STATES VALUES (2, 'Singapore', 'Asia');", "sql": "SELECT F.NAME AS FLAG_STATE, COUNT(DISTINCT P.PORT) AS PORT_COUNT, RANK() OVER(ORDER BY COUNT(DISTINCT P.PORT) DESC) AS RANK FROM PORT_CALLS PC JOIN VESSELS V ON PC.VESSEL_ID = V.ID JOIN FLAG_STATES F ON V.FLAG_STATE = F.NAME GROUP BY F.ID, F.NAME;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_window_function", "is_postgresql_specific": true, "sql_length": 248, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which team has a League from of ontario hockey league, and a Player of tony dehart?", "schema": "CREATE TABLE table_name_22 (nhl_team VARCHAR, league_from VARCHAR, player VARCHAR)", "sql": "SELECT nhl_team FROM table_name_22 WHERE league_from = 'ontario hockey league' AND player = 'tony dehart';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 106, "num_statements": 1} {"question": "What is the average age of patients who have received psychotherapy in California?", "schema": "CREATE TABLE patients (id INT, name TEXT, age INT, state TEXT);CREATE TABLE treatments (id INT, patient_id INT, therapy TEXT);INSERT INTO patients (id, name, age, state) VALUES (1, 'John Doe', 45, 'California');INSERT INTO treatments (id, patient_id, therapy) VALUES (1, 1, 'Psychotherapy');", "sql": "SELECT AVG(patients.age) FROM patients INNER JOIN treatments ON patients.id = treatments.patient_id WHERE patients.state = 'California' AND treatments.therapy = 'Psychotherapy';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 177, "num_statements": 1} {"question": "What is the total number of marine species observed in the Arctic region?", "schema": "CREATE TABLE marine_species (name varchar(255), region varchar(255), observations int); INSERT INTO marine_species (name, region, observations) VALUES ('Polar Bear', 'Arctic', 2500), ('Walrus', 'Arctic', 1200), ('Arctic Fox', 'Arctic', 800);", "sql": "SELECT SUM(observations) FROM marine_species WHERE region = 'Arctic';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 69, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the round of 54 holes in which Phil Mickelson was the runner-up?", "schema": "CREATE TABLE table_name_33 (runner_s__up VARCHAR)", "sql": "SELECT 54 AS _holes FROM table_name_33 WHERE runner_s__up = 'phil mickelson';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 77, "num_statements": 1} {"question": "PostgreSQL regression test 'tstypes': Write the SELECT query (example 27).", "schema": null, "sql": "SELECT * FROM pg_input_error_info('a <100000> b', 'tsquery');", "explanation": "Regression test for Tstypes in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT * FROM pg_input_error_info('a <100000> b', 'tsquery')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": true, "sql_length": 61, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the lowest 2009 value with a 2010 value of 141 and a 1985 value bigger than 165?", "schema": "CREATE TABLE table_name_18 (Id VARCHAR)", "sql": "SELECT MIN(2009) FROM table_name_18 WHERE 2010 = 141 AND 1985 > 165;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 68, "num_statements": 1} {"question": "How many heritage sites were established before 1900?", "schema": "CREATE TABLE Heritage_Sites (id INT, site_name VARCHAR(100), country VARCHAR(50), year_established INT, UNIQUE (id));", "sql": "SELECT COUNT(*) FROM Heritage_Sites WHERE year_established < 1900;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "pgTAP test for Functap (assertion 209).", "schema": null, "sql": "-- Test diagnostics\nSELECT * FROM check_test(\n is_normal_function( 'zippo' ),\n false,\n 'is_normal_function(nofunc)',\n 'Function zippo() should be a normal function',\n ' Function zippo() does not exist'\n);", "explanation": "SQL assertion from pgTAP test suite for Functap.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 222, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Find the name and address of the customers who have both New and Pending orders.", "schema": "CREATE TABLE customers (customer_name VARCHAR, customer_address VARCHAR, customer_id VARCHAR); CREATE TABLE customer_orders (customer_id VARCHAR, order_status_code VARCHAR)", "sql": "SELECT T1.customer_name, T1.customer_address FROM customers AS T1 JOIN customer_orders AS T2 ON T1.customer_id = T2.customer_id WHERE T2.order_status_code = 'New' INTERSECT SELECT T1.customer_name, T1.customer_address FROM customers AS T1 JOIN customer_orders AS T2 ON T1.customer_id = T2.customer_id WHERE T2.order_status_code = 'Pending';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 340, "num_statements": 1} {"question": "Update the renewable energy source for 'ProjectD' in the US to 'Geothermal'.", "schema": "CREATE TABLE green_buildings (project_name VARCHAR(50), country VARCHAR(50), renewable_energy_source VARCHAR(50)); INSERT INTO green_buildings (project_name, country, renewable_energy_source) VALUES ('ProjectD', 'US', 'Solar');", "sql": "UPDATE green_buildings SET renewable_energy_source = 'Geothermal' WHERE project_name = 'ProjectD' AND country = 'US';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 117, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the average earnings of poker players with height higher than 200?", "schema": "CREATE TABLE people (People_ID VARCHAR, Height INTEGER); CREATE TABLE poker_player (Earnings INTEGER, People_ID VARCHAR)", "sql": "SELECT AVG(T2.Earnings) FROM people AS T1 JOIN poker_player AS T2 ON T1.People_ID = T2.People_ID WHERE T1.Height > 200;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 119, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'indexing' (example 1).", "schema": null, "sql": "create table idxpart (a int, b int, c text) partition by range (a);", "explanation": "DDL from PostgreSQL core regression test for Indexing.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "ddl_table", "is_postgresql_specific": true, "sql_length": 67, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'select_views' (example 16).", "schema": null, "sql": "CREATE VIEW my_credit_card_usage_normal AS\n SELECT * FROM my_credit_card_secure l NATURAL JOIN credit_usage r;", "explanation": "DDL from PostgreSQL core regression test for Select Views.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "ddl_view", "is_postgresql_specific": false, "sql_length": 116, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which goals/game ratio has fewer than 201 goals and fewer than 170 appearances?", "schema": "CREATE TABLE table_name_29 (goals VARCHAR, Game VARCHAR, appearances VARCHAR)", "sql": "SELECT goals / Game AS Ratio FROM table_name_29 WHERE goals < 201 AND appearances < 170;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 88, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the Programming which has an Aspect of 4:3, and a PSIP Short Name of ibc-tv?", "schema": "CREATE TABLE table_name_18 (programming VARCHAR, aspect VARCHAR, psip_short_name VARCHAR)", "sql": "SELECT programming FROM table_name_18 WHERE aspect = '4:3' AND psip_short_name = 'ibc-tv';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 90, "num_statements": 1} {"question": "What is the average number of food safety violations per inspection for each location?", "schema": "CREATE TABLE food_safety_inspections (location VARCHAR(255), inspection_date DATE, violations INT); INSERT INTO food_safety_inspections (location, inspection_date, violations) VALUES ('Location A', '2022-01-01', 3), ('Location B', '2022-01-02', 5), ('Location A', '2022-01-03', 2), ('Location C', '2022-01-04', 4), ('Location A', '2022-01-05', 1);", "sql": "SELECT location, AVG(violations) as average_violations FROM food_safety_inspections GROUP BY location;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 102, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What hometown was FR Year, and ha Brown's Gymnastics club?", "schema": "CREATE TABLE table_name_20 (hometown VARCHAR, year VARCHAR, club VARCHAR)", "sql": "SELECT hometown FROM table_name_20 WHERE year = 'fr' AND club = 'brown's gymnastics';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 85, "num_statements": 1} {"question": "Show a SQL definition from the zombodb project (zombodb--3000.0.3--3000.0.4, item 15).", "schema": null, "sql": "CREATE OR REPLACE FUNCTION zdb.t_test_agg(aggregate_name text, fields text[], t_type zdb.ttesttype) RETURNS jsonb AS 'MODULE_PATHNAME', 't_test_fields_agg_wrapper' IMMUTABLE LANGUAGE c PARALLEL SAFE STRICT;", "explanation": "SQL definition from the open-source zombodb PostgreSQL project.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "plpgsql_function", "is_postgresql_specific": true, "sql_length": 206, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which type of surface do Amer Delic Robert Kendrick's opponents have?", "schema": "CREATE TABLE table_name_53 (surface VARCHAR, opponents VARCHAR)", "sql": "SELECT surface FROM table_name_53 WHERE opponents = 'amer delic robert kendrick';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 81, "num_statements": 1} {"question": "What is the total waste generated by each mining operation, categorized by waste type?", "schema": "CREATE TABLE mining_operations (operation_id INT, operation_name VARCHAR(50)); CREATE TABLE waste_generated (operation_id INT, waste_type VARCHAR(50), quantity_generated INT); INSERT INTO mining_operations (operation_id, operation_name) VALUES (1, 'Operation A'), (2, 'Operation B'), (3, 'Operation C'); INSERT INTO waste_generated (operation_id, waste_type, quantity_generated) VALUES (1, 'Tailings', 1000), (1, 'Rock', 2000), (2, 'Tailings', 1500), (2, 'Rock', 500), (3, 'Tailings', 2500);", "sql": "SELECT mining_operations.operation_name, waste_generated.waste_type, SUM(waste_generated.quantity_generated) AS total_waste_generated FROM mining_operations INNER JOIN waste_generated ON mining_operations.operation_id = waste_generated.operation_id GROUP BY mining_operations.operation_name, waste_generated.waste_type;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 319, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the position of the player from the college of Nevada with a round over 5?", "schema": "CREATE TABLE table_name_23 (position VARCHAR, round VARCHAR, college VARCHAR)", "sql": "SELECT position FROM table_name_23 WHERE round > 5 AND college = 'nevada';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 74, "num_statements": 1} {"question": "PostgreSQL regression test 'aggregates': Write the SELECT query (example 286).", "schema": null, "sql": "select aggfns(distinct a,a,c order by c using ~<~)\n from (values (1,3,'foo'),(0,null,null),(2,2,'bar'),(3,1,'baz')) v(a,b,c),\n generate_series(1,2) i;", "explanation": "Regression test for Aggregates in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select aggfns(distinct a,a,c order by c using ~<~)\n from (values (1,3,'foo'),(0,null,null),(2,2,'bar'),(3,1,'baz')) v(a,b,c),\n generate_series(1,2) i) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 157, "num_statements": 1} {"question": "Insert a new record into the 'regulatory_frameworks' table for the country 'Jamaica' and framework 'DLT Regulation'", "schema": "CREATE TABLE regulatory_frameworks (framework_id INT PRIMARY KEY, country VARCHAR(100), framework VARCHAR(100));", "sql": "INSERT INTO regulatory_frameworks (country, framework) VALUES ('Jamaica', 'DLT Regulation');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 92, "num_statements": 1} {"question": "Insert a new well with specific details.", "schema": "CREATE TABLE wells (well_id INT, well_type VARCHAR(10), location VARCHAR(20), production_rate FLOAT);", "sql": "INSERT INTO wells (well_id, well_type, location, production_rate) VALUES (5, 'onshore', 'Brazil', 1500);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 104, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'hash_index' (example 99).", "schema": null, "sql": "CREATE INDEX hash_idx ON hash_temp_heap USING hash (x);", "explanation": "DDL from PostgreSQL core regression test for Hash Index.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "ddl_index", "is_postgresql_specific": false, "sql_length": 55, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Frank Nobilo plays for what country?", "schema": "CREATE TABLE table_name_43 (country VARCHAR, player VARCHAR)", "sql": "SELECT country FROM table_name_43 WHERE player = 'frank nobilo';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 64, "num_statements": 1} {"question": "What is the total number of teachers who have participated in lifelong learning programs, and what are the average and total costs?", "schema": "CREATE TABLE teacher_ll (teacher_id INT, ll_program_id INT, PRIMARY KEY(teacher_id, ll_program_id)); CREATE TABLE ll_program (ll_program_id INT, ll_program_name VARCHAR(50), num_teachers INT, avg_cost DECIMAL(5,2), total_cost DECIMAL(10,2), PRIMARY KEY(ll_program_id)); INSERT INTO teacher_ll (teacher_id, ll_program_id) VALUES (1, 1), (1, 2), (2, 1), (3, 2); INSERT INTO ll_program (ll_program_id, ll_program_name, num_teachers, avg_cost, total_cost) VALUES (1, 'Coding Bootcamp', 50, 1500, 75000), (2, 'Data Science Specialization', 30, 2000, 60000);", "sql": "SELECT COUNT(DISTINCT teacher_id) as total_teachers, AVG(avg_cost) as avg_ll_cost, SUM(total_cost) as total_ll_cost FROM teacher_ll tl INNER JOIN ll_program lp ON tl.ll_program_id = lp.ll_program_id;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 199, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which is the lowest played with 28-10 points and goals higher than 29?", "schema": "CREATE TABLE table_name_97 (played INTEGER, points VARCHAR, goals_for VARCHAR)", "sql": "SELECT MIN(played) FROM table_name_97 WHERE points = '28-10' AND goals_for > 29;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 80, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'rowsecurity' (example 547).", "schema": null, "sql": "CREATE VIEW rls_view WITH (security_invoker) AS\n SELECT * FROM z1 WHERE f_leak(b);", "explanation": "DDL from PostgreSQL core regression test for Rowsecurity.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "ddl_view", "is_postgresql_specific": false, "sql_length": 85, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'cube' (example 162).", "schema": null, "sql": "SELECT cube_ur_coord('(42,137)'::cube, 1);", "explanation": "Example query from the 'cube' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "query_select", "is_postgresql_specific": true, "sql_length": 42, "num_statements": 1} {"question": "PostgreSQL regression test 'multirangetypes': Write the SELECT query (example 58).", "schema": null, "sql": "select int4range(1, 3)::int4multirange;", "explanation": "Regression test for Multirangetypes in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select int4range(1, 3)::int4multirange) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 39, "num_statements": 1} {"question": "List all marine species with a conservation status of 'Critically Endangered'?", "schema": "CREATE TABLE species (id INT, name VARCHAR(255), conservation_status VARCHAR(255)); INSERT INTO species (id, name, conservation_status) VALUES (1, 'Atlantic Salmon', 'Endangered'), (2, 'Blue Whale', 'Critically Endangered');", "sql": "SELECT name FROM species WHERE conservation_status = 'Critically Endangered';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 77, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the to par when the tournament involved was the Transitions Championship?", "schema": "CREATE TABLE table_name_45 (to_par VARCHAR, tournament VARCHAR)", "sql": "SELECT to_par FROM table_name_45 WHERE tournament = 'transitions championship';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 79, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which Score has an Opponent in the final of mehdi tahiri?", "schema": "CREATE TABLE table_name_96 (score VARCHAR, opponent_in_the_final VARCHAR)", "sql": "SELECT score FROM table_name_96 WHERE opponent_in_the_final = 'mehdi tahiri';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 77, "num_statements": 1} {"question": "What is the total tonnage of all cargo handled by the 'cargo_handling' table in the year 2021?", "schema": "CREATE TABLE cargo_handling (id INT, cargo_id INT, handling_date DATE, tonnage INT, PRIMARY KEY(id));", "sql": "SELECT SUM(tonnage) FROM cargo_handling WHERE YEAR(handling_date) = 2021;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 73, "num_statements": 1} {"question": "What is the average price of properties with a sustainable rating above 70 in each city?", "schema": "CREATE TABLE Sustainable_Ratings (id INT, property_id INT, sustainable_rating INT); CREATE VIEW High_Sustainable_Properties AS SELECT sr.property_id, p.address, sr.sustainable_rating FROM Sustainable_Ratings sr JOIN Property p ON sr.property_id = p.id WHERE sr.sustainable_rating > 70;", "sql": "SELECT hsp.address, AVG(p.price) as avg_price FROM High_Sustainable_Properties hsp JOIN Property p ON hsp.property_id = p.id GROUP BY hsp.address;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 146, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the result on 1990-11-04?", "schema": "CREATE TABLE table_name_55 (result VARCHAR, date VARCHAR)", "sql": "SELECT result FROM table_name_55 WHERE date = '1990-11-04';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 59, "num_statements": 1} {"question": "What is the average safety score for each AI algorithm, ranked by the highest average?", "schema": "CREATE TABLE ai_algorithms (ai_algorithm_id INT, algorithm_name VARCHAR(50), safety_score FLOAT); INSERT INTO ai_algorithms (ai_algorithm_id, algorithm_name, safety_score) VALUES (1, 'Algorithm A', 0.8), (2, 'Algorithm B', 0.9), (3, 'Algorithm C', 0.7);", "sql": "SELECT algorithm_name, AVG(safety_score) as avg_safety_score FROM ai_algorithms GROUP BY algorithm_name ORDER BY avg_safety_score DESC;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 135, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many different colleges do attend the tryout test?", "schema": "CREATE TABLE tryout (cName VARCHAR)", "sql": "SELECT COUNT(DISTINCT cName) FROM tryout;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 41, "num_statements": 1} {"question": "Find the user with the lowest financial wellbeing score in the FinancialWellbeing table.", "schema": "CREATE TABLE FinancialWellbeing (userID VARCHAR(20), wellbeingScore INT); INSERT INTO FinancialWellbeing (userID, wellbeingScore) VALUES ('Ahmed', 6), ('Sara', 8), ('Mohammed', 5), ('Aisha', 7);", "sql": "SELECT userID, MIN(wellbeingScore) FROM FinancialWellbeing;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 59, "num_statements": 1} {"question": "What are the total expenses for each department in the 'city_expenses' database?", "schema": "CREATE TABLE department (id INT, name VARCHAR(50), budget DECIMAL(10,2)); INSERT INTO department (id, name, budget) VALUES (1, 'Public Works', 500000.00), (2, 'Education', 800000.00), (3, 'Health', 700000.00);", "sql": "SELECT name, SUM(budget) FROM department GROUP BY name;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 55, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What day in November has a record of 15-6-1?", "schema": "CREATE TABLE table_name_91 (november INTEGER, record VARCHAR)", "sql": "SELECT AVG(november) FROM table_name_91 WHERE record = '15-6-1';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 64, "num_statements": 1} {"question": "Find dishes that contain both tomatoes and onions as ingredients.", "schema": "CREATE TABLE ingredients (id INT, name VARCHAR(255)); INSERT INTO ingredients (id, name) VALUES (1, 'Tomatoes'), (2, 'Onions'), (3, 'Garlic'), (4, 'Cheese'), (5, 'Tofu'), (6, 'Chicken'), (7, 'Beef'), (8, 'Tomato Sauce'), (9, 'Onion Rings'); CREATE TABLE dish_ingredients (dish_id INT, ingredient_id INT); INSERT INTO dish_ingredients (dish_id, ingredient_id) VALUES (1, 1), (1, 2), (1, 3), (1, 8), (2, 2), (2, 3), (2, 4), (3, 1), (3, 2), (3, 6), (4, 2), (4, 9), (5, 3), (5, 4), (5, 7);", "sql": "SELECT dish_ingredients.dish_id FROM dish_ingredients INNER JOIN ingredients ON dish_ingredients.ingredient_id = ingredients.id WHERE ingredients.name IN ('Tomatoes', 'Onions') GROUP BY dish_ingredients.dish_id HAVING COUNT(DISTINCT ingredients.name) = 2;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 255, "num_statements": 1} {"question": "How many songs were released by 'Female Artists' in the 'Pop' genre before 2010?", "schema": "CREATE TABLE artists (artist_id INT, artist_name VARCHAR(100), gender VARCHAR(10)); INSERT INTO artists (artist_id, artist_name, gender) VALUES (1, 'Taylor Swift', 'Female'), (2, 'Ed Sheeran', 'Male'), (3, 'Kendrick Lamar', 'Male'), (4, 'Ariana Grande', 'Female'); CREATE TABLE songs (song_id INT, song_name VARCHAR(100), release_year INT, genre VARCHAR(50), artist_id INT); INSERT INTO songs (song_id, song_name, release_year, genre, artist_id) VALUES (1, 'Shape of You', 2017, 'Pop', 2), (2, 'Thinking Out Loud', 2014, 'Pop', 2), (3, 'Bohemian Rhapsody', 1975, 'Rock', 3), (4, 'Problem', 2014, 'Pop', 4), (5, 'The Way', 2013, 'Pop', 4);", "sql": "SELECT COUNT(*) FROM songs s INNER JOIN artists a ON s.artist_id = a.artist_id WHERE a.gender = 'Female' AND s.genre = 'Pop' AND s.release_year < 2010;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 151, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'join' (example 879).", "schema": null, "sql": "CREATE TEMP TABLE rescan_bhs (a INT);", "explanation": "DDL from PostgreSQL core regression test for Join.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 37, "num_statements": 1} {"question": "What is the policy type and corresponding risk score for each policy, ordered by risk score in ascending order, for policies issued in 'Illinois'?", "schema": "CREATE TABLE Policies (PolicyID INT, PolicyType VARCHAR(20), IssueState VARCHAR(20), RiskScore DECIMAL(5,2)); INSERT INTO Policies (PolicyID, PolicyType, IssueState, RiskScore) VALUES (1, 'Auto', 'Illinois', 0.10), (2, 'Home', 'Illinois', 0.05), (3, 'Life', 'Illinois', 0.15);", "sql": "SELECT PolicyType, RiskScore FROM Policies WHERE IssueState = 'Illinois' ORDER BY RiskScore ASC;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 96, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the highest number of viewers for a rating greater than 9.4?", "schema": "CREATE TABLE table_name_47 (viewers__m_ INTEGER, rating INTEGER)", "sql": "SELECT MAX(viewers__m_) FROM table_name_47 WHERE rating > 9.4;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 62, "num_statements": 1} {"question": "What is the total yield for each crop type in the 'Agroecology' schema?", "schema": "CREATE SCHEMA Agroecology; CREATE TABLE crop_yields (crop_type TEXT, yield NUMERIC) IN Agroecology; INSERT INTO crop_yields (crop_type, yield) VALUES ('Wheat', 12000), ('Rice', 15000), ('Corn', 20000), ('Wheat', 14000), ('Rice', 16000);", "sql": "SELECT crop_type, SUM(yield) as total_yield FROM Agroecology.crop_yields GROUP BY crop_type;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 92, "num_statements": 1} {"question": "Display the total calories for each dish category in the menu_categories and menu tables.", "schema": "CREATE TABLE menu_categories (category_id INT, category_name TEXT); CREATE TABLE menu (menu_id INT, category_id INT, dish_name TEXT, calories INT);", "sql": "SELECT menu_categories.category_name, SUM(menu.calories) FROM menu INNER JOIN menu_categories ON menu.category_id = menu_categories.category_id GROUP BY menu_categories.category_name;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 183, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: On what date was Arthur Jones the away captain at Sydney Cricket Ground?", "schema": "CREATE TABLE table_name_33 (date VARCHAR, away_captain VARCHAR, venue VARCHAR)", "sql": "SELECT date FROM table_name_33 WHERE away_captain = 'arthur jones' AND venue = 'sydney cricket ground';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 103, "num_statements": 1} {"question": "Calculate the percentage difference between the recycling rates of two specific districts.", "schema": "CREATE TABLE RecyclingRates (id INT, district VARCHAR(20), recycling_rate DECIMAL(5,2)); INSERT INTO RecyclingRates (id, district, recycling_rate) VALUES (1, 'DistrictA', 0.65), (2, 'DistrictB', 0.70), (3, 'DistrictC', 0.55);", "sql": "SELECT (SELECT R2.recycling_rate FROM RecyclingRates R2 WHERE R2.district = 'DistrictB') - (SELECT R1.recycling_rate FROM RecyclingRates R1 WHERE R1.district = 'DistrictA') AS difference;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 187, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: whatthe minimum round where grand prix is german grand prix", "schema": "CREATE TABLE table_1137704_2 (round INTEGER, grand_prix VARCHAR)", "sql": "SELECT MIN(round) FROM table_1137704_2 WHERE grand_prix = 'German grand_prix';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 78, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the name of the building at the Jewellery quarter?", "schema": "CREATE TABLE table_name_61 (name VARCHAR, location VARCHAR)", "sql": "SELECT name FROM table_name_61 WHERE location = 'jewellery quarter';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 68, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many floors have 207 w. hastings st. as the address?", "schema": "CREATE TABLE table_name_1 (floors VARCHAR, street_address VARCHAR)", "sql": "SELECT COUNT(floors) FROM table_name_1 WHERE street_address = '207 w. hastings st.';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 84, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which High rebounds have High points of chris bosh (18)?", "schema": "CREATE TABLE table_name_50 (high_rebounds VARCHAR, high_points VARCHAR)", "sql": "SELECT high_rebounds FROM table_name_50 WHERE high_points = 'chris bosh (18)';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 78, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What Time has a Year of 1984?", "schema": "CREATE TABLE table_name_77 (time VARCHAR, year VARCHAR)", "sql": "SELECT time FROM table_name_77 WHERE year = 1984;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 49, "num_statements": 1} {"question": "What is the average number of public libraries per state in the United States?", "schema": "CREATE TABLE State (StateName VARCHAR(50), Country VARCHAR(50), NumberOfPublicLibraries INT); INSERT INTO State (StateName, Country, NumberOfPublicLibraries) VALUES ('California', 'United States', 1500), ('Texas', 'United States', 500), ('New York', 'United States', 1000);", "sql": "SELECT AVG(NumberOfPublicLibraries) FROM State WHERE Country = 'United States';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 79, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the 2007 for the 2003 desert prince?", "schema": "CREATE TABLE table_name_48 (Id VARCHAR)", "sql": "SELECT 2007 FROM table_name_48 WHERE 2003 = 'desert prince';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 60, "num_statements": 1} {"question": "Which countries have the highest yield for soybean in the 'yields' table?", "schema": "CREATE TABLE yields (id INT, crop VARCHAR(255), year INT, country VARCHAR(255), yield INT); INSERT INTO yields (id, crop, year, country, yield) VALUES (1, 'Corn', 2020, 'USA', 12000), (2, 'Soybean', 2020, 'Brazil', 4000), (3, 'Wheat', 2020, 'China', 8000);", "sql": "SELECT country, yield FROM yields WHERE crop = 'Soybean' ORDER BY yield DESC;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 77, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'rowsecurity' (example 290).", "schema": null, "sql": "CREATE VIEW rec2v WITH (security_barrier) AS SELECT * FROM rec2;", "explanation": "DDL from PostgreSQL core regression test for Rowsecurity.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "ddl_view", "is_postgresql_specific": false, "sql_length": 64, "num_statements": 1} {"question": "How many cases were handled by attorneys who have more than 10 years of experience?", "schema": "CREATE TABLE Attorneys (AttorneyID INT, LastName VARCHAR(255), YearsOfExperience INT); INSERT INTO Attorneys (AttorneyID, LastName, YearsOfExperience) VALUES (1, 'Patel', 15), (2, 'Singh', 12), (3, 'Kim', 8); CREATE TABLE Cases (CaseID INT, AttorneyID INT);", "sql": "SELECT COUNT(*) FROM Cases INNER JOIN Attorneys ON Cases.AttorneyID = Attorneys.AttorneyID WHERE Attorneys.YearsOfExperience > 10;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 130, "num_statements": 1} {"question": "How many female and male graduate students are there in each department?", "schema": "CREATE TABLE dept_students (id INT, department VARCHAR(255), gender VARCHAR(10), student_count INT); INSERT INTO dept_students (id, department, gender, student_count) VALUES (1, 'Physics', 'Male', 15), (2, 'Physics', 'Female', 10), (3, 'Computer Science', 'Male', 20), (4, 'Computer Science', 'Female', 18);", "sql": "SELECT department, gender, SUM(student_count) AS total_students FROM dept_students GROUP BY department, gender;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 111, "num_statements": 1} {"question": "List all the indigenous food systems in Brazil with their respective contact emails.", "schema": "CREATE TABLE indigenous_food_systems (system_id INT, name TEXT, location TEXT, contact_email TEXT, country TEXT); INSERT INTO indigenous_food_systems (system_id, name, location, contact_email, country) VALUES (1, 'Forest Foods', 'tribal village', 'forestfoods@example.com', 'Brazil');", "sql": "SELECT name, contact_email FROM indigenous_food_systems WHERE country = 'Brazil';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 81, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: With the version of 1.5a, what is the release date?", "schema": "CREATE TABLE table_2263152_1 (release_date VARCHAR, version VARCHAR)", "sql": "SELECT release_date FROM table_2263152_1 WHERE version = '1.5a';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 64, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the county where kerry# is 59740?", "schema": "CREATE TABLE table_1302886_1 (county VARCHAR, kerry_number VARCHAR)", "sql": "SELECT county FROM table_1302886_1 WHERE kerry_number = 59740;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 62, "num_statements": 1} {"question": "Which water treatment plants are located in drought-prone areas with a drought status of 'extreme'?", "schema": "CREATE TABLE water_plants (id INT, name VARCHAR(255), lat FLOAT, long FLOAT); INSERT INTO water_plants (id, name, lat, long) VALUES (1, 'Plant C', 37.7749, -122.4194), (2, 'Plant D', 29.7604, -95.3698); CREATE TABLE drought_status (id INT, region VARCHAR(255), status VARCHAR(255)); INSERT INTO drought_status (id, region, status) VALUES (1, 'California', 'extreme'), (2, 'Texas', 'moderate');", "sql": "SELECT w.name, ds.status FROM water_plants w JOIN drought_status ds ON ST_DWithin(ST_SetSRID(ST_MakePoint(w.long, w.lat), 4326), ST_SetSRID(ST_MakePoint(ds.long, ds.lat), 4326), 100000) WHERE ds.status = 'extreme';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 214, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the French word for the Italian word nazione?", "schema": "CREATE TABLE table_15040_8 (french VARCHAR, italian VARCHAR)", "sql": "SELECT french FROM table_15040_8 WHERE italian = 'nazione';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 59, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'pgcrypto' (example 29).", "schema": null, "sql": "select armor('', array[null], array['foo']);", "explanation": "Example query from the 'pgcrypto' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": true, "sql_length": 44, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many points did the team who had 132 total points have in round 1?", "schema": "CREATE TABLE table_24784769_1 (round1 INTEGER, total_points VARCHAR)", "sql": "SELECT MAX(round1) FROM table_24784769_1 WHERE total_points = 132;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the start of the race with 676 laps?", "schema": "CREATE TABLE table_name_29 (start VARCHAR, laps VARCHAR)", "sql": "SELECT start FROM table_name_29 WHERE laps = '676';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 51, "num_statements": 1} {"question": "What is the average age of athletes in the 'athlete_wellbeing' table who play 'Basketball'?", "schema": "CREATE TABLE athlete_wellbeing (athlete_id INT, name VARCHAR(50), age INT, sport VARCHAR(50)); INSERT INTO athlete_wellbeing (athlete_id, name, age, sport) VALUES (1, 'John Doe', 25, 'Basketball'); INSERT INTO athlete_wellbeing (athlete_id, name, age, sport) VALUES (2, 'Jane Smith', 28, 'Basketball'); INSERT INTO athlete_wellbeing (athlete_id, name, age, sport) VALUES (3, 'Jim Brown', 30, 'Football');", "sql": "SELECT AVG(age) FROM athlete_wellbeing WHERE sport = 'Basketball';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "What is the total number of traditional art pieces by type, region, and continent?", "schema": "CREATE TABLE Art (ArtID INT, Type VARCHAR(255), Region VARCHAR(255), Continent VARCHAR(255), Quantity INT); INSERT INTO Art (ArtID, Type, Region, Continent, Quantity) VALUES (1, 'Painting', 'Asia', 'Asia', 25), (2, 'Sculpture', 'Africa', 'Africa', 18), (3, 'Textile', 'South America', 'South America', 30), (4, 'Pottery', 'Europe', 'Europe', 20), (5, 'Jewelry', 'North America', 'North America', 12);", "sql": "SELECT Type, Region, Continent, SUM(Quantity) as Total_Quantity FROM Art GROUP BY Type, Region, Continent;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 106, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Where was the game located on January 23, 2008?", "schema": "CREATE TABLE table_name_93 (location VARCHAR, date VARCHAR)", "sql": "SELECT location FROM table_name_93 WHERE date = 'january 23, 2008';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 67, "num_statements": 1} {"question": "What is the failure rate of aircraft by manufacturer?", "schema": "CREATE SCHEMA Aircraft;CREATE TABLE Aircraft.FlightSafetyRecords (manufacturer VARCHAR(50), failure INT);INSERT INTO Aircraft.FlightSafetyRecords (manufacturer, failure) VALUES ('Boeing', 15), ('Airbus', 10), ('Comac', 5), ('Embraer', 3);", "sql": "SELECT manufacturer, 100.0 * AVG(failure) AS failure_rate FROM Aircraft.FlightSafetyRecords GROUP BY manufacturer;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 114, "num_statements": 1} {"question": "Calculate the average speed for each vessel in the 'Tankers' fleet in the past week.", "schema": "CREATE TABLE Fleets (id INT, name VARCHAR(255)); INSERT INTO Fleets (id, name) VALUES (1, 'Tankers'); CREATE TABLE VesselSpeeds (id INT, fleet_id INT, speed INT, speed_date DATE); INSERT INTO VesselSpeeds (id, fleet_id, speed, speed_date) VALUES (1, 1, 20, '2021-08-01'), (2, 1, 25, '2021-08-02');", "sql": "SELECT fleet_id, AVG(speed) as avg_speed FROM VesselSpeeds WHERE fleet_id = 1 AND speed_date >= DATEADD(week, -1, GETDATE()) GROUP BY fleet_id;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 145, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'rules' (example 455).", "schema": null, "sql": "create rule r1 as on update to rules_base do instead\n select * from rules_base where f1 = 1 for update;", "explanation": "DDL from PostgreSQL core regression test for Rules.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_advanced", "is_postgresql_specific": true, "sql_length": 104, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What original air date has 5.85 u.s. viewers (million)?", "schema": "CREATE TABLE table_27117365_1 (original_air_date VARCHAR, us_viewers__million_ VARCHAR)", "sql": "SELECT original_air_date FROM table_27117365_1 WHERE us_viewers__million_ = '5.85';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 83, "num_statements": 1} {"question": "What is the total number of high severity vulnerabilities found in the last month?", "schema": "CREATE TABLE Vulnerabilities (id INT, report_date DATE, severity INT); INSERT INTO Vulnerabilities (id, report_date, severity) VALUES (1, '2022-04-01', 3), (2, '2022-04-15', 5), (3, '2022-05-01', 7);", "sql": "SELECT COUNT(*) FROM Vulnerabilities WHERE severity >= 5 AND report_date >= DATE_SUB(CURRENT_DATE, INTERVAL 1 MONTH);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 117, "num_statements": 1} {"question": "PostgreSQL regression test 'jsonb': Write the SELECT query (example 245).", "schema": null, "sql": "select '{\"a\": {\"b\":{\"c\": \"foo\"}}}'::jsonb #>> array['a'];", "explanation": "Regression test for Jsonb in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select '{\"a\": {\"b\":{\"c\": \"foo\"}}}'::jsonb #>> array['a']) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": true, "sql_length": 57, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What building in Louisville had more than 35 floors?", "schema": "CREATE TABLE table_name_56 (name VARCHAR, city VARCHAR, floors VARCHAR)", "sql": "SELECT name FROM table_name_56 WHERE city = 'louisville' AND floors > 35;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 73, "num_statements": 1} {"question": "What is the average lead time for each product line in the 'supply_chain' schema?", "schema": "CREATE TABLE supply_chain.lead_time (product_line VARCHAR(50), lead_time INT); INSERT INTO supply_chain.lead_time (product_line, lead_time) VALUES ('Product Line A', 30), ('Product Line A', 45), ('Product Line B', 60), ('Product Line B', 75);", "sql": "SELECT product_line, AVG(lead_time) as average_lead_time FROM supply_chain.lead_time GROUP BY product_line;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 107, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the margin of victory when the winning score was –14 (70-68-67=205)?", "schema": "CREATE TABLE table_name_59 (margin_of_victory VARCHAR, winning_score VARCHAR)", "sql": "SELECT margin_of_victory FROM table_name_59 WHERE winning_score = –14(70 - 68 - 67 = 205);", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 90, "num_statements": 1} {"question": "PostgreSQL regression test 'interval': Write the SELECT query (example 206).", "schema": null, "sql": "select interval '2147483647 centuries';", "explanation": "Regression test for Interval in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select interval '2147483647 centuries') AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 39, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which name was in a position less than 4 with 2 losses?", "schema": "CREATE TABLE table_name_78 (name VARCHAR, position VARCHAR, lost VARCHAR)", "sql": "SELECT name FROM table_name_78 WHERE position < 4 AND lost = 2;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 63, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the date of the game played in El Paso, Texas?", "schema": "CREATE TABLE table_23718905_6 (date VARCHAR, city VARCHAR)", "sql": "SELECT date FROM table_23718905_6 WHERE city = 'El Paso, Texas';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 64, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which Season has a Club of real madrid, and a Rank smaller than 6, and less than 121 goals?", "schema": "CREATE TABLE table_name_7 (season VARCHAR, goals VARCHAR, club VARCHAR, rank VARCHAR)", "sql": "SELECT season FROM table_name_7 WHERE club = 'real madrid' AND rank < 6 AND goals < 121;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 88, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What Motion Picture after 2003 had Viola Davis nominated for Best Supporting Actress?", "schema": "CREATE TABLE table_name_70 (motion_picture VARCHAR, award VARCHAR, actor VARCHAR, year VARCHAR, result VARCHAR)", "sql": "SELECT motion_picture FROM table_name_70 WHERE year > 2003 AND result = 'nominated' AND actor = 'viola davis' AND award = 'best supporting actress';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 148, "num_statements": 1} {"question": "Identify the number of ethical labor practice violations for each supplier, along with their supplier type and last inspection date.", "schema": "CREATE TABLE suppliers (supplier_id INT, supplier_type VARCHAR(255));CREATE TABLE violations (violation_id INT, violation_count INT, FK_supplier_id REFERENCES suppliers(supplier_id));CREATE TABLE inspections (inspection_id INT, last_inspection_date DATE, FK_supplier_id REFERENCES suppliers(supplier_id));", "sql": "SELECT s.supplier_type, v.violation_count, i.last_inspection_date FROM suppliers s JOIN violations v ON s.supplier_id = v.supplier_id JOIN inspections i ON s.supplier_id = i.supplier_id GROUP BY s.supplier_id, v.violation_count, i.last_inspection_date;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 252, "num_statements": 1} {"question": "How many green buildings are there in total in the database?", "schema": "CREATE TABLE green_buildings (id INT, building_name VARCHAR(100), country VARCHAR(50)); INSERT INTO green_buildings (id, building_name, country) VALUES (1, 'Green Building 1', 'Canada'), (2, 'Green Building 2', 'Mexico');", "sql": "SELECT COUNT(*) FROM green_buildings;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 37, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the name of the video game magazine that was issued bimonthly?", "schema": "CREATE TABLE table_name_36 (title VARCHAR, magazine_type VARCHAR, frequency VARCHAR)", "sql": "SELECT title FROM table_name_36 WHERE magazine_type = 'video game' AND frequency = 'bimonthly';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 95, "num_statements": 1} {"question": "Which rural infrastructure projects in the 'rural_infrastructure' table were not initiated in the same year as any community development initiatives in the 'community_development' table?", "schema": "CREATE TABLE rural_infrastructure (id INT, project_name VARCHAR(50), initiation_year INT); INSERT INTO rural_infrastructure (id, project_name, initiation_year) VALUES (1, 'Irrigation System Upgrade', 2008), (2, 'Rural Road Expansion', 2022); CREATE TABLE community_development (id INT, initiative_name VARCHAR(50), initiation_year INT); INSERT INTO community_development (id, initiative_name, initiation_year) VALUES (1, 'Youth Empowerment Program', 2010), (2, 'Renewable Energy Workshops', 2022);", "sql": "SELECT project_name FROM rural_infrastructure WHERE initiation_year NOT IN (SELECT initiation_year FROM community_development);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 127, "num_statements": 1} {"question": "List the names and treatment start dates of patients who have been treated with Cognitive Behavioral Therapy (CBT) or Dialectical Behavior Therapy (DBT)", "schema": "CREATE TABLE patients (patient_id INT, name VARCHAR(50), age INT, state VARCHAR(50)); CREATE TABLE therapy_sessions (session_id INT, patient_id INT, therapist_id INT, session_date DATE, therapy_type VARCHAR(50)); INSERT INTO patients VALUES (1, 'John Doe', 35, 'California'); INSERT INTO patients VALUES (2, 'Jane Smith', 28, 'California'); INSERT INTO therapy_sessions VALUES (1, 1, 101, '2022-01-01', 'CBT'); INSERT INTO therapy_sessions VALUES (2, 2, 102, '2022-02-01', 'DBT');", "sql": "SELECT patients.name, therapy_sessions.session_date FROM patients JOIN therapy_sessions ON patients.patient_id = therapy_sessions.patient_id WHERE therapy_sessions.therapy_type IN ('CBT', 'DBT');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 195, "num_statements": 1} {"question": "What are the most common mental health conditions in a specific age group?", "schema": "CREATE TABLE Patients (PatientID INT, Age INT, Gender VARCHAR(10)); CREATE TABLE MentalHealthConditions (ConditionID INT, PatientID INT, Condition VARCHAR(50));", "sql": "SELECT Patients.Age, MentalHealthConditions.Condition, COUNT(MentalHealthConditions.ConditionID) FROM Patients INNER JOIN MentalHealthConditions ON Patients.PatientID = MentalHealthConditions.PatientID WHERE Patients.Age BETWEEN 20 AND 30 GROUP BY Patients.Age, MentalHealthConditions.Condition ORDER BY Patients.Age, COUNT(MentalHealthConditions.ConditionID) DESC;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 365, "num_statements": 1} {"question": "What is the average revenue of products in the 'Organic' subcategory?", "schema": "CREATE TABLE products (product_id INT, name TEXT, revenue FLOAT, subcategory TEXT); INSERT INTO products (product_id, name, revenue, subcategory) VALUES (1, 'Organic Cotton Shirt', 25.00, 'Clothing'), (2, 'Organic Avocados', 5.00, 'Food'); CREATE TABLE subcategories (subcategory TEXT, category TEXT); INSERT INTO subcategories (subcategory, category) VALUES ('Clothing', 'Apparel'), ('Food', 'Groceries');", "sql": "SELECT AVG(revenue) FROM products INNER JOIN subcategories ON products.subcategory = subcategories.subcategory WHERE subcategories.subcategory = 'Clothing' AND subcategories.category = 'Apparel';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 195, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many deaths did eseta cause?", "schema": "CREATE TABLE table_name_34 (deaths VARCHAR, name VARCHAR)", "sql": "SELECT deaths FROM table_name_34 WHERE name = 'eseta';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 54, "num_statements": 1} {"question": "How many TV shows are there for each genre?", "schema": "CREATE TABLE tv_show (tv_show_id INT, title VARCHAR(50), genre VARCHAR(50)); INSERT INTO tv_show (tv_show_id, title, genre) VALUES (1, 'Show 1', 'Comedy'), (2, 'Show 2', 'Drama'), (3, 'Show 3', 'Comedy');", "sql": "SELECT genre, COUNT(title) FROM tv_show GROUP BY genre;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 55, "num_statements": 1} {"question": "What is the minimum depth of the Puerto Rico Trench?", "schema": "CREATE TABLE ocean_floor_mapping (name VARCHAR(255), location VARCHAR(255), min_depth FLOAT); INSERT INTO ocean_floor_mapping (name, location, min_depth) VALUES ('Puerto Rico Trench', 'Atlantic Ocean', 8605.0);", "sql": "SELECT min_depth FROM ocean_floor_mapping WHERE name = 'Puerto Rico Trench';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 76, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the date for a CD format with a Region of Europe?", "schema": "CREATE TABLE table_name_54 (date VARCHAR, region VARCHAR, format VARCHAR)", "sql": "SELECT date FROM table_name_54 WHERE region = 'europe' AND format = 'cd';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 73, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'groupingsets' (example 175).", "schema": null, "sql": "create table gs_group_1 as\nselect g100, g10, sum(g::numeric), count(*), max(g::text)\nfrom gs_data_1 group by cube (g1000, g100,g10);", "explanation": "DDL from PostgreSQL core regression test for Groupingsets.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "ddl_table", "is_postgresql_specific": true, "sql_length": 132, "num_statements": 1} {"question": "Find the community health worker with the lowest cultural competency score.", "schema": "CREATE TABLE community_health_workers_2 (worker_id INT, region VARCHAR(10), cultural_competency_score INT); INSERT INTO community_health_workers_2 (worker_id, region, cultural_competency_score) VALUES (1, 'Northeast', 70), (2, 'Southeast', 75), (3, 'Midwest', 80);", "sql": "SELECT worker_id, MIN(cultural_competency_score) FROM community_health_workers_2;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 81, "num_statements": 1} {"question": "List all artworks created in the year 1905, in ascending order by the number of exhibitions they have been featured in.", "schema": "CREATE TABLE Artworks (id INT, name TEXT, creation_date DATE, exhibitions INT);", "sql": "SELECT name, exhibitions FROM (SELECT name, exhibitions, ROW_NUMBER() OVER (ORDER BY exhibitions ASC) as rn FROM Artworks WHERE creation_date = '1905-01-01') t WHERE rn <= 10;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_window_function", "is_postgresql_specific": true, "sql_length": 175, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the player for seattle", "schema": "CREATE TABLE table_15621965_14 (player VARCHAR, school_club_team VARCHAR)", "sql": "SELECT player FROM table_15621965_14 WHERE school_club_team = 'Seattle';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 72, "num_statements": 1} {"question": "Insert a new record for a donation of $250 made by 'Jamal' on '2022-04-01'.", "schema": "CREATE TABLE Donations (DonationID INT, DonorID INT, Amount FLOAT, DonationDate DATE); INSERT INTO Donations (DonationID, DonorID, Amount, DonationDate) VALUES (1, 1, 500.00, '2021-01-01'), (2, 2, 800.00, '2021-02-01');", "sql": "INSERT INTO Donations (DonationID, DonorID, Amount, DonationDate) VALUES (3, (SELECT DonorID FROM Donors WHERE Name = 'Jamal'), 250.00, '2022-04-01');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 150, "num_statements": 1} {"question": "What is the total budget for all agricultural innovation projects in the 'rural_dev' schema?", "schema": "CREATE TABLE agricultural_innovation (project_name VARCHAR(255), budget INT); INSERT INTO agricultural_innovation (project_name, budget) VALUES ('Precision Agriculture', 1000000), ('Smart Irrigation', 800000);", "sql": "SELECT SUM(budget) FROM agricultural_innovation;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 48, "num_statements": 1} {"question": "What is the total budget allocated for public safety and environmental protection services in the state of California in 2021?", "schema": "CREATE TABLE budget_allocation (year INT, state VARCHAR(20), service VARCHAR(20), amount INT); INSERT INTO budget_allocation VALUES (2021, 'California', 'Public Safety', 3000000), (2021, 'California', 'Environmental Protection', 4000000);", "sql": "SELECT SUM(amount) FROM budget_allocation WHERE state = 'California' AND (service = 'Public Safety' OR service = 'Environmental Protection') AND year = 2021;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 157, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the total of the first season in Segunda División that has a City of cañete?", "schema": "CREATE TABLE table_name_44 (first_season_of_current_spell_in_segunda_división INTEGER, city VARCHAR)", "sql": "SELECT SUM(first_season_of_current_spell_in_segunda_división) FROM table_name_44 WHERE city = 'cañete';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 103, "num_statements": 1} {"question": "Identify the bridges in the transportation division that require maintenance in the next 6 months and display their maintenance schedule.", "schema": "CREATE TABLE bridges (id INT, name VARCHAR(50), division VARCHAR(50), maintenance_date DATE); INSERT INTO bridges (id, name, division, maintenance_date) VALUES (1, 'Bridge A', 'Transportation', '2024-02-01'), (2, 'Bridge B', 'Transportation', '2023-07-15'), (3, 'Bridge C', 'Transportation', '2025-03-20');", "sql": "SELECT name, maintenance_date FROM bridges WHERE division = 'Transportation' AND maintenance_date BETWEEN CURDATE() AND DATE_ADD(CURDATE(), INTERVAL 6 MONTH);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 158, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is Outcome, when Tournament is \"Chiang Mai , Thailand\"?", "schema": "CREATE TABLE table_name_90 (outcome VARCHAR, tournament VARCHAR)", "sql": "SELECT outcome FROM table_name_90 WHERE tournament = 'chiang mai , thailand';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 77, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the latest episode written by John Shiban & Thomas Schnauz?", "schema": "CREATE TABLE table_26736342_1 (no_in_series INTEGER, written_by VARCHAR)", "sql": "SELECT MAX(no_in_series) FROM table_26736342_1 WHERE written_by = 'John Shiban & Thomas Schnauz';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 97, "num_statements": 1} {"question": "Write the SQL definition from PostgreSQL contrib extension 'contrib' (item 1).", "schema": null, "sql": "/* contrib/citext/citext--1.4.sql */\n\n-- complain if script is sourced in psql, rather than via CREATE EXTENSION\n\\echo Use \"CREATE EXTENSION citext\" to load this file. \\quit\n\n--\n-- PostgreSQL code for CITEXT.\n--\n-- Most I/O functions, and a few others, piggyback on the \"text\" type\n-- functions via the implicit cast to text.\n--\n\n--\n-- Shell type to keep things a bit quieter.\n--\n\nCREATE TYPE citext;", "explanation": "SQL definition from the 'contrib' PostgreSQL contrib extension.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_advanced", "is_postgresql_specific": true, "sql_length": 401, "num_statements": 1} {"question": "PostgreSQL regression test 'sqljson_queryfuncs': Write the SELECT query (example 50).", "schema": null, "sql": "SELECT JSON_VALUE(jsonb '\"aaa\"', '$' RETURNING int DEFAULT 111 ON ERROR);", "explanation": "Regression test for Sqljson Queryfuncs in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT JSON_VALUE(jsonb '\"aaa\"', '$' RETURNING int DEFAULT 111 ON ERROR)) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": true, "sql_length": 73, "num_statements": 1} {"question": "How many trains in Berlin pass through a station with 5000 or more daily passengers?", "schema": "CREATE TABLE train_stations (id INT, name TEXT, city TEXT, daily_passengers INT); CREATE TABLE train_routes (id INT, station_id INT, route_id INT); CREATE VIEW high_traffic_stations AS SELECT station_id FROM train_routes JOIN train_stations ON train_stations.id = station_id WHERE daily_passengers >= 5000;", "sql": "SELECT COUNT(*) FROM high_traffic_stations;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 43, "num_statements": 1} {"question": "What is the average response time for fire calls in the state of California?", "schema": "CREATE TABLE fire_calls (id INT, state VARCHAR(255), response_time FLOAT); INSERT INTO fire_calls (id, state, response_time) VALUES (1, 'California', 6.5), (2, 'California', 7.2), (3, 'Texas', 5.4);", "sql": "SELECT AVG(response_time) FROM fire_calls WHERE state = 'California';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 69, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Where is the home venue of Bengaluru FC?", "schema": "CREATE TABLE table_name_95 (home_venue VARCHAR, team VARCHAR)", "sql": "SELECT home_venue FROM table_name_95 WHERE team = 'bengaluru fc';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 65, "num_statements": 1} {"question": "List all the community development initiatives in 'rural Africa' that were funded by 'international aid organizations' and their respective budgets.", "schema": "CREATE TABLE initiatives (id INT, name TEXT, region TEXT, funder TEXT, budget FLOAT); INSERT INTO initiatives (id, name, region, funder, budget) VALUES (1, 'Initiative 1', 'rural Africa', 'African Aid Organization', 100000), (2, 'Initiative 2', 'urban Africa', 'International Aid Organization', 200000);", "sql": "SELECT initiatives.name, initiatives.region, initiatives.funder, initiatives.budget FROM initiatives WHERE initiatives.region = 'rural Africa' AND initiatives.funder = 'International Aid Organization';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 201, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the Suffix of the intensive and valency change of 0?", "schema": "CREATE TABLE table_name_21 (suffix VARCHAR, valency_change VARCHAR, type VARCHAR)", "sql": "SELECT suffix FROM table_name_21 WHERE valency_change = '0' AND type = 'intensive';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 83, "num_statements": 1} {"question": "Delete the mental health parity report for the state of New York in 2021", "schema": "CREATE TABLE mental_health_parity_reports (report_id INT, state VARCHAR(255), year INT, total_complaints INT); INSERT INTO mental_health_parity_reports (report_id, state, year, total_complaints) VALUES (1, 'California', 2022, 250), (2, 'New York', 2021, 300), (3, 'Texas', 2022, 200);", "sql": "DELETE FROM mental_health_parity_reports WHERE state = 'New York' AND year = 2021;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_delete", "is_postgresql_specific": false, "sql_length": 82, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many locations had a density (pop/km²) of 91.8 in the 2011 census?", "schema": "CREATE TABLE table_1425958_1 (density__pop_km²_ VARCHAR)", "sql": "SELECT COUNT(2011 AS _census) FROM table_1425958_1 WHERE density__pop_km²_ = '91.8';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 84, "num_statements": 1} {"question": "What is the average age of patients with diabetes in each state?", "schema": "CREATE TABLE patients (id INT, name TEXT, age INT, state TEXT, has_diabetes BOOLEAN); INSERT INTO patients (id, name, age, state, has_diabetes) VALUES (1, 'John Doe', 65, 'California', true), (2, 'Jane Smith', 45, 'Texas', false);", "sql": "SELECT patients.state, AVG(patients.age) FROM patients WHERE patients.has_diabetes = true GROUP BY patients.state;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 114, "num_statements": 1} {"question": "What is the total waste generation in gram by material type for the year 2020 in the city of San Francisco?", "schema": "CREATE TABLE waste_generation(city VARCHAR(20), year INT, material VARCHAR(20), weight FLOAT); INSERT INTO waste_generation(city, year, material, weight) VALUES ('San Francisco', 2020, 'Plastic', 1500), ('San Francisco', 2020, 'Glass', 2000), ('San Francisco', 2020, 'Paper', 1200), ('San Francisco', 2020, 'Metal', 1700), ('San Francisco', 2020, 'Organic', 2500), ('Los Angeles', 2020, 'Plastic', 2000), ('Los Angeles', 2020, 'Glass', 3000), ('Los Angeles', 2020, 'Paper', 1800), ('Los Angeles', 2020, 'Metal', 2100), ('Los Angeles', 2020, 'Organic', 3500);", "sql": "SELECT material, SUM(weight) FROM waste_generation WHERE city = 'San Francisco' AND year = 2020 GROUP BY material;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 114, "num_statements": 1} {"question": "Which smart city projects have a budget greater than $600,000?", "schema": "CREATE TABLE IF NOT EXISTS smart_cities ( project_id INT, project_name VARCHAR(255), budget FLOAT, PRIMARY KEY (project_id)); INSERT INTO smart_cities (project_id, project_name, budget) VALUES (1, 'Intelligent Lighting', 500000), (2, 'Smart Waste Management', 750000), (3, 'E-Governance Services', 600000);", "sql": "SELECT * FROM smart_cities WHERE budget > 600000;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 49, "num_statements": 1} {"question": "PostgreSQL regression test 'jsonpath': Write the SELECT query (example 94).", "schema": null, "sql": "select '$ ? (@ like_regex \"pattern\" flag \"a\")'::jsonpath;", "explanation": "Regression test for Jsonpath in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select '$ ? (@ like_regex \"pattern\" flag \"a\")'::jsonpath) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 57, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What are all the spectral types for star mismis24-# is 1sw?", "schema": "CREATE TABLE table_10432351_1 (spectral_type VARCHAR, star__pismis24__number_ VARCHAR)", "sql": "SELECT spectral_type FROM table_10432351_1 WHERE star__pismis24__number_ = '1SW';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 81, "num_statements": 1} {"question": "What is the total carbon sequestered by forests in a specific country, and the average carbon sequestration per hectare for those forests?", "schema": "CREATE TABLE Forests (id INT, name VARCHAR(255), hectares FLOAT, country VARCHAR(255), carbon_sequestration_tonnes INT); INSERT INTO Forests (id, name, hectares, country, carbon_sequestration_tonnes) VALUES (1, 'Amazon Rainforest', 55000000.0, 'Brazil', 120000000);", "sql": "SELECT country, SUM(carbon_sequestration_tonnes) as total_carbon_sequestration, AVG(carbon_sequestration_tonnes/hectares) as avg_carbon_sequestration_per_hectare FROM Forests GROUP BY country;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 192, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Where does the judge who serves Beacon Falls , Naugatuck , Middlebury , Prospect reside?", "schema": "CREATE TABLE table_26758262_1 (judges_residence VARCHAR, municipalities_served VARCHAR)", "sql": "SELECT judges_residence FROM table_26758262_1 WHERE municipalities_served = 'Beacon Falls , Naugatuck , Middlebury , Prospect';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 127, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: On week 7, what were the opponents?", "schema": "CREATE TABLE table_25380472_2 (opponent VARCHAR, week VARCHAR)", "sql": "SELECT opponent FROM table_25380472_2 WHERE week = 7;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 53, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which category does drama league award belong to?", "schema": "CREATE TABLE table_name_91 (category VARCHAR, award VARCHAR)", "sql": "SELECT category FROM table_name_91 WHERE award = 'drama league award';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 70, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the loss for the game against @ expos, with a save of parrett (2)?", "schema": "CREATE TABLE table_name_28 (loss VARCHAR, opponent VARCHAR, save VARCHAR)", "sql": "SELECT loss FROM table_name_28 WHERE opponent = '@ expos' AND save = 'parrett (2)';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 83, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the score for the home team Notts County?", "schema": "CREATE TABLE table_name_97 (score VARCHAR, home_team VARCHAR)", "sql": "SELECT score FROM table_name_97 WHERE home_team = 'notts county';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 65, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is varricchio's unit?", "schema": "CREATE TABLE table_name_75 (unit VARCHAR, authors VARCHAR)", "sql": "SELECT unit FROM table_name_75 WHERE authors = 'varricchio';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 60, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many episodes have a share of 16.2% and an episode number of less than 1?", "schema": "CREATE TABLE table_name_82 (total_viewers VARCHAR, share VARCHAR, episode_no VARCHAR)", "sql": "SELECT COUNT(total_viewers) FROM table_name_82 WHERE share = '16.2%' AND episode_no < 1;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 88, "num_statements": 1} {"question": "What is the difference in athlete wellbeing scores between the first and last game for each athlete?", "schema": "CREATE TABLE AthleteWellbeing (ScoreID INT, AthleteID INT, GameID INT, WellbeingScore INT); INSERT INTO AthleteWellbeing VALUES (1, 1, 1, 80), (2, 1, 2, 85), (3, 2, 1, 90), (4, 2, 2, 95), (5, 3, 1, 70), (6, 3, 2, 75);", "sql": "SELECT AthleteID, FIRST_VALUE(WellbeingScore) OVER (PARTITION BY AthleteID ORDER BY GameID) as FirstGameWellbeingScore, LAST_VALUE(WellbeingScore) OVER (PARTITION BY AthleteID ORDER BY GameID ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) as LastGameWellbeingScore, LAST_VALUE(WellbeingScore) OVER (PARTITION BY AthleteID ORDER BY GameID ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - FIRST_VALUE(WellbeingScore) OVER (PARTITION BY AthleteID ORDER BY GameID) as Difference FROM AthleteWellbeing;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "query_window_function", "is_postgresql_specific": true, "sql_length": 519, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the vote % for Cliff Breitkreuz?", "schema": "CREATE TABLE table_name_24 (votes__percentage VARCHAR, candidate VARCHAR)", "sql": "SELECT votes__percentage FROM table_name_24 WHERE candidate = 'cliff breitkreuz';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 81, "num_statements": 1} {"question": "PostgreSQL regression test 'jsonb': Write the SELECT query (example 296).", "schema": null, "sql": "SELECT ia1d FROM jsonb_populate_record(NULL::jsbrec, '{\"ia1d\": [1, \"2\", null, 4]}') q;", "explanation": "Regression test for Jsonb in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT ia1d FROM jsonb_populate_record(NULL::jsbrec, '{\"ia1d\": [1, \"2\", null, 4]}') q) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 86, "num_statements": 1} {"question": "Which public transportation routes have the highest frequency in Southeast Asia?", "schema": "CREATE TABLE Public_Transit_Routes (id INT PRIMARY KEY, route VARCHAR(50), mode VARCHAR(50), frequency INT, region VARCHAR(50));", "sql": "SELECT Public_Transit_Routes.route, Public_Transit_Routes.frequency FROM Public_Transit_Routes WHERE Public_Transit_Routes.region = 'Southeast Asia' ORDER BY Public_Transit_Routes.frequency DESC;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 195, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was Kilgore's percentage when Potts (I) had 4% and Kaine (D) 47%, in the poll from October 30, 2005?", "schema": "CREATE TABLE table_name_47 (kilgore__r_ VARCHAR, date VARCHAR, potts__i_ VARCHAR, kaine__d_ VARCHAR)", "sql": "SELECT kilgore__r_ FROM table_name_47 WHERE potts__i_ = '4%' AND kaine__d_ = '47%' AND date = 'october 30, 2005';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 113, "num_statements": 1} {"question": "What is the nutritional information for all organic products?", "schema": "CREATE TABLE Nutrition_Facts (Product_ID INT, Calories INT, Sodium_Milligrams INT, Sugar_Grams INT); CREATE TABLE Organic_Certified_Products (Product_ID INT, Organic_Certified INT); CREATE VIEW Organic_Products_Nutrition AS SELECT Nutrition_Facts.*, Organic_Certified_Products.Organic_Certified FROM Nutrition_Facts INNER JOIN Organic_Certified_Products ON Nutrition_Facts.Product_ID = Organic_Certified_Products.Product_ID WHERE Organic_Certified_Products.Organic_Certified = 1;", "sql": "SELECT * FROM Organic_Products_Nutrition;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 41, "num_statements": 1} {"question": "Add new records to the 'digital_trends' table for trends with IDs 9 and 10, names 'Machine Learning' and 'Blockchain', and popularity scores of 85 and 60, respectively", "schema": "CREATE TABLE digital_trends (trend_id INT, name VARCHAR(20), popularity_score INT);", "sql": "INSERT INTO digital_trends (trend_id, name, popularity_score) VALUES (9, 'Machine Learning', 85), (10, 'Blockchain', 60);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 121, "num_statements": 1} {"question": "What is the total number of community policing programs in Miami and their respective budgets?\"", "schema": "CREATE TABLE miami_community_policing (id INT, program_name VARCHAR(255), city VARCHAR(255), budget INT); INSERT INTO miami_community_policing (id, program_name, city, budget) VALUES (1, 'Bike Patrol', 'Miami', 60000);", "sql": "SELECT program_name, SUM(budget) as total_budget FROM miami_community_policing WHERE city = 'Miami' GROUP BY program_name;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 122, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the February number of the game with the Vancouver Canucks as the opponent and a game number greater than 55?", "schema": "CREATE TABLE table_name_48 (february VARCHAR, opponent VARCHAR, game VARCHAR)", "sql": "SELECT COUNT(february) FROM table_name_48 WHERE opponent = 'vancouver canucks' AND game > 55;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 93, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'polymorphism' (example 47).", "schema": null, "sql": "create function polyf(anycompatiblerange) returns anycompatiblemultirange\nas 'select multirange($1);' language sql;", "explanation": "DDL from PostgreSQL core regression test for Polymorphism.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 115, "num_statements": 2} {"question": "Find the percentage change in transportation budget between 2021 and 2022 in Texas.", "schema": "CREATE TABLE TransportationBudget (state VARCHAR(20), year INT, budget INT); INSERT INTO TransportationBudget (state, year, budget) VALUES ('Texas', 2021, 5000000), ('Texas', 2022, 5500000);", "sql": "SELECT ((new_budget - old_budget) * 100.0 / old_budget) AS pct_change FROM (SELECT b1.budget AS old_budget, b2.budget AS new_budget FROM TransportationBudget b1, TransportationBudget b2 WHERE b1.state = b2.state AND b1.year = 2021 AND b2.year = 2022 AND b1.state = 'Texas') AS subquery;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 286, "num_statements": 1} {"question": "PostgreSQL regression test 'numeric': Write the SELECT query (example 848).", "schema": null, "sql": "select div(999999999999999999999::numeric,1000000000000000000000);", "explanation": "Regression test for Numeric in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select div(999999999999999999999::numeric,1000000000000000000000)) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "PL/pgSQL test: Plpython Trigger (example 38).", "schema": null, "sql": "DROP FUNCTION trigger_data() CASCADE;", "explanation": "PL/pgSQL example from PostgreSQL source test for Plpython Trigger.", "validation_query": null, "source": "plpgsql_source", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 37, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the week on December 10, 1989?", "schema": "CREATE TABLE table_name_39 (week VARCHAR, date VARCHAR)", "sql": "SELECT week FROM table_name_39 WHERE date = 'december 10, 1989';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 64, "num_statements": 1} {"question": "Write the SQL definition from PostgreSQL contrib extension 'contrib' (item 35).", "schema": null, "sql": "CREATE OPERATOR ~ (\n\tLEFTARG = hstore,\n\tRIGHTARG = hstore,\n\tPROCEDURE = hs_contained,\n\tCOMMUTATOR = '@',\n\tRESTRICT = contsel,\n\tJOIN = contjoinsel\n);", "explanation": "SQL definition from the 'contrib' PostgreSQL contrib extension.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "ddl_advanced", "is_postgresql_specific": true, "sql_length": 148, "num_statements": 1} {"question": "Show a SQL definition from the timescaledb project (compat, item 46).", "schema": null, "sql": "CREATE OR REPLACE FUNCTION _timescaledb_internal.to_unix_microseconds(ts timestamp with time zone) RETURNS bigint LANGUAGE PLPGSQL AS $$\nBEGIN\n IF current_setting('timescaledb.enable_deprecation_warnings', true)::bool THEN\n RAISE WARNING 'function _timescaledb_internal.to_unix_microseconds(timestamp with time zone) is deprecated and has been moved to _timescaledb_functions schema. this compatibility function will be removed in a future version.';\n END IF;\n RETURN _timescaledb_functions.to_unix_microseconds($1);\nEND$$\nSET search_path TO pg_catalog,pg_temp;", "explanation": "SQL definition from the open-source timescaledb PostgreSQL project.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "plpgsql_function", "is_postgresql_specific": true, "sql_length": 567, "num_statements": 4} {"question": "What is the total number of military equipment sales for each defense contractor in the past 12 months, along with the name of the contractor and the total sales volume?", "schema": "CREATE TABLE military_equipment_sales (id INT, defense_contractor_id INT, sale_date DATE, sales INT); INSERT INTO military_equipment_sales (id, defense_contractor_id, sale_date, sales) VALUES (1, 1, '2021-01-01', 5000000), (2, 1, '2021-02-01', 7000000), (3, 2, '2021-03-01', 6000000), (4, 3, '2021-04-01', 8000000); CREATE TABLE defense_contractors (id INT, name VARCHAR(255)); INSERT INTO defense_contractors (id, name) VALUES (1, 'Lockheed Martin'), (2, 'Boeing'), (3, 'Raytheon');", "sql": "SELECT d.name, SUM(m.sales) as total_sales FROM military_equipment_sales m JOIN defense_contractors d ON m.defense_contractor_id = d.id WHERE m.sale_date >= DATEADD(year, -1, GETDATE()) GROUP BY d.name;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 202, "num_statements": 1} {"question": "What percentage of patients with PTSD in Japan experienced improvement after medication?", "schema": "CREATE TABLE patients (id INT, improvement TEXT, condition TEXT, country TEXT); INSERT INTO patients (id, improvement, condition, country) VALUES (1, 'Improved', 'PTSD', 'Japan'); INSERT INTO patients (id, improvement, condition, country) VALUES (2, 'Not Improved', 'Anxiety', 'USA');", "sql": "SELECT (COUNT(*) * 100.0 / (SELECT COUNT(*) FROM patients WHERE condition = 'PTSD')) AS percentage FROM patients WHERE improvement = 'Improved' AND condition = 'PTSD' AND country = 'Japan';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 189, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the location for a club of telecom?", "schema": "CREATE TABLE table_name_61 (location VARCHAR, club VARCHAR)", "sql": "SELECT location FROM table_name_61 WHERE club = 'telecom';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 58, "num_statements": 1} {"question": "PostgreSQL regression test 'strings': Write the SELECT query (example 101).", "schema": null, "sql": "SELECT SUBSTRING('abcdefg' SIMILAR 'a#\"%#\"x|g' ESCAPE '#') AS \"bcdef\";", "explanation": "Regression test for Strings in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT SUBSTRING('abcdefg' SIMILAR 'a#\"%#\"x|g' ESCAPE '#') AS \"bcdef\") AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 70, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'index_including' (example 91).", "schema": null, "sql": "INSERT INTO tbl SELECT x, 2*x, 3*x, box('4,4,4,4') FROM generate_series(1,1000) AS x;", "explanation": "DML from PostgreSQL core regression test for Index Including.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 85, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'vacuum' (example 250).", "schema": null, "sql": "CREATE TABLE vacowned_parted (a int) PARTITION BY LIST (a);", "explanation": "DDL from PostgreSQL core regression test for Vacuum.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "ddl_table", "is_postgresql_specific": true, "sql_length": 59, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: who write the episode 5 in no. in season", "schema": "CREATE TABLE table_19417244_2 (written_by VARCHAR, no_in_season VARCHAR)", "sql": "SELECT written_by FROM table_19417244_2 WHERE no_in_season = 5;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 63, "num_statements": 1} {"question": "What are the building permit numbers, permit types, and permit issuance dates for projects that have started in Q1 2022?", "schema": "CREATE TABLE building_permits (permit_number INT, permit_type TEXT, issuance_date DATE); CREATE TABLE project_timelines (project_id INT, permit_number INT, start_date DATE); INSERT INTO building_permits (permit_number, permit_type, issuance_date) VALUES (1, 'Residential', '2022-01-15'), (2, 'Commercial', '2022-02-20'), (3, 'Residential', '2021-12-10'), (4, 'Sustainable', '2022-03-05'); INSERT INTO project_timelines (project_id, permit_number, start_date) VALUES (1, 1, '2022-02-01'), (2, 2, '2022-03-10'), (3, 3, '2021-12-15'), (4, 4, '2022-03-15');", "sql": "SELECT building_permits.permit_number, building_permits.permit_type, building_permits.issuance_date FROM building_permits INNER JOIN project_timelines ON building_permits.permit_number = project_timelines.permit_number WHERE project_timelines.start_date BETWEEN '2022-01-01' AND '2022-03-31';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 292, "num_statements": 1} {"question": "Delete records of workouts with a duration more than 90 minutes for all members from the 'Workouts' table", "schema": "CREATE TABLE Workouts (WorkoutID INT, MemberID INT, Duration INT, MembershipType VARCHAR(20));", "sql": "DELETE FROM Workouts WHERE Duration > 90;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_delete", "is_postgresql_specific": false, "sql_length": 41, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'btree_gin' (example 11).", "schema": null, "sql": "SELECT * FROM test_date WHERE i<'2004-10-26'::timestamp ORDER BY i;", "explanation": "Example query from the 'btree_gin' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 67, "num_statements": 1} {"question": "How many labor rights violations have been reported in each region?", "schema": "CREATE TABLE Labor_Rights (region VARCHAR(20), violation_reported BOOLEAN); INSERT INTO Labor_Rights (region, violation_reported) VALUES ('Northeast', true), ('Northeast', false), ('Midwest', true);", "sql": "SELECT region, SUM(violation_reported) as total_violations FROM Labor_Rights GROUP BY region;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 93, "num_statements": 1} {"question": "What is the average price of gluten-free dishes?", "schema": "CREATE TABLE menu_pricing_2 (item_id INT, item_name VARCHAR(50), is_gluten_free BOOLEAN, price DECIMAL(5, 2)); INSERT INTO menu_pricing_2 (item_id, item_name, is_gluten_free, price) VALUES (1, 'Pizza', FALSE, 12.99), (2, 'Salad', TRUE, 9.99), (3, 'Soup', FALSE, 5.99);", "sql": "SELECT AVG(price) FROM menu_pricing_2 WHERE is_gluten_free = TRUE;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "What is the total water usage by residential customers in a specific neighborhood in 2022?", "schema": "CREATE TABLE water_usage (id INT PRIMARY KEY, customer_id INT, usage_date DATE, usage_type VARCHAR(255), amount FLOAT); CREATE TABLE customers (id INT PRIMARY KEY, name VARCHAR(255), neighborhood VARCHAR(255));", "sql": "SELECT SUM(wu.amount) as total_water_usage FROM water_usage wu JOIN customers c ON wu.customer_id = c.id WHERE wu.usage_type = 'Residential' AND wu.usage_date BETWEEN '2022-01-01' AND '2022-12-31' AND c.neighborhood = 'Specific Neighborhood';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 242, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What opponent has a result of 3–6, 6–2, 4–6?", "schema": "CREATE TABLE table_name_74 (opponent VARCHAR, result VARCHAR)", "sql": "SELECT opponent FROM table_name_74 WHERE result = '3–6, 6–2, 4–6';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "What is the average humidity (%) in tea plantations in Sri Lanka during the last month?", "schema": "CREATE TABLE humidity_data (humidity DECIMAL(3,1), reading_date DATE, location TEXT); INSERT INTO humidity_data (humidity, reading_date, location) VALUES (82.5, '2021-07-01', 'Sri Lanka'), (85.3, '2021-07-02', 'Sri Lanka'), (79.2, '2021-01-01', 'Sri Lanka');", "sql": "SELECT AVG(humidity) FROM humidity_data WHERE location = 'Sri Lanka' AND reading_date > DATE_SUB(CURDATE(), INTERVAL 1 MONTH) AND location LIKE '%tea%';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 152, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many heats did Runners from Guinea-Bissau run, with rank higher than 33?", "schema": "CREATE TABLE table_name_78 (heat INTEGER, nationality VARCHAR, rank VARCHAR)", "sql": "SELECT SUM(heat) FROM table_name_78 WHERE nationality = 'guinea-bissau' AND rank < 33;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 86, "num_statements": 1} {"question": "Show a SQL definition from the zombodb project (_mappings, item 21).", "schema": null, "sql": "CREATE OR REPLACE FUNCTION zdb.define_analyzer(name text, definition json) RETURNS void\n LANGUAGE sql\n VOLATILE STRICT AS\n$$\nDELETE\nFROM zdb.analyzers\nWHERE name = $1;\nINSERT INTO zdb.analyzers(name, definition)\nVALUES ($1, $2);\n$$;", "explanation": "SQL definition from the open-source zombodb PostgreSQL project.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "plpgsql_function", "is_postgresql_specific": true, "sql_length": 238, "num_statements": 3} {"question": "Generate PostgreSQL SQL for: What is Year Named, when Longitude is 227.5E?", "schema": "CREATE TABLE table_name_42 (year_named VARCHAR, longitude VARCHAR)", "sql": "SELECT year_named FROM table_name_42 WHERE longitude = '227.5e';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 64, "num_statements": 1} {"question": "Which menu category has the highest inventory value for non-organic items?", "schema": "CREATE TABLE non_organic_categories (id INT, category VARCHAR(255), total_value DECIMAL(5,2)); INSERT INTO non_organic_categories (id, category, total_value) VALUES (1, 'Proteins', 199.00), (2, 'Grains', 74.25);", "sql": "SELECT category, total_value FROM non_organic_categories ORDER BY total_value DESC LIMIT 1;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 91, "num_statements": 1} {"question": "PostgreSQL regression test 'sqljson_queryfuncs': Write the SELECT query (example 253).", "schema": null, "sql": "SELECT JSON_VALUE(jsonb '{\"d1\": \"H\"}', '$.a2' RETURNING queryfuncs_test_domain DEFAULT 'foo'::queryfuncs_test_domain ON EMPTY);", "explanation": "Regression test for Sqljson Queryfuncs in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT JSON_VALUE(jsonb '{\"d1\": \"H\"}', '$.a2' RETURNING queryfuncs_test_domain DEFAULT 'foo'::queryfuncs_test_domain ON EMPTY)) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": true, "sql_length": 127, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'create_view' (example 77).", "schema": null, "sql": "CREATE TABLE tbl3 (e int, f int);", "explanation": "DDL from PostgreSQL core regression test for Create View.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_table", "is_postgresql_specific": false, "sql_length": 33, "num_statements": 1} {"question": "PL/pgSQL test: Plpython Error (example 56).", "schema": null, "sql": "/* test error logged with an underlying exception that includes a detail\n * string (bug #18070).\n */\nCREATE FUNCTION python_error_detail() RETURNS SETOF text AS $$\n plan = plpy.prepare(\"SELECT to_date('xy', 'DD') d\")\n for row in plpy.cursor(plan):\n yield row['d']\n$$ LANGUAGE plpython3u;", "explanation": "PL/pgSQL example from PostgreSQL source test for Plpython Error.", "validation_query": null, "source": "plpgsql_source", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 292, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Find the student ID and personal name of the student with at least two enrollments.", "schema": "CREATE TABLE Student_Course_Enrolment (student_id VARCHAR); CREATE TABLE Students (personal_name VARCHAR, student_id VARCHAR)", "sql": "SELECT T1.student_id, T2.personal_name FROM Student_Course_Enrolment AS T1 JOIN Students AS T2 ON T1.student_id = T2.student_id GROUP BY T1.student_id HAVING COUNT(*) >= 2;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 172, "num_statements": 1} {"question": "What is the geopolitical risk assessment for country Y?", "schema": "CREATE TABLE geopolitical_risk (country VARCHAR, risk_level VARCHAR, assessment_date DATE);", "sql": "SELECT risk_level FROM geopolitical_risk WHERE country = 'Country Y';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 69, "num_statements": 1} {"question": "Write the SQL definition from PostgreSQL contrib extension 'contrib' (item 2).", "schema": null, "sql": "CREATE FUNCTION seg_out(seg)\nRETURNS cstring\nAS 'MODULE_PATHNAME'\nLANGUAGE C STRICT IMMUTABLE PARALLEL SAFE;", "explanation": "SQL definition from the 'contrib' PostgreSQL contrib extension.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 108, "num_statements": 1} {"question": "What is the average weight of cargo imported and exported by the company 'Poseidon Shipping'", "schema": "CREATE TABLE ports (id INT, name VARCHAR(50), country VARCHAR(50)); CREATE TABLE cargo_operations (id INT, port_id INT, company VARCHAR(50), type VARCHAR(50), weight INT); INSERT INTO ports (id, name, country) VALUES (1, 'Port of Oakland', 'USA'), (2, 'Port of Singapore', 'Singapore'), (3, 'Port of Hong Kong', 'China'), (4, 'Port of Rotterdam', 'Netherlands'); INSERT INTO cargo_operations (id, port_id, company, type, weight) VALUES (1, 1, 'Poseidon Shipping', 'import', 5000), (2, 1, 'Poseidon Shipping', 'export', 7000), (3, 3, 'Poseidon Shipping', 'import', 8000), (4, 3, 'Poseidon Shipping', 'export', 9000), (5, 4, 'Poseidon Shipping', 'import', 6000), (6, 4, 'Poseidon Shipping', 'export', 4000), (7, 1, 'Poseidon Shipping', 'import', 5500), (8, 1, 'Poseidon Shipping', 'export', 7500);", "sql": "SELECT AVG(weight) FROM cargo_operations WHERE company = 'Poseidon Shipping' AND (type = 'import' OR type = 'export');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 118, "num_statements": 1} {"question": "Write the SQL definition from PostgreSQL contrib extension 'contrib' (item 313).", "schema": null, "sql": "CREATE OPERATOR >= (\n\tPROCEDURE = isnge,\n\tLEFTARG = ismn,\n\tRIGHTARG = ismn13,\n\tCOMMUTATOR = <=,\n\tNEGATOR = <,\n\tRESTRICT = scalargtsel,\n\tJOIN = scalargtjoinsel );", "explanation": "SQL definition from the 'contrib' PostgreSQL contrib extension.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "ddl_advanced", "is_postgresql_specific": true, "sql_length": 161, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many countries had a per capita withdrawal (m 3 /p/yr) of 372?", "schema": "CREATE TABLE table_15909409_2 (country VARCHAR, per_capita_withdrawal__m_3__p_yr_ VARCHAR)", "sql": "SELECT COUNT(country) FROM table_15909409_2 WHERE per_capita_withdrawal__m_3__p_yr_ = 372;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 90, "num_statements": 1} {"question": "What are the flight numbers and airlines of flights with safety incidents reported in the year 2022?", "schema": "CREATE TABLE Flights (id INT PRIMARY KEY, flight_number VARCHAR(10), airline VARCHAR(50), departure_date DATE); INSERT INTO Flights (id, flight_number, airline, departure_date) VALUES (1, 'UA123', 'United Airlines', '2022-01-01'), (2, 'DL456', 'Delta Airlines', '2022-02-01');", "sql": "SELECT f.flight_number, f.airline FROM Flights f INNER JOIN SafetyIncidents si ON f.id = si.flight_id WHERE YEAR(si.incident_date) = 2022;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 138, "num_statements": 1} {"question": "What is the total number of clothing items made from sustainable materials?", "schema": "CREATE TABLE Clothing (id INT, sustainable BOOLEAN); INSERT INTO Clothing VALUES (1, true), (2, false), (3, true), (4, true), (5, false); CREATE TABLE SustainableMaterials (id INT, clothing_id INT, material TEXT); INSERT INTO SustainableMaterials VALUES (1, 1, 'OrganicCotton'), (2, 3, 'Tencel'), (3, 4, 'Hemp'), (4, 2, 'Bamboo');", "sql": "SELECT COUNT(*) FROM Clothing INNER JOIN SustainableMaterials ON Clothing.id = SustainableMaterials.clothing_id WHERE Clothing.sustainable = true;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 146, "num_statements": 1} {"question": "What is the average age of artists who performed at music festivals in Germany in 2021?", "schema": "CREATE TABLE artists (artist_id INT, artist_name VARCHAR(255), artist_age INT, country VARCHAR(255)); INSERT INTO artists (artist_id, artist_name, artist_age, country) VALUES (1, 'John Doe', 35, 'USA'); CREATE TABLE festival_performances (performance_id INT, artist_id INT, festival_name VARCHAR(255), festival_country VARCHAR(255), festival_year INT); INSERT INTO festival_performances (performance_id, artist_id, festival_name, festival_country, festival_year) VALUES (1, 1, 'MusicFest', 'Germany', 2021);", "sql": "SELECT AVG(a.artist_age) FROM artists a JOIN festival_performances fp ON a.artist_id = fp.artist_id WHERE fp.festival_country = 'Germany' AND fp.festival_year = 2021;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "plpgsql", "is_postgresql_specific": false, "sql_length": 166, "num_statements": 1} {"question": "Virtual tour usage in the Asia Pacific region.", "schema": "CREATE TABLE virtual_tours (tour_id INT, name TEXT, region TEXT); INSERT INTO virtual_tours (tour_id, name, region) VALUES (1, 'Asia Pacific Tour', 'Asia Pacific');", "sql": "SELECT region, COUNT(*) FROM virtual_tours WHERE region = 'Asia Pacific';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 73, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the average rank for Hungary?", "schema": "CREATE TABLE table_name_82 (rank INTEGER, country VARCHAR)", "sql": "SELECT AVG(rank) FROM table_name_82 WHERE country = 'hungary';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 62, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'jsonb' (example 896).", "schema": null, "sql": "update test_jsonb_subscript set test_json['a'][1]['c'][2] = '1';", "explanation": "DML from PostgreSQL core regression test for Jsonb.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 64, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the largest points number where the difference is 12?", "schema": "CREATE TABLE table_name_69 (points INTEGER, difference VARCHAR)", "sql": "SELECT MAX(points) FROM table_name_69 WHERE difference = '12';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 62, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: If the call sign is DXYR, what is the branding?", "schema": "CREATE TABLE table_19874169_3 (branding VARCHAR, callsign VARCHAR)", "sql": "SELECT branding FROM table_19874169_3 WHERE callsign = 'DXYR';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 62, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the total for the place when the singer is Mariza Ikonomi when points are larger than 20?", "schema": "CREATE TABLE table_name_88 (place VARCHAR, singer VARCHAR, points VARCHAR)", "sql": "SELECT COUNT(place) FROM table_name_88 WHERE singer = 'mariza ikonomi' AND points > 20;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 87, "num_statements": 1} {"question": "What is the total number of workplaces with a safety rating above 85 in the 'workplace_safety' table?", "schema": "CREATE TABLE workplace_safety (safety_rating INT, workplace_id INT);", "sql": "SELECT COUNT(*) FROM workplace_safety WHERE safety_rating > 85;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 63, "num_statements": 1} {"question": "PostgreSQL regression test 'pg_ndistinct': Write the SELECT query (example 34).", "schema": null, "sql": "SELECT '[{\"attributes\" : [], \"ndistinct\" : 1}]'::pg_ndistinct;", "explanation": "Regression test for Pg Ndistinct in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT '[{\"attributes\" : [], \"ndistinct\" : 1}]'::pg_ndistinct) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 62, "num_statements": 1} {"question": "Update the population of the Giant Panda to 1800.", "schema": "CREATE TABLE Animals (id INT, name VARCHAR(255), population INT, status VARCHAR(255)); INSERT INTO Animals (id, name, population, status) VALUES (1, 'Amur Leopard', 90, 'Endangered'), (2, 'Black Rhino', 5000, 'Critically Endangered'), (3, 'Giant Panda', 1600, 'Vulnerable');", "sql": "UPDATE Animals SET population = 1800 WHERE name = 'Giant Panda';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 64, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which month in 2013 did the U.S. use the ruby birthstone?", "schema": "CREATE TABLE table_name_76 (month VARCHAR, us__2013_ VARCHAR)", "sql": "SELECT month FROM table_name_76 WHERE us__2013_ = 'ruby';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 57, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the season with a Skip of Eve Muirhead and a third of Kerry Barr?", "schema": "CREATE TABLE table_name_42 (season VARCHAR, skip VARCHAR, third VARCHAR)", "sql": "SELECT season FROM table_name_42 WHERE skip = 'eve muirhead' AND third = 'kerry barr';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 86, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the segment d for s cufflink", "schema": "CREATE TABLE table_15187735_18 (segment_d VARCHAR, segment_a VARCHAR)", "sql": "SELECT segment_d FROM table_15187735_18 WHERE segment_a = 's Cufflink';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 71, "num_statements": 1} {"question": "Delete records with a city of 'Los Angeles' from the warehouse table", "schema": "CREATE TABLE warehouse (warehouse_id INT, warehouse_name VARCHAR(50), city VARCHAR(50), country VARCHAR(50));", "sql": "DELETE FROM warehouse WHERE city = 'Los Angeles';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_delete", "is_postgresql_specific": false, "sql_length": 49, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'earthdistance' (example 34).", "schema": null, "sql": "SELECT latitude(ll_to_earth(45,0))::numeric(20,10);", "explanation": "Example query from the 'earthdistance' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 51, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many dogs went through any treatments?", "schema": "CREATE TABLE Treatments (dog_id VARCHAR)", "sql": "SELECT COUNT(DISTINCT dog_id) FROM Treatments;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 46, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the Player in T5 Place?", "schema": "CREATE TABLE table_name_71 (player VARCHAR, place VARCHAR)", "sql": "SELECT player FROM table_name_71 WHERE place = 't5';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 52, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which company's type is joint venture, and has principle activities listed as Cargo Airline and an incorporation of China?", "schema": "CREATE TABLE table_name_37 (company VARCHAR, incorporated_in VARCHAR, type VARCHAR, principal_activities VARCHAR)", "sql": "SELECT company FROM table_name_37 WHERE type = 'joint venture' AND principal_activities = 'cargo airline' AND incorporated_in = 'china';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 136, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'numeric' (example 154).", "schema": null, "sql": "INSERT INTO num_exp_mul VALUES (3,5,'70671.23589621');", "explanation": "DML from PostgreSQL core regression test for Numeric.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 54, "num_statements": 1} {"question": "Which destinations in Africa have implemented more than 3 sustainable practices and have a travel warning issued?", "schema": "CREATE TABLE sustainable_practices (destination_id INT, practice_id INT, year INT, implemented BOOLEAN); CREATE VIEW sustainable_destinations AS SELECT destination_id, COUNT(*) as num_practices FROM sustainable_practices WHERE implemented = TRUE GROUP BY destination_id; CREATE TABLE travel_warnings (destination_id INT, warning_level VARCHAR(10), warning_text TEXT, start_date DATE, end_date DATE); CREATE VIEW current_travel_warnings AS SELECT destination_id FROM travel_warnings WHERE CURRENT_DATE BETWEEN start_date AND end_date;", "sql": "SELECT d.country FROM destinations d JOIN sustainable_destinations s ON d.id = s.destination_id JOIN current_travel_warnings ctw ON d.id = ctw.destination_id WHERE d.region = 'Africa' AND s.num_practices > 3;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 208, "num_statements": 1} {"question": "What is the maximum budget allocated for any category in the North region in the year 2020?", "schema": "CREATE TABLE Budget (Year INT, Region VARCHAR(50), Category VARCHAR(50), Amount INT); INSERT INTO Budget (Year, Region, Category, Amount) VALUES (2020, 'North', 'Education', 5000000), (2020, 'North', 'Public Transportation', 6000000);", "sql": "SELECT MAX(Amount) FROM Budget WHERE Year = 2020 AND Region = 'North';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 70, "num_statements": 1} {"question": "What is the revenue by day of the week for all restaurants?", "schema": "CREATE TABLE sales (id INT, restaurant_id INT, sale_date DATE, sales DECIMAL(10,2)); CREATE VIEW day_sales AS SELECT restaurant_id, EXTRACT(DOW FROM sale_date) as day_of_week, SUM(sales) as total_sales FROM sales GROUP BY restaurant_id, day_of_week;", "sql": "SELECT d.day_of_week, SUM(ds.total_sales) as total_sales FROM day_sales ds JOIN day_sales d ON ds.day_of_week = d.day_of_week GROUP BY d.day_of_week;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 149, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the highest round in which a player was picked for the Center position?", "schema": "CREATE TABLE table_name_24 (round INTEGER, position VARCHAR)", "sql": "SELECT MAX(round) FROM table_name_24 WHERE position = 'center';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 63, "num_statements": 1} {"question": "What is the minimum and maximum number of animals on pasture-based farms in Wisconsin?", "schema": "CREATE TABLE pasture_farms (id INT, farm_name VARCHAR(50), state VARCHAR(20), num_animals INT); INSERT INTO pasture_farms (id, farm_name, state, num_animals) VALUES (1, 'Farm 1', 'Wisconsin', 50), (2, 'Farm 2', 'Wisconsin', 75), (3, 'Farm 3', 'Wisconsin', 100), (4, 'Farm 4', 'Wisconsin', 125), (5, 'Farm 5', 'Wisconsin', 150);", "sql": "SELECT state, MIN(num_animals) as min_animals, MAX(num_animals) as max_animals FROM pasture_farms WHERE state = 'Wisconsin';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 124, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many top 5s when he was on team #28/#49 jay robinson racing?", "schema": "CREATE TABLE table_2216245_2 (top_5 INTEGER, team_s_ VARCHAR)", "sql": "SELECT MAX(top_5) FROM table_2216245_2 WHERE team_s_ = '#28/#49 Jay Robinson Racing';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 85, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'cube' (example 211).", "schema": null, "sql": "SELECT cube(array[10,20,30], array[40,50,60])->6;", "explanation": "Example query from the 'cube' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "query_select", "is_postgresql_specific": true, "sql_length": 49, "num_statements": 1} {"question": "PL/pgSQL test: Plpgsql Call (example 43).", "schema": null, "sql": "CREATE PROCEDURE test_proc10(IN a int, OUT b int, IN c int DEFAULT 11)\nLANGUAGE plpgsql\nAS $$\nBEGIN\n RAISE NOTICE 'a: %, b: %, c: %', a, b, c;\n b := a - c;\nEND;\n$$;", "explanation": "PL/pgSQL example from PostgreSQL source test for Plpgsql Call.", "validation_query": null, "source": "plpgsql_source", "difficulty": "basic", "category": "plpgsql_function", "is_postgresql_specific": true, "sql_length": 166, "num_statements": 4} {"question": "Generate PostgreSQL SQL for: What year did jaroslav vojtek category:articles with hcards Direct?", "schema": "CREATE TABLE table_22032599_1 (year__ceremony_ VARCHAR, director VARCHAR)", "sql": "SELECT year__ceremony_ FROM table_22032599_1 WHERE director = 'Jaroslav Vojtek Category:Articles with hCards';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 110, "num_statements": 1} {"question": "List the names and types of all farms in the 'farming' database that are located in a specific region.", "schema": "CREATE TABLE farm (id INT, name VARCHAR(255), type VARCHAR(255), region VARCHAR(255)); INSERT INTO farm (id, name, type, region) VALUES (1, 'Smith Farm', 'organic', 'Midwest'), (2, 'Johnson Farm', 'conventional', 'South'), (3, 'Brown Farm', 'organic', 'Midwest'), (4, 'Davis Farm', 'conventional', 'West');", "sql": "SELECT name, type FROM farm WHERE region = 'Midwest';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 53, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many draws were there when there were points less than 23, 6 losses, and more than 21 against?", "schema": "CREATE TABLE table_name_49 (drawn VARCHAR, against VARCHAR, points VARCHAR, lost VARCHAR)", "sql": "SELECT COUNT(drawn) FROM table_name_49 WHERE points < 23 AND lost = 6 AND against > 21;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 87, "num_statements": 1} {"question": "What is the total number of marine species that have been observed in the Indian Ocean?", "schema": "CREATE TABLE species (id INT, name TEXT, location TEXT); INSERT INTO species (id, name, location) VALUES (1, 'Clownfish', 'Indian Ocean'); INSERT INTO species (id, name, location) VALUES (2, 'Dolphin', 'Atlantic Ocean');", "sql": "SELECT COUNT(*) FROM species WHERE location = 'Indian Ocean';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 61, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: When zachary sanders is the performer how many first aired are there?", "schema": "CREATE TABLE table_191105_2 (first_aired VARCHAR, performed_by VARCHAR)", "sql": "SELECT COUNT(first_aired) FROM table_191105_2 WHERE performed_by = 'Zachary Sanders';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "advanced", "category": "plpgsql", "is_postgresql_specific": false, "sql_length": 85, "num_statements": 1} {"question": "List all unique digital initiatives by museums located in the Asia-Pacific region.", "schema": "CREATE TABLE Digital_Initiatives (id INT, museum VARCHAR(255), initiative VARCHAR(255)); INSERT INTO Digital_Initiatives (id, museum, initiative) VALUES (1, 'National Museum of Australia', 'Virtual Tour'), (2, 'British Museum', 'Online Collection'), (3, 'Metropolitan Museum of Art', 'Digital Archive'), (4, 'National Museum of China', 'Interactive Exhibit');", "sql": "SELECT DISTINCT initiative FROM Digital_Initiatives WHERE museum LIKE 'National Museum%';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 89, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the date of the game when the record of the series was 0–1?", "schema": "CREATE TABLE table_name_10 (date VARCHAR, series VARCHAR)", "sql": "SELECT date FROM table_name_10 WHERE series = '0–1';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 52, "num_statements": 1} {"question": "Insert new records for 3 autonomous shuttle stations in Seattle into the 'autonomous_stations' table", "schema": "CREATE TABLE autonomous_stations (id INT, station_name VARCHAR(255), location VARCHAR(255), num_vehicles INT);", "sql": "INSERT INTO autonomous_stations (id, station_name, location, num_vehicles) VALUES (1, 'Seattle Shuttle 1', 'Seattle', 5), (2, 'Seattle Shuttle 2', 'Seattle', 5), (3, 'Seattle Shuttle 3', 'Seattle', 5);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 201, "num_statements": 1} {"question": "PL/pgSQL test: Plpgsql Transaction (example 94).", "schema": null, "sql": "INSERT INTO test3 (y) VALUES (3); -- won't get here\nEND;", "explanation": "PL/pgSQL example from PostgreSQL source test for Plpgsql Transaction.", "validation_query": null, "source": "plpgsql_source", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 57, "num_statements": 2} {"question": "Generate PostgreSQL SQL for: What is Brian Mateer's Speed?", "schema": "CREATE TABLE table_name_1 (speed VARCHAR, rider VARCHAR)", "sql": "SELECT speed FROM table_name_1 WHERE rider = 'brian mateer';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 60, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the Catalog number of the CD Single?", "schema": "CREATE TABLE table_name_13 (catalog VARCHAR, format VARCHAR)", "sql": "SELECT catalog FROM table_name_13 WHERE format = 'cd single';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 61, "num_statements": 1} {"question": "What is the total number of students enrolled in each district, and what is the average mental health score for students in each district?", "schema": "CREATE TABLE districts (district_id INT, district_name TEXT); CREATE TABLE students (student_id INT, district_id INT, mental_health_score INT); INSERT INTO districts VALUES (1, 'District A'), (2, 'District B'); INSERT INTO students VALUES (1, 1, 60), (2, 1, 75), (3, 2, 45), (4, 2, 30);", "sql": "SELECT d.district_name, COUNT(s.student_id) as num_students, AVG(s.mental_health_score) as avg_mental_health_score FROM students s JOIN districts d ON s.district_id = d.district_id GROUP BY s.district_id;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 204, "num_statements": 1} {"question": "What is the total number of art exhibitions and dance events in the year 2020?", "schema": "CREATE TABLE ArtExhibitions (id INT, year INT, visitors INT); INSERT INTO ArtExhibitions (id, year, visitors) VALUES (1, 2018, 500), (2, 2019, 700), (3, 2020, 600), (4, 2021, 800); CREATE TABLE DanceEvents (id INT, year INT, visitors INT); INSERT INTO DanceEvents (id, year, visitors) VALUES (1, 2019, 300), (2, 2020, 400), (3, 2021, 500);", "sql": "SELECT SUM(visitors) FROM ArtExhibitions WHERE year = 2020; SELECT SUM(visitors) FROM DanceEvents WHERE year = 2020;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 116, "num_statements": 2} {"question": "Generate PostgreSQL SQL for: Where did Collingwood play as the home team?", "schema": "CREATE TABLE table_name_28 (venue VARCHAR, home_team VARCHAR)", "sql": "SELECT venue FROM table_name_28 WHERE home_team = 'collingwood';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 64, "num_statements": 1} {"question": "pgTAP test for Pgtap--1.2.0--1.3.0 (assertion 2).", "schema": null, "sql": "-- col_is_pk( schema, table, column[] )\nCREATE OR REPLACE FUNCTION col_is_pk ( NAME, NAME, NAME[] )\nRETURNS TEXT AS $$\n SELECT col_is_pk( $1, $2, $3, 'Columns ' || quote_ident($1) || '.' || quote_ident($2) || '(' || _ident_array_to_string($3, ', ') || ') should be a primary key' );\n$$ LANGUAGE sql;", "explanation": "SQL assertion from pgTAP test suite for Pgtap--1.2.0--1.3.0.", "validation_query": null, "source": "pgtap_tests", "difficulty": "advanced", "category": "plpgsql_function", "is_postgresql_specific": true, "sql_length": 302, "num_statements": 2} {"question": "Generate PostgreSQL SQL for: When batavo is the main sponsor and olympikus is the kit manufacturer who are the minor sponsors?", "schema": "CREATE TABLE table_187239_1 (minor_sponsors VARCHAR, kit_manufacturer VARCHAR, main_sponsor VARCHAR)", "sql": "SELECT minor_sponsors FROM table_187239_1 WHERE kit_manufacturer = 'Olympikus' AND main_sponsor = 'Batavo';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 107, "num_statements": 1} {"question": "pgTAP test for Aretap (assertion 103).", "schema": null, "sql": "SELECT * FROM check_test(\n groups_are( ___mygroups('meanies'), 'whatever' ),\n false,\n 'groups_are(groups, desc) extras',\n 'whatever',\n ' Extra groups:\n meanies'\n);", "explanation": "SQL assertion from pgTAP test suite for Aretap.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 188, "num_statements": 1} {"question": "Which countries have participated in more than two peacekeeping operations?", "schema": "CREATE TABLE Country (ID INT, Name VARCHAR(50)); INSERT INTO Country (ID, Name) VALUES (1, 'USA'), (2, 'Canada'), (3, 'Mexico'), (4, 'Brazil'), (5, 'Argentina'); CREATE TABLE Mission (ID INT, CountryID INT, Type VARCHAR(50), Year INT); INSERT INTO Mission (ID, CountryID, Type, Year) VALUES (1, 1, 'Peacekeeping', 2010), (2, 1, 'Peacekeeping', 2012), (3, 2, 'Peacekeeping', 2015), (4, 3, 'Peacekeeping', 2018), (5, 4, 'Peacekeeping', 2010), (6, 4, 'Peacekeeping', 2012), (7, 4, 'Peacekeeping', 2014), (8, 5, 'Peacekeeping', 2019), (9, 5, 'Peacekeeping', 2020);", "sql": "SELECT Country.Name FROM Country JOIN Mission ON Country.ID = Mission.CountryID WHERE Mission.Type = 'Peacekeeping' GROUP BY Country.Name HAVING COUNT(*) > 2;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 158, "num_statements": 1} {"question": "What is the total revenue generated from ads on Facebook in Q2 2021, for users in the 'celebrity' category?", "schema": "CREATE TABLE ads (ad_id INT, user_id INT, platform VARCHAR(255), ad_revenue DECIMAL(10,2)); INSERT INTO ads (ad_id, user_id, platform, ad_revenue) VALUES (1, 1, 'Facebook', 1500.50), (2, 2, 'Twitter', 800.00), (3, 3, 'Facebook', 1200.75);", "sql": "SELECT SUM(ad_revenue) FROM ads WHERE platform = 'Facebook' AND MONTH(ad_date) BETWEEN 4 AND 6 AND YEAR(ad_date) = 2021 AND user_id IN (SELECT user_id FROM users WHERE category = 'celebrity');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 192, "num_statements": 1} {"question": "Update address in 'rural_clinics' where id=1", "schema": "CREATE TABLE if not exists 'rural_clinics' (id INT, name TEXT, address TEXT, PRIMARY KEY(id));", "sql": "UPDATE 'rural_clinics' SET address = 'New Address' WHERE id = 1;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 64, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which employee has showed up in most circulation history documents. List the employee's name and the number of drafts and copies.", "schema": "CREATE TABLE Circulation_History (Id VARCHAR); CREATE TABLE Employees (Id VARCHAR)", "sql": "SELECT Employees.employee_name, COUNT(*) FROM Employees JOIN Circulation_History ON Circulation_History.employee_id = Employees.employee_id GROUP BY Circulation_History.document_id, Circulation_History.draft_number, Circulation_History.copy_number ORDER BY COUNT(*) DESC LIMIT 1;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 279, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: When did the draw happen?", "schema": "CREATE TABLE table_name_76 (date VARCHAR, result VARCHAR)", "sql": "SELECT date FROM table_name_76 WHERE result = 'draw';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 53, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'uuid' (example 13).", "schema": null, "sql": "INSERT INTO guid1(guid_field) VALUES('{22222222-2222-2222-2222-222222222222}');", "explanation": "DML from PostgreSQL core regression test for Uuid.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 79, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the score for the game that has an attendance of 5,298?", "schema": "CREATE TABLE table_name_9 (score VARCHAR, attendance VARCHAR)", "sql": "SELECT score FROM table_name_9 WHERE attendance = '5,298';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 58, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many group legs were won with player Mark Dudbridge?", "schema": "CREATE TABLE table_24334163_1 (Winners VARCHAR, player VARCHAR)", "sql": "SELECT Winners AS group_legs_won FROM table_24334163_1 WHERE player = 'Mark Dudbridge';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 87, "num_statements": 1} {"question": "Delete the OTA data for the 'NA' region from the 'ota_stats' table that is older than 2021.", "schema": "CREATE TABLE ota_stats (id INT, ota_name TEXT, region TEXT, date DATE, clicks INT, bookings INT); INSERT INTO ota_stats (id, ota_name, region, date, clicks, bookings) VALUES (1, 'OTA1', 'NA', '2021-01-01', 100, 20), (2, 'OTA2', 'NA', '2021-02-01', 150, 30), (3, 'OTA3', 'EMEA', '2021-01-01', 50, 10);", "sql": "DELETE FROM ota_stats WHERE region = 'NA' AND date < '2021-01-01';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_delete", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which Country has a Network of nelonen?", "schema": "CREATE TABLE table_name_61 (country VARCHAR, network VARCHAR)", "sql": "SELECT country FROM table_name_61 WHERE network = 'nelonen';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 60, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the smallest total number of medals for rank 11 and more than 0 silver medals?", "schema": "CREATE TABLE table_name_25 (total INTEGER, rank VARCHAR, silver VARCHAR)", "sql": "SELECT MIN(total) FROM table_name_25 WHERE rank = '11' AND silver > 0;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 70, "num_statements": 1} {"question": "List all marine species that are found in more than one ocean basin", "schema": "CREATE TABLE species (id INT, name VARCHAR(255), habitat VARCHAR(255)); CREATE TABLE ocean_basin (id INT, name VARCHAR(255)); CREATE TABLE species_ocean_basin (species_id INT, ocean_basin_id INT);", "sql": "SELECT species.name FROM species JOIN species_ocean_basin ON species.id = species_ocean_basin.species_id JOIN ocean_basin ON species_ocean_basin.ocean_basin_id = ocean_basin.id GROUP BY species.name HAVING COUNT(DISTINCT ocean_basin.name) > 1;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 243, "num_statements": 1} {"question": "How many donations were made in the first half of 2021?", "schema": "CREATE TABLE Donations (donation_id INT, donation_amount FLOAT, donation_date DATE); INSERT INTO Donations (donation_id, donation_amount, donation_date) VALUES (1, 500.00, '2021-01-01'), (2, 300.00, '2021-01-15'), (3, 400.00, '2021-02-20'), (4, 250.00, '2021-03-10'), (5, 600.00, '2021-03-15'), (6, 100.00, '2021-07-04');", "sql": "SELECT COUNT(*) FROM Donations WHERE YEAR(donation_date) = 2021 AND MONTH(donation_date) <= 6;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 94, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the copa mercosur 1999 result for team grêmio, who did not qualify for the Copa Libertadores 1999 and Copa Conmebol 1999?", "schema": "CREATE TABLE table_name_51 (copa_mercosur_1999 VARCHAR, team VARCHAR, copa_libertadores_1999 VARCHAR, copa_conmebol_1999 VARCHAR)", "sql": "SELECT copa_mercosur_1999 FROM table_name_51 WHERE copa_libertadores_1999 = 'did not qualify' AND copa_conmebol_1999 = 'did not qualify' AND team = 'grêmio';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 157, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: If the title is Paradox Lost and the reader is Briggs, Nicholas Nicholas Briggs, what are all of the notes?", "schema": "CREATE TABLE table_20174050_7 (notes VARCHAR, reader VARCHAR, title VARCHAR)", "sql": "SELECT notes FROM table_20174050_7 WHERE reader = 'Briggs, Nicholas Nicholas Briggs' AND title = 'Paradox Lost';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 112, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What week has a Record of 12-2?", "schema": "CREATE TABLE table_name_99 (week VARCHAR, record VARCHAR)", "sql": "SELECT week FROM table_name_99 WHERE record = '12-2';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 53, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who did the fastest lap in the dutch grand prix?", "schema": "CREATE TABLE table_1140077_2 (fastest_lap VARCHAR, race VARCHAR)", "sql": "SELECT fastest_lap FROM table_1140077_2 WHERE race = 'Dutch Grand Prix';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 72, "num_statements": 1} {"question": "Show a SQL definition from the zombodb project (_mappings, item 19).", "schema": null, "sql": "CREATE OR REPLACE FUNCTION zdb.define_filter(name text, definition json) RETURNS void\n LANGUAGE sql\n VOLATILE STRICT AS\n$$\nDELETE\nFROM zdb.filters\nWHERE name = $1;\nINSERT INTO zdb.filters(name, definition)\nVALUES ($1, $2);\n$$;", "explanation": "SQL definition from the open-source zombodb PostgreSQL project.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "plpgsql_function", "is_postgresql_specific": true, "sql_length": 232, "num_statements": 3} {"question": "Rank exhibitions by the number of repeat visitors, ordered from the highest to the lowest?", "schema": "CREATE TABLE Exhibitions (ExhibitionID INT, Name VARCHAR(50)); INSERT INTO Exhibitions (ExhibitionID, Name) VALUES (1, 'Impressionists'), (2, 'Ancient Art'); CREATE TABLE Visits (VisitID INT, VisitorID INT, ExhibitionID INT); INSERT INTO Visits (VisitID, VisitorID, ExhibitionID) VALUES (1, 1, 1), (2, 2, 1), (3, 3, 2), (4, 1, 1), (5, 4, 2);", "sql": "SELECT ExhibitionID, Name, RANK() OVER (ORDER BY COUNT(VisitorID) DESC) AS RepeatVisitorRank FROM Visits V JOIN Exhibitions E ON V.ExhibitionID = E.ExhibitionID GROUP BY ExhibitionID, Name ORDER BY RepeatVisitorRank DESC;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_window_function", "is_postgresql_specific": true, "sql_length": 221, "num_statements": 1} {"question": "What is the total number of safety tests conducted by 'FutureAutomobiles'?", "schema": "CREATE TABLE SafetyTestingCounts (ID INT, Manufacturer VARCHAR(255), NumTests INT); INSERT INTO SafetyTestingCounts (ID, Manufacturer, NumTests) VALUES (1, 'Green Motors', 50), (2, 'FutureAutomobiles', 75), (3, 'Blue Cars', 60);", "sql": "SELECT SUM(NumTests) FROM SafetyTestingCounts WHERE Manufacturer = 'FutureAutomobiles';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 87, "num_statements": 1} {"question": "PostgreSQL regression test 'stats': Write the SELECT query (example 372).", "schema": null, "sql": "SELECT sum(reuses) AS reuses, sum(reads) AS reads, sum(evictions) AS evictions\n FROM pg_stat_io WHERE context = 'vacuum' \\gset io_sum_vac_strategy_after_\nSELECT :io_sum_vac_strategy_after_reads > :io_sum_vac_strategy_before_reads;", "explanation": "Regression test for Stats in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT sum(reuses) AS reuses, sum(reads) AS reads, sum(evictions) AS evictions\n FROM pg_stat_io WHERE context = 'vacuum' \\gset io_sum_vac_strategy_after_\nSELECT :io_sum_vac_strategy_after_reads > :io_sum_vac_strategy_before_reads) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "admin_maintenance", "is_postgresql_specific": true, "sql_length": 231, "num_statements": 1} {"question": "What is the most frequently ordered vegetarian item at location 1?", "schema": "CREATE TABLE sales_data_5 (sale_id INT, location_id INT, item_id INT, quantity_sold INT, sale_date DATE); INSERT INTO sales_data_5 (sale_id, location_id, item_id, quantity_sold, sale_date) VALUES (1, 1, 1, 50, '2021-05-01'), (2, 2, 2, 90, '2021-05-02'), (3, 1, 3, 80, '2021-05-03');", "sql": "SELECT location_id, item_id, SUM(quantity_sold) as total_sold FROM sales_data_5 WHERE location_id = 1 AND is_vegetarian = TRUE GROUP BY location_id, item_id ORDER BY total_sold DESC LIMIT 1;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 190, "num_statements": 1} {"question": "What are the total labor hours for all sustainable building projects in the city of Seattle?", "schema": "CREATE TABLE project (id INT, city VARCHAR(20), type VARCHAR(20), hours INT); INSERT INTO project (id, city, type, hours) VALUES (1, 'Seattle', 'Sustainable', 500), (2, 'NYC', 'Sustainable', 800), (3, 'Seattle', 'Traditional', 300);", "sql": "SELECT SUM(hours) FROM project WHERE city = 'Seattle' AND type = 'Sustainable';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 79, "num_statements": 1} {"question": "PostgreSQL regression test 'vacuum': Write the SELECT query (example 69).", "schema": null, "sql": "SELECT reltuples, relhassubclass\n FROM pg_class WHERE oid = 'past_parted'::regclass;", "explanation": "Regression test for Vacuum in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT reltuples, relhassubclass\n FROM pg_class WHERE oid = 'past_parted'::regclass) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": true, "sql_length": 85, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What Venue has a Status of five nations, and Against of 0?", "schema": "CREATE TABLE table_name_89 (venue VARCHAR, status VARCHAR, against VARCHAR)", "sql": "SELECT venue FROM table_name_89 WHERE status = 'five nations' AND against = 0;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 78, "num_statements": 1} {"question": "What is the minimum budget for a climate finance project in South America?", "schema": "CREATE TABLE climate_finance_projects (project_id INT, project_name VARCHAR(255), location VARCHAR(255), budget DECIMAL(10,2)); INSERT INTO climate_finance_projects (project_id, project_name, location, budget) VALUES (1, 'Renewable Energy in Brazil', 'Brazil', 3000000.00), (2, 'Energy Efficiency in Argentina', 'Argentina', 2000000.00), (3, 'Climate Resilience in Colombia', 'Colombia', 1000000.00);", "sql": "SELECT MIN(budget) FROM climate_finance_projects WHERE location = 'South America';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 82, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What are the official languages of the countries of players from Maryland or Duke college?", "schema": "CREATE TABLE country (Official_native_language VARCHAR, Country_id VARCHAR); CREATE TABLE match_season (Country VARCHAR, College VARCHAR)", "sql": "SELECT T1.Official_native_language FROM country AS T1 JOIN match_season AS T2 ON T1.Country_id = T2.Country WHERE T2.College = 'Maryland' OR T2.College = 'Duke';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 161, "num_statements": 1} {"question": "What is the total number of unique volunteers who have contributed more than '50' hours to each organization?", "schema": "CREATE TABLE Organizations (org_id INT, org_name TEXT); CREATE TABLE Volunteers (vol_id INT, volunteer_name TEXT, hours_contributed INT, org_id INT);", "sql": "SELECT O.org_name, COUNT(DISTINCT V.vol_id) as total_volunteers FROM Organizations O INNER JOIN Volunteers V ON O.org_id = V.org_id WHERE V.hours_contributed > 50 GROUP BY O.org_name;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 183, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'jsonb_plpython' (example 36).", "schema": null, "sql": "SELECT roundtrip('[\"string\", \"string2\"]'::jsonb);", "explanation": "Example query from the 'jsonb_plpython' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": true, "sql_length": 49, "num_statements": 1} {"question": "Which accommodations were provided to each student in the AccommodationHistory table?", "schema": "CREATE TABLE AccommodationHistory (studentID INT, accommodationType VARCHAR(50), startDate DATE, endDate DATE);", "sql": "SELECT studentID, GROUP_CONCAT(accommodationType) FROM AccommodationHistory GROUP BY studentID;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 95, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'btree_gin' (example 6).", "schema": null, "sql": "SELECT * FROM test_time WHERE i<='08:55:08'::time ORDER BY i;", "explanation": "Example query from the 'btree_gin' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 61, "num_statements": 1} {"question": "Update the 'rare_earth_market' table to reflect the current market price of Dysprosium as $210 per kg.", "schema": "CREATE TABLE rare_earth_market (id INT, element TEXT, current_price FLOAT);", "sql": "UPDATE rare_earth_market SET current_price = 210 WHERE element = 'Dysprosium';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 78, "num_statements": 1} {"question": "What is the total amount spent on ingredients for each menu item, excluding items with no sales, grouped by category?", "schema": "CREATE TABLE menu_items (id INT, name TEXT, category TEXT, sales INT); CREATE TABLE ingredient_costs (menu_item_id INT, ingredient TEXT, cost INT);", "sql": "SELECT menu_items.category, SUM(ingredient_costs.cost) FROM ingredient_costs JOIN menu_items ON ingredient_costs.menu_item_id = menu_items.id WHERE menu_items.sales > 0 GROUP BY menu_items.category;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 198, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the score against home team, Richmond?", "schema": "CREATE TABLE table_name_28 (away_team VARCHAR, home_team VARCHAR)", "sql": "SELECT away_team AS score FROM table_name_28 WHERE home_team = 'richmond';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 74, "num_statements": 1} {"question": "What is the percentage of uninsured individuals by race and ethnicity, and how does it compare to the overall percentage?", "schema": "CREATE TABLE RaceEthnicityData (RaceEthnicity VARCHAR(255), Uninsured DECIMAL(3,1)); INSERT INTO RaceEthnicityData (RaceEthnicity, Uninsured) VALUES ('Asian', 5.0), ('Black', 12.0), ('Hispanic', 18.0), ('White', 8.0); CREATE TABLE OverallData (OverallUninsured DECIMAL(3,1)); INSERT INTO OverallData (OverallUninsured) VALUES (10.0);", "sql": "SELECT RaceEthnicity, Uninsured, Uninsured * 100.0 / (SELECT OverallUninsured FROM OverallData) AS Percentage FROM RaceEthnicityData;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 133, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was Johnnie Parsons rank when he completed over 200 laps?", "schema": "CREATE TABLE table_name_67 (rank VARCHAR, laps INTEGER)", "sql": "SELECT rank FROM table_name_67 WHERE laps > 200;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 48, "num_statements": 1} {"question": "What is the total number of games designed by female designers, and the number of esports events where at least one of these games was played?", "schema": "CREATE TABLE Games (GameID INT, GameName VARCHAR(50), Genre VARCHAR(20)); CREATE TABLE GameDesigners (DesignerID INT, DesignerName VARCHAR(50), Gender VARCHAR(10)); CREATE TABLE GameDesign (GameID INT, DesignerID INT); CREATE TABLE EsportsEvents (EventID INT, GameID INT, EventName VARCHAR(50));", "sql": "SELECT COUNT(DISTINCT Games.GameID), COUNT(DISTINCT EsportsEvents.EventID) FROM Games INNER JOIN GameDesign ON Games.GameID = GameDesign.GameID INNER JOIN GameDesigners ON GameDesign.DesignerID = GameDesigners.DesignerID LEFT JOIN EsportsEvents ON Games.GameID = EsportsEvents.GameID WHERE GameDesigners.Gender = 'Female';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 322, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What are the highest points for the team of marlboro brm with brm p180 as the chassis?", "schema": "CREATE TABLE table_name_43 (points INTEGER, team VARCHAR, chassis VARCHAR)", "sql": "SELECT MAX(points) FROM table_name_43 WHERE team = 'marlboro brm' AND chassis = 'brm p180';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 91, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'numeric_big' (example 245).", "schema": null, "sql": "INSERT INTO num_exp_div VALUES (5,5,'1.000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000');", "explanation": "DML from PostgreSQL core regression test for Numeric Big.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 852, "num_statements": 1} {"question": "Write the SQL definition from PostgreSQL contrib extension 'contrib' (item 4).", "schema": null, "sql": "CREATE FUNCTION _int_overlap_sel(internal, oid, internal, integer)\nRETURNS float8\nAS 'MODULE_PATHNAME'\nLANGUAGE C STRICT STABLE;", "explanation": "SQL definition from the 'contrib' PostgreSQL contrib extension.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 128, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: When вуха (vúkha) is the belarusian how many slovaks are there?", "schema": "CREATE TABLE table_26757_4 (slovak VARCHAR, belarusian VARCHAR)", "sql": "SELECT COUNT(slovak) FROM table_26757_4 WHERE belarusian = 'вуха (vúkha)';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 74, "num_statements": 1} {"question": "Which ocean has the lowest average sea surface temperature?'", "schema": "CREATE TABLE ocean_temperatures (ocean TEXT, avg_temp FLOAT); INSERT INTO ocean_temperatures (ocean, avg_temp) VALUES ('Pacific', 28.0); INSERT INTO ocean_temperatures (ocean, avg_temp) VALUES ('Atlantic', 27.5); INSERT INTO ocean_temperatures (ocean, avg_temp) VALUES ('Arctic', 0.0);", "sql": "SELECT ocean, MIN(avg_temp) FROM ocean_temperatures;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 52, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Show all the Store_Name of drama workshop groups.", "schema": "CREATE TABLE Drama_Workshop_Groups (Store_Name VARCHAR)", "sql": "SELECT Store_Name FROM Drama_Workshop_Groups;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 45, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'spgist' (example 12).", "schema": null, "sql": "insert into spgist_box_tbl(b)\nselect box(point(i,j),point(i+s,j+s))\n from generate_series(1,100,5) i,\n generate_series(1,100,5) j,\n generate_series(1,10) s;", "explanation": "DML from PostgreSQL core regression test for Spgist.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 169, "num_statements": 1} {"question": "List all marine species that have a conservation status of 'Vulnerable' in the Southern Ocean?", "schema": "CREATE TABLE species (id INT, name VARCHAR(255), conservation_status VARCHAR(255), ocean_basin VARCHAR(255)); INSERT INTO species (id, name, conservation_status, ocean_basin) VALUES (1, 'Atlantic Salmon', 'Endangered', 'Atlantic'), (2, 'Blue Whale', 'Critically Endangered', 'Pacific'), (3, 'Southern Ocean Orca', 'Vulnerable', 'Southern');", "sql": "SELECT name FROM species WHERE conservation_status = 'Vulnerable' AND ocean_basin = 'Southern';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 95, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the numbers of the nba draft where the player went to kentucky", "schema": "CREATE TABLE table_11677760_31 (nba_draft VARCHAR, college VARCHAR)", "sql": "SELECT nba_draft FROM table_11677760_31 WHERE college = 'Kentucky';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 67, "num_statements": 1} {"question": "What is the total amount of research grants awarded to graduate students from the Computer Science department in the last 2 years?", "schema": "CREATE TABLE GrantRecipients(StudentID INT, Department VARCHAR(50), GrantID INT, GrantAmt FLOAT, GrantDate DATE); INSERT INTO GrantRecipients (StudentID, Department, GrantID, GrantAmt, GrantDate) VALUES (1, 'Computer Science', 1, 150000.00, '2021-01-01');", "sql": "SELECT SUM(GrantAmt) FROM GrantRecipients WHERE Department = 'Computer Science' AND YEAR(GrantDate) >= YEAR(CURRENT_DATE()) - 2;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 128, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the result of the election incumbent Brent Spence took place in?", "schema": "CREATE TABLE table_1342218_17 (result VARCHAR, incumbent VARCHAR)", "sql": "SELECT result FROM table_1342218_17 WHERE incumbent = 'Brent Spence';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 69, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'test_decoding' (example 41).", "schema": null, "sql": "SELECT regexp_replace(data, '^(.{100}).*(.{100})$', '\\1..\\2') FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'include-xids', '0', 'skip-empty-xacts', '1')\nWHERE data NOT LIKE '%INSERT: %';", "explanation": "Example query from the 'test_decoding' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 205, "num_statements": 1} {"question": "List the chemical_ids and total production quantities for chemicals produced in Argentina", "schema": "CREATE TABLE chemical_production (id INT PRIMARY KEY, chemical_id VARCHAR(10), quantity INT, country VARCHAR(50)); INSERT INTO chemical_production (id, chemical_id, quantity, country) VALUES (1, 'XY987', 700, 'Brazil'), (2, 'GH247', 600, 'India'), (3, 'XY987', 300, 'Australia'), (4, 'GH247', 500, 'India'), (5, 'GH247', 800, 'Brazil'), (6, 'XY987', 200, 'Chile'), (7, 'LM345', 150, 'Argentina'), (8, 'XY987', 400, 'Argentina');", "sql": "SELECT chemical_id, SUM(quantity) FROM chemical_production WHERE country = 'Argentina' GROUP BY chemical_id;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 108, "num_statements": 1} {"question": "Identify the number of unique passengers who have traveled on each route during the month of March 2023", "schema": "CREATE TABLE passengers (passenger_id INT, passenger_name VARCHAR(20)); CREATE TABLE passenger_trips (trip_id INT, passenger_id INT, route_id INT, trip_date DATE);", "sql": "SELECT routes.route_name, COUNT(DISTINCT passengers.passenger_id) FROM passengers JOIN passenger_trips ON passengers.passenger_id = passenger_trips.passenger_id JOIN routes ON passenger_trips.route_id = routes.route_id WHERE passenger_trips.trip_date BETWEEN '2023-03-01' AND '2023-03-31' GROUP BY routes.route_id, routes.route_name;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 333, "num_statements": 1} {"question": "What is the average CO2 emissions reduction for projects using carbon capture technology?", "schema": "CREATE TABLE co2_emissions (project_id INT, co2_reduction INT); INSERT INTO co2_emissions (project_id, co2_reduction) VALUES (1, 5000), (2, 6000), (3, 7000), (4, 4000), (5, 8000);", "sql": "SELECT AVG(co2_reduction) FROM co2_emissions WHERE project_id IN (SELECT project_id FROM carbon_capture);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 105, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was roberto travern's record when he fought against john salter?", "schema": "CREATE TABLE table_name_60 (record VARCHAR, opponent VARCHAR)", "sql": "SELECT record FROM table_name_60 WHERE opponent = 'john salter';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 64, "num_statements": 1} {"question": "What is the total number of underage visitors who have visited the museum in the last 3 months, by day?", "schema": "CREATE TABLE Visitors (VisitorID INT, Age INT, VisitDate DATE); INSERT INTO Visitors (VisitorID, Age, VisitDate) VALUES (1, 14, '2022-05-01'); INSERT INTO Visitors (VisitorID, Age, VisitDate) VALUES (2, 17, '2022-05-03'); INSERT INTO Visitors (VisitorID, Age, VisitDate) VALUES (3, 12, '2022-05-05');", "sql": "SELECT VisitDate, COUNT(VisitorID) as Count FROM Visitors WHERE Age < 18 AND VisitDate >= DATE_SUB(CURRENT_DATE, INTERVAL 3 MONTH) GROUP BY VisitDate;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 150, "num_statements": 1} {"question": "What is the average price of dishes in each menu category, excluding the cheapest and most expensive dishes?", "schema": "CREATE TABLE menu (category VARCHAR(255), price FLOAT); INSERT INTO menu (category, price) VALUES ('Appetizers', 7.99), ('Entrees', 14.99), ('Desserts', 5.99), ('Drinks', 2.99), ('Sides', 1.99);", "sql": "SELECT category, AVG(price) FROM (SELECT category, price FROM menu WHERE price NOT IN (SELECT MIN(price) FROM menu WHERE category = menu.category) AND price NOT IN (SELECT MAX(price) FROM menu WHERE category = menu.category)) AS filtered_menu GROUP BY category;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 261, "num_statements": 1} {"question": "Write the PL/pgSQL object from PostgreSQL regression test 'alter_generic' (example 186).", "schema": null, "sql": "ALTER TEXT SEARCH TEMPLATE alt_ts_temp2 SET SCHEMA alt_nsp2; -- failed (name conflict)\n\n-- invalid: non-lowercase quoted identifiers\nCREATE TEXT SEARCH TEMPLATE tstemp_case (\"Init\" = init_function);", "explanation": "PL/pgSQL object from PostgreSQL core test for Alter Generic.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 201, "num_statements": 2} {"question": "Generate PostgreSQL SQL for: How much money did David Toms get?", "schema": "CREATE TABLE table_name_16 (money___$__ VARCHAR, player VARCHAR)", "sql": "SELECT money___$__ FROM table_name_16 WHERE player = 'david toms';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'create_index' (example 452).", "schema": null, "sql": "INSERT INTO concur_reindex_tab3 VALUES (3, '[1,2]');", "explanation": "DML from PostgreSQL core regression test for Create Index.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 53, "num_statements": 1} {"question": "Which textile suppliers prioritize sustainability?", "schema": "CREATE TABLE TextileSuppliers (SupplierID INT, SupplierName TEXT, SustainabilityRating INT); INSERT INTO TextileSuppliers (SupplierID, SupplierName, SustainabilityRating) VALUES (1, 'GreenFabrics', 9), (2, 'EcoWeave', 8), (3, 'StandardTextiles', 5);", "sql": "SELECT SupplierName FROM TextileSuppliers WHERE SustainabilityRating >= 8;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 74, "num_statements": 1} {"question": "What is the average transaction amount for each customer-region pair?", "schema": "CREATE TABLE Transactions (id INT, customer_id INT, region VARCHAR(10), amount DECIMAL(10, 2)); INSERT INTO Transactions (id, customer_id, region, amount) VALUES (1, 10, 'Europe', 100), (2, 10, 'Asia', 200), (3, 11, 'Asia', 300), (4, 12, 'Europe', 400), (5, 10, 'Americas', 500), (6, 13, 'Americas', 600);", "sql": "SELECT customer_id, region, AVG(amount) as avg_amount FROM Transactions GROUP BY customer_id, region;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 101, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What were the high points on March 29?", "schema": "CREATE TABLE table_name_88 (high_points VARCHAR, date VARCHAR)", "sql": "SELECT high_points FROM table_name_88 WHERE date = 'march 29';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 62, "num_statements": 1} {"question": "What is the total number of emergency calls received in the \"northwest\" neighborhood in the months of January and February?", "schema": "CREATE TABLE monthly_emergency_calls (date DATE, neighborhood VARCHAR(20), calls INT); INSERT INTO monthly_emergency_calls (date, neighborhood, calls) VALUES ('2022-01-01', 'northwest', 50), ('2022-02-01', 'northwest', 60), ('2022-01-02', 'northwest', 55), ('2022-02-02', 'northwest', 65);", "sql": "SELECT SUM(calls) FROM monthly_emergency_calls WHERE neighborhood = 'northwest' AND EXTRACT(MONTH FROM date) IN (1, 2);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 119, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the highest 3620 value with a 5432 of 5432 and a 15122 greater than 15122?", "schema": "CREATE TABLE table_name_34 (Id VARCHAR)", "sql": "SELECT MAX(3620) FROM table_name_34 WHERE 5432 = 5432 AND 15122 > 15122;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 72, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'insert_conflict' (example 46).", "schema": null, "sql": "insert into insertconflicttest values (4, 'Mango') on conflict (fruit, key) do update set fruit = excluded.fruit;", "explanation": "DML from PostgreSQL core regression test for Insert Conflict.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "dml_insert", "is_postgresql_specific": true, "sql_length": 113, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: what is the notes when the time is 7:58.71?", "schema": "CREATE TABLE table_name_44 (notes VARCHAR, time VARCHAR)", "sql": "SELECT notes FROM table_name_44 WHERE time = '7:58.71';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 55, "num_statements": 1} {"question": "PostgreSQL regression test 'collate': Write the SELECT query (example 49).", "schema": null, "sql": "SELECT a, b::testdomain_p FROM collate_test2 ORDER BY 2;", "explanation": "Regression test for Collate in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT a, b::testdomain_p FROM collate_test2 ORDER BY 2) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 56, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What current conference is Post University a member of?", "schema": "CREATE TABLE table_12936521_2 (current_conference VARCHAR, institution VARCHAR)", "sql": "SELECT current_conference FROM table_12936521_2 WHERE institution = 'Post University';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 86, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: how many airport with rank being 4", "schema": "CREATE TABLE table_13836704_9 (airport VARCHAR, rank VARCHAR)", "sql": "SELECT COUNT(airport) FROM table_13836704_9 WHERE rank = 4;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 59, "num_statements": 1} {"question": "What is the minimum coral cover for the last 10 years?", "schema": "CREATE TABLE coral_cover (year INT, coral_cover FLOAT); INSERT INTO coral_cover (year, coral_cover) VALUES (2011, 25.0), (2012, 23.5), (2013, 22.2), (2014, 21.9), (2015, 21.1), (2016, 20.4), (2017, 19.8), (2018, 19.2), (2019, 18.8), (2020, 18.5);", "sql": "SELECT MIN(coral_cover) FROM coral_cover WHERE year BETWEEN (YEAR(CURRENT_DATE) - 10) AND YEAR(CURRENT_DATE);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 109, "num_statements": 1} {"question": "What is the total number of military equipment sold by Boeing to the Australian government in 2021?", "schema": "CREATE TABLE military_sales_4 (supplier VARCHAR(255), buyer VARCHAR(255), equipment VARCHAR(255), year INTEGER, quantity INTEGER); INSERT INTO military_sales_4 (supplier, buyer, equipment, year, quantity) VALUES ('Boeing', 'Australian Government', 'Super Hornet Fighter Jet', 2021, 5), ('Boeing', 'Australian Government', 'Chinook Helicopter', 2021, 2);", "sql": "SELECT SUM(quantity) FROM military_sales_4 WHERE supplier = 'Boeing' AND buyer = 'Australian Government' AND year = 2021;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 121, "num_statements": 1} {"question": "Determine the percentage change in monthly production of Lanthanum and Cerium from 2015 to 2016", "schema": "CREATE TABLE production (element VARCHAR(10), year INT, month INT, quantity INT); INSERT INTO production (element, year, month, quantity) VALUES ('Lanthanum', 2015, 1, 100), ('Lanthanum', 2015, 2, 110), ('Lanthanum', 2016, 1, 120), ('Lanthanum', 2016, 2, 130), ('Cerium', 2015, 1, 140), ('Cerium', 2015, 2, 150), ('Cerium', 2016, 1, 160), ('Cerium', 2016, 2, 170);", "sql": "SELECT element, (SUM(quantity * CASE WHEN year = 2016 THEN 1 ELSE -1 END) / SUM(quantity) * 100) AS percentage_change FROM production WHERE element IN ('Lanthanum', 'Cerium') GROUP BY element;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 192, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which Player has Goals larger than 10, and a Debut year smaller than 1993, and Years at club of 1990–1993?", "schema": "CREATE TABLE table_name_71 (player VARCHAR, years_at_club VARCHAR, goals VARCHAR, debut_year VARCHAR)", "sql": "SELECT player FROM table_name_71 WHERE goals > 10 AND debut_year < 1993 AND years_at_club = '1990–1993';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 104, "num_statements": 1} {"question": "What is the maximum number of accessible technology initiatives in a region?", "schema": "CREATE TABLE accessible_technology_initiatives (id INT, initiative_name VARCHAR(50), region VARCHAR(50)); INSERT INTO accessible_technology_initiatives (id, initiative_name, region) VALUES (1, 'Accessible Software Distribution', 'Asia'), (2, 'Hardware Adaptation for Persons with Disabilities', 'Europe'), (3, 'Inclusive Technology Education', 'Africa'), (4, 'Assistive Technology Research', 'North America');", "sql": "SELECT region, COUNT(*) as initiative_count FROM accessible_technology_initiatives GROUP BY region ORDER BY initiative_count DESC LIMIT 1;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 138, "num_statements": 1} {"question": "What are the unique severity levels in the 'incidents' table?", "schema": "CREATE TABLE incidents (incident_id INT, region VARCHAR(50), severity VARCHAR(10)); INSERT INTO incidents (incident_id, region, severity) VALUES (1, 'region_1', 'medium'), (2, 'region_2', 'high'), (3, 'region_3', 'high'), (4, 'region_1', 'low'), (5, 'region_3', 'medium');", "sql": "SELECT DISTINCT severity FROM incidents;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 40, "num_statements": 1} {"question": "Increase the size of farm 'Earthy Bounty' by 25", "schema": "CREATE TABLE farms (id INT, name TEXT, location TEXT, size FLOAT); INSERT INTO farms (id, name, location, size) VALUES (1, 'Earthy Bounty', 'US', 200.0);", "sql": "UPDATE farms SET size = size + 25 WHERE name = 'Earthy Bounty';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 63, "num_statements": 1} {"question": "How can we update donor information for a specific region?", "schema": "CREATE TABLE Donors (DonorID INT, Region VARCHAR(50), Name VARCHAR(50), Age INT, Donation DECIMAL(10,2)); INSERT INTO Donors (DonorID, Region, Name, Age, Donation) VALUES (1, 'Africa', 'John Doe', 30, 500), (2, 'Asia', 'Jane Smith', 40, 700), (3, 'Europe', 'Alice Johnson', 50, 900);", "sql": "UPDATE Donors SET Name = 'James Brown', Age = 45 WHERE Region = 'Africa';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 73, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: The Utah Jazz Player from UTEP was what nationality?", "schema": "CREATE TABLE table_name_51 (nationality VARCHAR, school_club_team VARCHAR)", "sql": "SELECT nationality FROM table_name_51 WHERE school_club_team = 'utep';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 70, "num_statements": 1} {"question": "List all policies with a premium greater than $2000 for policyholders living in 'California' or 'New York'.", "schema": "CREATE TABLE Policies (PolicyID INT, Premium DECIMAL(10, 2), PolicyholderState VARCHAR(10)); INSERT INTO Policies (PolicyID, Premium, PolicyholderState) VALUES (1, 2500, 'California'), (2, 1500, 'New York'), (3, 1000, 'California');", "sql": "SELECT * FROM Policies WHERE PolicyholderState IN ('California', 'New York') AND Premium > 2000;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 96, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who were the candidates when Shirley Chisholm was the incumbent?", "schema": "CREATE TABLE table_1341663_33 (candidates VARCHAR, incumbent VARCHAR)", "sql": "SELECT candidates FROM table_1341663_33 WHERE incumbent = 'Shirley Chisholm';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 77, "num_statements": 1} {"question": "Show me the highest confidence threat indicator for each category that was updated in 2021.", "schema": "CREATE TABLE ThreatIntel (id INT PRIMARY KEY, indicator_type VARCHAR(255), indicator_value VARCHAR(255), confidence INT, category VARCHAR(255), description TEXT, last_updated DATE); INSERT INTO ThreatIntel (id, indicator_type, indicator_value, confidence, category, description, last_updated) VALUES (1, 'IP', '138.197.228.54', 90, 'Malware', 'Known malware C&C server', '2021-01-10'), (2, 'Domain', 'example-malware[.]com', 85, 'Phishing', 'Phishing domain', '2021-02-15');", "sql": "SELECT category, MAX(confidence) FROM ThreatIntel WHERE last_updated >= '2021-01-01' GROUP BY category;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 103, "num_statements": 1} {"question": "What is the maximum ticket price for classical concerts?", "schema": "CREATE TABLE concert_prices (id INT, type VARCHAR(10), price DECIMAL(5,2)); INSERT INTO concert_prices (id, type, price) VALUES (1, 'classical', 35.50), (2, 'pop', 20.00), (3, 'classical', 40.00), (4, 'jazz', 28.30);", "sql": "SELECT MAX(price) FROM concert_prices WHERE type = 'classical';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 63, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: with games more than 22 what is the rebound total?", "schema": "CREATE TABLE table_name_12 (rebounds INTEGER, games INTEGER)", "sql": "SELECT SUM(rebounds) FROM table_name_12 WHERE games > 22;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 57, "num_statements": 1} {"question": "Which countries have the lowest number of suppliers with organic products in the SupplyChainTransparency table?", "schema": "CREATE TABLE SupplyChainTransparency(supplier_id INT, supplier_country VARCHAR(50), is_organic BOOLEAN);", "sql": "SELECT supplier_country, COUNT(*) as num_suppliers FROM SupplyChainTransparency WHERE is_organic = TRUE GROUP BY supplier_country ORDER BY num_suppliers ASC LIMIT 1;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 165, "num_statements": 1} {"question": "What is the average cost of materials used in each type of public works project?", "schema": "CREATE TABLE project_material_costs (id INT, project_type VARCHAR(255), material_type VARCHAR(255), cost FLOAT); INSERT INTO project_material_costs (id, project_type, material_type, cost) VALUES (1, 'Bridge', 'Steel', 150000.00), (2, 'Road', 'Asphalt', 50000.00), (3, 'Building', 'Concrete', 200000.00), (4, 'Bridge', 'Concrete', 300000.00);", "sql": "SELECT project_type, AVG(cost) as avg_cost FROM project_material_costs GROUP BY project_type;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 93, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the engine for the Bettenhausen Motorsports team?", "schema": "CREATE TABLE table_name_3 (engine VARCHAR, team VARCHAR)", "sql": "SELECT engine FROM table_name_3 WHERE team = 'bettenhausen motorsports';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 72, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What formated cataloged alca-9196?", "schema": "CREATE TABLE table_name_10 (format VARCHAR, catalog VARCHAR)", "sql": "SELECT format FROM table_name_10 WHERE catalog = 'alca-9196';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 61, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What team did the Red Sox play against on June 3?", "schema": "CREATE TABLE table_name_16 (opponent VARCHAR, date VARCHAR)", "sql": "SELECT opponent FROM table_name_16 WHERE date = 'june 3';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 57, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'copy2' (example 1).", "schema": null, "sql": "CREATE TEMP TABLE x (\n\ta serial,\n\tb int,\n\tc text not null default 'stuff',\n\td text,\n\te text\n) ;", "explanation": "DDL from PostgreSQL core regression test for Copy2.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "other", "is_postgresql_specific": true, "sql_length": 95, "num_statements": 1} {"question": "Calculate the percentage of the total cargo tonnage that each shipping line is responsible for, for all shipping lines operating in the North America region.", "schema": "CREATE TABLE shipping_lines(line_id INT, line_name TEXT, region TEXT);CREATE TABLE cargo(cargo_id INT, line_id INT, port_id INT, tonnage INT);INSERT INTO shipping_lines VALUES (1,'Line A','North America'),(2,'Line B','North America'),(3,'Line C','Asia');INSERT INTO cargo VALUES (1,1,1,500),(2,1,1,800),(3,2,2,300),(4,3,3,500),(5,1,1,700);", "sql": "SELECT s.line_name, (SUM(c.tonnage) * 100.0 / (SELECT SUM(tonnage) FROM cargo)) as percentage FROM shipping_lines s JOIN cargo c ON s.line_id = c.line_id WHERE s.region = 'North America' GROUP BY s.line_name;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 208, "num_statements": 1} {"question": "What is the total number of research expeditions per year?", "schema": "CREATE TABLE expeditions (year INT, expedition_count INT);", "sql": "SELECT year, SUM(expedition_count) FROM expeditions GROUP BY year;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "Write the SQL definition from PostgreSQL contrib extension 'contrib' (item 238).", "schema": null, "sql": "-- Create the operator class\nCREATE OPERATOR CLASS gist_macaddr8_ops\nDEFAULT FOR TYPE macaddr8 USING gist\nAS\n\tOPERATOR\t1\t< ,\n\tOPERATOR\t2\t<= ,\n\tOPERATOR\t3\t= ,\n\tOPERATOR\t4\t>= ,\n\tOPERATOR\t5\t> ,\n\tOPERATOR\t6\t<> ,\n\tFUNCTION\t1\tgbt_macad8_consistent (internal, macaddr8, int2, oid, internal),\n\tFUNCTION\t2\tgbt_macad8_union (internal, internal),\n\tFUNCTION\t3\tgbt_macad8_compress (internal),\n\tFUNCTION\t4\tgbt_decompress (internal),\n\tFUNCTION\t5\tgbt_macad8_penalty (internal, internal, internal),\n\tFUNCTION\t6\tgbt_macad8_picksplit (internal, internal),\n\tFUNCTION\t7\tgbt_macad8_same (gbtreekey16, gbtreekey16, internal),\n\tFUNCTION\t9\tgbt_macad8_fetch (internal),\n\tFUNCTION\t11\tgbt_macad8_sortsupport (internal),\n\tFUNCTION\t12 (\"any\", \"any\") gist_translate_cmptype_btree (int),\n\tSTORAGE\t\tgbtreekey16;", "explanation": "SQL definition from the 'contrib' PostgreSQL contrib extension.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "ddl_advanced", "is_postgresql_specific": true, "sql_length": 781, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the Format of the French Frequency with a Call sign of CKRL-FM?", "schema": "CREATE TABLE table_name_56 (format VARCHAR, notes VARCHAR, call_sign VARCHAR)", "sql": "SELECT format FROM table_name_56 WHERE notes = 'french' AND call_sign = 'ckrl-fm';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 82, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which Set 3 has a Score of 3–2, and a Set 2 of 25–23, and a Time of 19:00?", "schema": "CREATE TABLE table_name_86 (set_3 VARCHAR, time VARCHAR, score VARCHAR, set_2 VARCHAR)", "sql": "SELECT set_3 FROM table_name_86 WHERE score = '3–2' AND set_2 = '25–23' AND time = '19:00';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 91, "num_statements": 1} {"question": "What is the percentage of veterans employed in each state?", "schema": "CREATE TABLE veteran_employment (state TEXT, num_veterans INT, total_employees INT); INSERT INTO veteran_employment VALUES ('California', 10000, 50000), ('Texas', 12000, 60000);", "sql": "SELECT state, (num_veterans::DECIMAL(10,2) / total_employees::DECIMAL(10,2)) * 100 AS veteran_percentage FROM veteran_employment;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 129, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: who had high points on march 14?", "schema": "CREATE TABLE table_27700375_10 (high_points VARCHAR, date VARCHAR)", "sql": "SELECT high_points FROM table_27700375_10 WHERE date = 'March 14';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "What are the auto show locations for the 'GreenTech' auto show in 2022?", "schema": "CREATE TABLE AutoShowInfo (ID INT, Show VARCHAR(255), Location VARCHAR(255), Year INT); INSERT INTO AutoShowInfo (ID, Show, Location, Year) VALUES (1, 'GreenTech', 'Detroit', 2022), (2, 'GreenTech', 'Tokyo', 2023), (3, 'EcoExpo', 'Paris', 2022);", "sql": "SELECT Location FROM AutoShowInfo WHERE Show = 'GreenTech' AND Year = 2022;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 75, "num_statements": 1} {"question": "PostgreSQL regression test 'timestamptz': Write the SELECT query (example 274).", "schema": null, "sql": "SELECT '2011-03-27 03:00:00 MSK'::timestamptz;", "explanation": "Regression test for Timestamptz in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT '2011-03-27 03:00:00 MSK'::timestamptz) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 46, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: For what episode was the nominee (s) Walon Green, Joe Sachs nominated as a result?", "schema": "CREATE TABLE table_name_52 (episode VARCHAR, result VARCHAR, nominee_s_ VARCHAR)", "sql": "SELECT episode FROM table_name_52 WHERE result = 'nominated' AND nominee_s_ = 'walon green, joe sachs';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 103, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'partition_info' (example 56).", "schema": null, "sql": "CREATE MATERIALIZED VIEW ptif_test_matview AS SELECT 1;", "explanation": "DDL from PostgreSQL core regression test for Partition Info.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_view", "is_postgresql_specific": true, "sql_length": 55, "num_statements": 1} {"question": "Find the total number of rural healthcare facilities in each state.", "schema": "CREATE TABLE HealthcareFacilities (ID INT, Name TEXT, Location TEXT, State TEXT); INSERT INTO HealthcareFacilities VALUES (1, 'Rural General Hospital', 'Smalltown, KY', 'KY'); INSERT INTO HealthcareFacilities VALUES (2, 'Rural Health Clinic', 'Village, WV', 'WV'); INSERT INTO HealthcareFacilities VALUES (3, 'Rural General Hospital', 'Othertown, KY', 'KY');", "sql": "SELECT State, COUNT(*) AS Total FROM HealthcareFacilities GROUP BY State;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 73, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which Date has a Game of game 3?", "schema": "CREATE TABLE table_name_90 (date VARCHAR, game VARCHAR)", "sql": "SELECT date FROM table_name_90 WHERE game = 'game 3';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 53, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which Grid has a Time of +13.999?", "schema": "CREATE TABLE table_name_65 (grid INTEGER, time VARCHAR)", "sql": "SELECT MIN(grid) FROM table_name_65 WHERE time = '+13.999';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 59, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'random' (example 35).", "schema": null, "sql": "CREATE FUNCTION ks_test_uniform_random_numeric_in_range()\nRETURNS boolean AS\n$$\nDECLARE\n n int := 1000; -- Number of samples\n c float8 := 1.94947; -- Critical value for 99.9% confidence\n ok boolean;\nBEGIN\n ok := (\n WITH samples AS (\n SELECT random(0, 0.999999) r FROM generate_series(1, n) ORDER BY 1\n ), indexed_samples AS (\n SELECT (row_number() OVER())-1.0 i, r FROM samples\n )\n SELECT max(abs(i/n-r)) < c / sqrt(n) FROM indexed_samples\n );\n RETURN ok;\nEND\n$$\nLANGUAGE plpgsql;", "explanation": "DDL from PostgreSQL core regression test for Random.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "plpgsql_function", "is_postgresql_specific": true, "sql_length": 516, "num_statements": 6} {"question": "What is the minimum BMI of users aged 40-50?", "schema": "CREATE TABLE users (id INT, age INT, gender VARCHAR(10), height INT, weight INT); INSERT INTO users (id, age, gender, height, weight) VALUES (1, 45, 'Male', 175, 80); INSERT INTO users (id, age, gender, height, weight) VALUES (2, 35, 'Female', 165, 60); INSERT INTO users (id, age, gender, height, weight) VALUES (3, 50, 'Male', 180, 90); INSERT INTO users (id, age, gender, height, weight) VALUES (4, 48, 'Female', 170, 70);", "sql": "SELECT MIN(weight / POW(height / 100.0, 2)) as min_bmi FROM users WHERE age BETWEEN 40 AND 50;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 94, "num_statements": 1} {"question": "Which ZIP codes have the highest obesity rates in Texas?", "schema": "CREATE TABLE zips (id INT, zip VARCHAR(10), state VARCHAR(255)); INSERT INTO zips (id, zip, state) VALUES (1, '12345', 'Texas'); CREATE TABLE health_surveys (id INT, zip VARCHAR(10), obesity_rate DECIMAL(5,2)); INSERT INTO health_surveys (id, zip, obesity_rate) VALUES (1, '12345', 35.6);", "sql": "SELECT z.zip, h.obesity_rate FROM zips z JOIN (SELECT zip, MAX(obesity_rate) AS max_obesity_rate FROM health_surveys WHERE state = 'Texas' GROUP BY zip) hs ON z.zip = hs.zip JOIN health_surveys h ON z.zip = h.zip WHERE z.state = 'Texas' ORDER BY h.obesity_rate DESC;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 266, "num_statements": 1} {"question": "PL/pgSQL test: Plperl Setup (example 13).", "schema": null, "sql": "-- Must reconnect to avoid failure with non-MULTIPLICITY Perl interpreters\n\\c -\nSET search_path = plperl_setup_scratch;", "explanation": "PL/pgSQL example from PostgreSQL source test for Plperl Setup.", "validation_query": null, "source": "plpgsql_source", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 119, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who had an Iflop of 2.5%?", "schema": "CREATE TABLE table_name_55 (party VARCHAR, ifop_5_30_09 VARCHAR)", "sql": "SELECT party FROM table_name_55 WHERE ifop_5_30_09 = '2.5%';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 60, "num_statements": 1} {"question": "What is the number of new hires in the last 3 months, grouped by hiring manager and total tenure at the company, ordered by the highest number of new hires?", "schema": "CREATE TABLE Employees (EmployeeID INT, HiringManager VARCHAR(20), Tenure INT, HireDate DATE); INSERT INTO Employees (EmployeeID, HiringManager, Tenure, HireDate) VALUES (1, 'John Doe', 2, '2020-01-01'), (2, 'Jane Smith', 5, '2019-01-01'), (3, 'John Doe', 3, '2021-01-01'), (4, 'Jane Smith', 1, '2022-01-01'), (5, 'John Doe', 6, '2018-01-01');", "sql": "SELECT HiringManager, Tenure, COUNT(*) as Num_NewHires FROM Employees WHERE HireDate >= DATE_SUB(CURRENT_DATE, INTERVAL 3 MONTH) GROUP BY HiringManager, Tenure ORDER BY Num_NewHires DESC;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 187, "num_statements": 1} {"question": "PostgreSQL regression test 'xml': Write the SELECT query (example 113).", "schema": null, "sql": "SELECT xml 'abc' IS NOT DOCUMENT;", "explanation": "Regression test for Xml in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT xml 'abc' IS NOT DOCUMENT) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 33, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What's the part number of the model with TDP of 2.9 (max.4.1~5.4) w?", "schema": "CREATE TABLE table_24096813_15 (part_number_s_ VARCHAR, tdp VARCHAR)", "sql": "SELECT part_number_s_ FROM table_24096813_15 WHERE tdp = '2.9 (Max.4.1~5.4) W';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 79, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who had the highest rebounds of the game on April 7?", "schema": "CREATE TABLE table_name_15 (high_rebounds VARCHAR, date VARCHAR)", "sql": "SELECT high_rebounds FROM table_name_15 WHERE date = 'april 7';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 63, "num_statements": 1} {"question": "What was the total R&D expenditure for 'DrugL''s clinical trials in 2020?", "schema": "CREATE TABLE clinical_trials (drug_name TEXT, rd_expenditure FLOAT, year INT); INSERT INTO clinical_trials (drug_name, rd_expenditure, year) VALUES ('DrugL', 9000000.0, 2020), ('DrugL', 7000000.0, 2019);", "sql": "SELECT drug_name, SUM(rd_expenditure) FROM clinical_trials WHERE drug_name = 'DrugL' AND year = 2020;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 101, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the melbourne for adelaide of no with auckland of yes and gold coast of yes", "schema": "CREATE TABLE table_name_49 (melbourne VARCHAR, gold_coast VARCHAR, adelaide VARCHAR, auckland VARCHAR)", "sql": "SELECT melbourne FROM table_name_49 WHERE adelaide = 'no' AND auckland = 'yes' AND gold_coast = 'no';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 101, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What are the titles of segment b when segment c is standby generators (part 1)?", "schema": "CREATE TABLE table_15187735_21 (segment_b VARCHAR, segment_c VARCHAR)", "sql": "SELECT segment_b FROM table_15187735_21 WHERE segment_c = 'Standby Generators (Part 1)';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 88, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the name of the race from round 1?", "schema": "CREATE TABLE table_name_90 (race_name VARCHAR, round VARCHAR)", "sql": "SELECT race_name FROM table_name_90 WHERE round = 1;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 52, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who was the away team when North Melbourne was the home team?", "schema": "CREATE TABLE table_name_17 (away_team VARCHAR, home_team VARCHAR)", "sql": "SELECT away_team FROM table_name_17 WHERE home_team = 'north melbourne';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 72, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which Casualties have a hostile Nature of incident and Circumstances of mortar attack?", "schema": "CREATE TABLE table_name_67 (casualties VARCHAR, nature_of_incident VARCHAR, circumstances VARCHAR)", "sql": "SELECT casualties FROM table_name_67 WHERE nature_of_incident = 'hostile' AND circumstances = 'mortar attack';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 110, "num_statements": 1} {"question": "What are the names of all military innovation projects that have not yet started?", "schema": "CREATE TABLE military_innovation (project_id INT, name TEXT, start_date DATE); INSERT INTO military_innovation (project_id, name, start_date) VALUES (1, 'Project A', '2022-01-01'), (2, 'Project B', '2023-01-01'), (3, 'Project C', NULL);", "sql": "SELECT name FROM military_innovation WHERE start_date IS NULL;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 62, "num_statements": 1} {"question": "What is the total CO2 emission for each country per year?", "schema": "CREATE TABLE country (country_id INT, country_name VARCHAR(255)); INSERT INTO country (country_id, country_name) VALUES (1, 'CountryA'), (2, 'CountryB'); CREATE TABLE co2_emission (year INT, country_id INT, co2_emission INT); INSERT INTO co2_emission (year, country_id, co2_emission) VALUES (2000, 1, 1500), (2000, 2, 2000), (2001, 1, 1600), (2001, 2, 2200), (2002, 1, 1400), (2002, 2, 1800);", "sql": "SELECT country_id, SUM(co2_emission) as total_emission FROM co2_emission GROUP BY country_id, YEAR(year);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 105, "num_statements": 1} {"question": "How many high-risk customers are there in California?", "schema": "CREATE TABLE customers (customer_id INT, customer_name VARCHAR(50), state VARCHAR(20), risk_level VARCHAR(10)); INSERT INTO customers (customer_id, customer_name, state, risk_level) VALUES (1, 'John Doe', 'CA', 'high'), (2, 'Jane Smith', 'NY', 'medium');", "sql": "SELECT COUNT(*) FROM customers WHERE state = 'CA' AND risk_level = 'high';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 74, "num_statements": 1} {"question": "Display the number of visitors to sustainable tourism events in Canada for each year", "schema": "CREATE TABLE visitors (id INT, event_type VARCHAR(50), year INT, country VARCHAR(50)); INSERT INTO visitors (id, event_type, year, country) VALUES (1, 'Sustainable', 2018, 'Canada'), (2, 'Non-Sustainable', 2019, 'Canada'), (3, 'Sustainable', 2017, 'Canada');", "sql": "SELECT year, COUNT(*) FROM visitors WHERE event_type = 'Sustainable' AND country = 'Canada' GROUP BY year;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 106, "num_statements": 1} {"question": "PostgreSQL regression test 'limit': Write the SELECT query (example 5).", "schema": null, "sql": "SELECT ''::text AS zero, unique1, unique2, stringu1\n\t\tFROM onek WHERE unique1 < 50\n\t\tORDER BY unique1 DESC LIMIT 8 OFFSET 99;", "explanation": "Regression test for Limit in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT ''::text AS zero, unique1, unique2, stringu1\n\t\tFROM onek WHERE unique1 < 50\n\t\tORDER BY unique1 DESC LIMIT 8 OFFSET 99) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 125, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What time is MDT when EDT is set and PDT is 6:00 a.m.?", "schema": "CREATE TABLE table_name_58 (mdt___6_utc_ VARCHAR, edt___4_utc_ VARCHAR, pdt___7_utc_ VARCHAR)", "sql": "SELECT mdt___6_utc_ FROM table_name_58 WHERE edt___4_utc_ = 'set' AND pdt___7_utc_ = '6:00 a.m.';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 97, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: what is the total number of publisher where cover date is may 1939", "schema": "CREATE TABLE table_1217448_1 (publisher VARCHAR, cover_date VARCHAR)", "sql": "SELECT COUNT(publisher) FROM table_1217448_1 WHERE cover_date = 'May 1939';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 75, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the sum of gold which has a silver more than 1", "schema": "CREATE TABLE table_name_23 (gold INTEGER, silver INTEGER)", "sql": "SELECT SUM(gold) FROM table_name_23 WHERE silver > 1;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 53, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the democratic seat plurality with 29% democrat?", "schema": "CREATE TABLE table_name_94 (democratic_seat_plurality VARCHAR, percentage_democrats VARCHAR)", "sql": "SELECT democratic_seat_plurality FROM table_name_94 WHERE percentage_democrats = '29%';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 87, "num_statements": 1} {"question": "What is the total number of union members in the education sector?", "schema": "CREATE TABLE union_members (id INT, workplace_id INT, member_name TEXT, member_join_date DATE, member_status TEXT); CREATE TABLE workplaces (id INT, name TEXT, location TEXT, sector TEXT, total_employees INT, successful_cb BOOLEAN, cb_year INT); INSERT INTO workplaces (id, name, location, sector, total_employees, successful_cb, cb_year) VALUES (1, 'School A', 'City X', 'education', 50, true, 2020), (2, 'University B', 'City Y', 'education', 3000, true, 2019); INSERT INTO union_members (id, workplace_id, member_name, member_join_date, member_status) VALUES (1, 1, 'John Doe', '2018-01-01', 'active'), (2, 1, 'Jane Smith', '2019-05-15', 'active'), (3, 2, 'Mike Johnson', '2020-03-01', 'active');", "sql": "SELECT SUM(um.member_status = 'active'::INTEGER) FROM union_members um JOIN workplaces w ON um.workplace_id = w.id WHERE w.sector = 'education';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 144, "num_statements": 1} {"question": "PostgreSQL regression test 'partition_info': Write the SELECT query (example 70).", "schema": null, "sql": "SELECT pg_partition_root('ptif_li_child');", "explanation": "Regression test for Partition Info in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT pg_partition_root('ptif_li_child')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 42, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: what's the dbeingtribution with grsecurity being optional and compile time buffer checks being yes", "schema": "CREATE TABLE table_1357052_6 (distribution VARCHAR, grsecurity VARCHAR, compile_time_buffer_checks VARCHAR)", "sql": "SELECT distribution FROM table_1357052_6 WHERE grsecurity = 'Optional' AND compile_time_buffer_checks = 'Yes';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 110, "num_statements": 1} {"question": "PostgreSQL regression test 'create_view': Write the SELECT query (example 259).", "schema": null, "sql": "select pg_get_viewdef('tt16v', true);", "explanation": "Regression test for Create View in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select pg_get_viewdef('tt16v', true)) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 37, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'numeric_big' (example 283).", "schema": null, "sql": "INSERT INTO num_exp_sub VALUES (6,5,'652755630.48149779047555948642041898982227467525089211162244620449564375525368726526946672639857607193613449752505543620695003436531392789029513380101663750625024853263344909355177280161504414335005574882649025508632900995595004153086358670541462762210415346958050909878501048483523600711486406055424807840429541335391538322886495085448421556770991545781035298449067051916630343957356635391594362639819978677032855590055900561501350354631803808000307050416047072513406855040715556454205065332997338225626635780147287003130754254277103928406089109802521803537038957372612837169223905290912251006321930223154562110264217937');", "explanation": "DML from PostgreSQL core regression test for Numeric Big.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 661, "num_statements": 1} {"question": "PostgreSQL regression test 'arrays': Write the SELECT query (example 135).", "schema": null, "sql": "SELECT array_cat(ARRAY[[3,4],[5,6]], ARRAY[1,2]) AS \"{{3,4},{5,6},{1,2}}\";", "explanation": "Regression test for Arrays in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT array_cat(ARRAY[[3,4],[5,6]], ARRAY[1,2]) AS \"{{3,4},{5,6},{1,2}}\") AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": true, "sql_length": 74, "num_statements": 1} {"question": "List all the completed rural infrastructure projects in the 'rural_infrastructure' table.", "schema": "CREATE TABLE rural_infrastructure (id INT PRIMARY KEY, project_name VARCHAR(100), budget INT, start_date DATE, end_date DATE, status VARCHAR(20));", "sql": "SELECT * FROM rural_infrastructure WHERE status = 'completed';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 62, "num_statements": 1} {"question": "Drop the 'claims' table", "schema": "CREATE TABLE claims (claim_id INT PRIMARY KEY, policyholder_id INT, claim_amount DECIMAL(10, 2), claim_date DATE);", "sql": "DROP TABLE claims;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 18, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the highest Average, when Goals is \"34\", and when Matches is less than 37?", "schema": "CREATE TABLE table_name_41 (average INTEGER, goals VARCHAR, matches VARCHAR)", "sql": "SELECT MAX(average) FROM table_name_41 WHERE goals = 34 AND matches < 37;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 73, "num_statements": 1} {"question": "pgTAP test for Privs (assertion 64).", "schema": null, "sql": "SELECT * FROM check_test(\n language_privs_are( 'plpgsql', current_user, '{USAGE}' ),\n true,\n 'language_privs_are(lang, role, privs, desc)',\n 'Role ' || quote_ident(current_user) || ' should be granted USAGE on language plpgsql',\n ''\n);", "explanation": "SQL assertion from pgTAP test suite for Privs.", "validation_query": null, "source": "pgtap_tests", "difficulty": "intermediate", "category": "plpgsql", "is_postgresql_specific": true, "sql_length": 250, "num_statements": 1} {"question": "List the number of unique mental health conditions per patient in Texas.", "schema": "CREATE TABLE patient_mental_health_info (patient_id INT, condition VARCHAR(50)); INSERT INTO patient_mental_health_info (patient_id, condition) VALUES (1, 'Anxiety'), (1, 'Depression'), (2, 'Depression'), (3, 'PTSD'), (3, 'Anxiety'); CREATE TABLE patient_state (patient_id INT, state VARCHAR(50)); INSERT INTO patient_state (patient_id, state) VALUES (1, 'Texas'), (2, 'Texas'), (3, 'California');", "sql": "SELECT patient_id, COUNT(DISTINCT condition) AS unique_conditions FROM patient_mental_health_info JOIN patient_state ON patient_state.patient_id = patient_mental_health_info.patient_id WHERE state = 'Texas' GROUP BY patient_id;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 227, "num_statements": 1} {"question": "What is the total revenue generated from each garment type in the year 2022?", "schema": "CREATE TABLE sales (sale_id INT, garment_type VARCHAR(30), sale_date DATE, revenue DECIMAL(10,2));", "sql": "SELECT garment_type, YEAR(sale_date) AS year, SUM(revenue) AS total_revenue FROM sales WHERE YEAR(sale_date) = 2022 GROUP BY garment_type, year;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 144, "num_statements": 1} {"question": "Add a new record to the \"programs\" table for a program named 'Arts and Crafts' with an ID of 5.", "schema": "CREATE TABLE programs (id INT, name TEXT); INSERT INTO programs (id, name) VALUES (1, 'Feeding the Hungry'), (2, 'Tutoring Kids'), (3, 'Cleaning the Environment'), (4, 'Medical Aid');", "sql": "INSERT INTO programs (id, name) VALUES (5, 'Arts and Crafts');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 62, "num_statements": 1} {"question": "Write the SQL definition from PostgreSQL contrib extension 'tablefunc' (item 53).", "schema": null, "sql": "-- check it works with OUT parameters\n\nCREATE FUNCTION crosstab_out(text, text,\n OUT rowid text, OUT rowdt timestamp,\n OUT temperature int4, OUT test_result text,\n OUT test_startdate timestamp, OUT volts float8)\nRETURNS setof record\nAS '$libdir/tablefunc','crosstab_hash'\nLANGUAGE C STABLE STRICT;", "explanation": "SQL definition from the 'tablefunc' PostgreSQL contrib extension.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 300, "num_statements": 1} {"question": "Update the risk assessment score for policyholder 4 to 600 based on their recent claim activity.", "schema": "CREATE TABLE Policyholders (PolicyID INT, CoverageLimit DECIMAL(10,2), RiskAssessmentScore INT); INSERT INTO Policyholders (PolicyID, CoverageLimit, RiskAssessmentScore) VALUES (1, 750000.00, 400), (2, 400000.00, 350), (4, 50000.00, 250); CREATE TABLE Claims (ClaimID INT, PolicyID INT, ClaimAmount DECIMAL(10,2)); INSERT INTO Claims (ClaimID, PolicyID, ClaimAmount) VALUES (1, 1, 5000.00), (2, 4, 2500.00);", "sql": "WITH UpdatedScores AS (UPDATE Policyholders SET RiskAssessmentScore = 600 WHERE PolicyID = 4 RETURNING *) SELECT * FROM UpdatedScores;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "dml_update", "is_postgresql_specific": true, "sql_length": 134, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: In how many years was Tom Fleming the final television commentator?", "schema": "CREATE TABLE table_17766232_7 (year_s_ VARCHAR, final_television_commentator VARCHAR)", "sql": "SELECT COUNT(year_s_) FROM table_17766232_7 WHERE final_television_commentator = 'Tom Fleming';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 95, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What were his winnings when he had 14 top 10s?", "schema": "CREATE TABLE table_2169966_2 (winnings VARCHAR, top_10 VARCHAR)", "sql": "SELECT winnings FROM table_2169966_2 WHERE top_10 = 14;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 55, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What club is R. H. C. Human who has a right arm medium pace bowling style a member of?", "schema": "CREATE TABLE table_name_41 (club VARCHAR, bowling_style VARCHAR, name VARCHAR)", "sql": "SELECT club FROM table_name_41 WHERE bowling_style = 'right arm medium pace' AND name = 'r. h. c. human';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 105, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: In what year is the notes distance 1.83m?", "schema": "CREATE TABLE table_name_82 (year VARCHAR, notes VARCHAR)", "sql": "SELECT year FROM table_name_82 WHERE notes = '1.83m';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 53, "num_statements": 1} {"question": "List all chemical products and their corresponding safety protocol numbers for facilities in California.", "schema": "CREATE TABLE chemical_products (id INT, product TEXT, safety_protocol INT); CREATE TABLE storage_facilities (id INT, name TEXT, state TEXT); INSERT INTO chemical_products (id, product, safety_protocol) VALUES (1, 'Product1', 123), (2, 'Product2', 456); INSERT INTO storage_facilities (id, name, state) VALUES (1, 'Facility1', 'California'), (2, 'Facility2', 'California');", "sql": "SELECT cp.product, cp.safety_protocol, sf.name FROM chemical_products cp INNER JOIN storage_facilities sf ON cp.id = sf.id WHERE sf.state = 'California';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 153, "num_statements": 1} {"question": "Delete all records with 'Copper coin' artifact_type from the 'site_k_artifacts' table.", "schema": "CREATE TABLE site_k_artifacts (id INT PRIMARY KEY, site_id INT, artifact_type VARCHAR(50), quantity INT); INSERT INTO site_k_artifacts (id, site_id, artifact_type, quantity) VALUES (1, 8, 'Stone tool', 15), (2, 8, 'Pottery shard', 20), (3, 8, 'Copper coin', 5), (4, 8, 'Bronze coin', 10);", "sql": "DELETE FROM site_k_artifacts WHERE artifact_type = 'Copper coin';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_delete", "is_postgresql_specific": false, "sql_length": 65, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'returning' (example 102).", "schema": null, "sql": "UPDATE foo SET f4 = f4 + 1 RETURNING old.f3; -- should fail\nUPDATE foo SET f4 = f4 + 1 RETURNING old, new;", "explanation": "DML from PostgreSQL core regression test for Returning.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_update", "is_postgresql_specific": true, "sql_length": 107, "num_statements": 2} {"question": "Generate PostgreSQL SQL for: What sort of Fuel/Trans does the Grey & Red locomotive have?", "schema": "CREATE TABLE table_name_15 (fuel__trans VARCHAR, colour VARCHAR)", "sql": "SELECT fuel__trans FROM table_name_15 WHERE colour = 'grey & red';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "What are the most common types of marine pollution in the Indian Ocean?", "schema": "CREATE TABLE marine_pollution (pollution_id INT, type VARCHAR(50), ocean VARCHAR(20));", "sql": "SELECT type, COUNT(*) FROM marine_pollution WHERE ocean = 'Indian Ocean' GROUP BY type ORDER BY COUNT(*) DESC LIMIT 1;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 118, "num_statements": 1} {"question": "Delete all records from the 'decentralized_applications' table where the 'app_category' is 'Gaming'", "schema": "CREATE TABLE decentralized_applications (app_id INT PRIMARY KEY, app_name VARCHAR(100), app_category VARCHAR(50));", "sql": "DELETE FROM decentralized_applications WHERE app_category = 'Gaming';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_delete", "is_postgresql_specific": false, "sql_length": 69, "num_statements": 1} {"question": "What is the total amount of waste generated by each mining operation in 2020?", "schema": "CREATE TABLE waste_generation (operation_id INT, operation_name TEXT, year INT, waste_generated INT);", "sql": "SELECT operation_name, SUM(waste_generated) AS total_waste_generated FROM waste_generation WHERE year = 2020 GROUP BY operation_name;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 133, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'without_overlaps' (example 542).", "schema": null, "sql": "INSERT INTO temporal_mltrng (id, valid_at) VALUES\n ('[6,7)', datemultirange(daterange('2018-01-01', '2018-02-01'))),\n ('[6,7)', datemultirange(daterange('2018-02-01', '2018-03-01')));", "explanation": "DML from PostgreSQL core regression test for Without Overlaps.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 185, "num_statements": 1} {"question": "What is the average energy efficiency (in kWh/m2) of buildings in the 'asia_pacific' region, partitioned by country and ordered by energy efficiency in ascending order?", "schema": "CREATE TABLE buildings (id INT, country VARCHAR(50), region VARCHAR(50), energy_efficiency FLOAT); INSERT INTO buildings (id, country, region, energy_efficiency) VALUES (1, 'China', 'asia_pacific', 2.34), (2, 'Japan', 'asia_pacific', 1.23), (3, 'India', 'asia_pacific', 3.45);", "sql": "SELECT region, country, AVG(energy_efficiency) as avg_energy_efficiency FROM buildings WHERE region = 'asia_pacific' GROUP BY country, region ORDER BY avg_energy_efficiency ASC;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 177, "num_statements": 1} {"question": "What is the total number of volunteer hours per month for the year 2019?", "schema": "CREATE TABLE volunteer_hours (id INT, volunteer_id INT, hours INT, hour_date DATE);", "sql": "SELECT EXTRACT(MONTH FROM hour_date) as month, SUM(hours) as total_hours FROM volunteer_hours WHERE YEAR(hour_date) = 2019 GROUP BY month;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 138, "num_statements": 1} {"question": "Update the depth of the 'Tonga Trench' in the ocean_floor_mapping table to 10900.", "schema": "CREATE TABLE ocean_floor_mapping (location TEXT, depth INTEGER); INSERT INTO ocean_floor_mapping (location, depth) VALUES ('Challenger Deep', 10994), ('Mariana Trench', 10972), ('Tonga Trench', 10823);", "sql": "UPDATE ocean_floor_mapping SET depth = 10900 WHERE location = 'Tonga Trench';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 77, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the standing in the season that the pct% was 0.769?", "schema": "CREATE TABLE table_2110959_1 (standing VARCHAR, pct__percentage VARCHAR)", "sql": "SELECT standing FROM table_2110959_1 WHERE pct__percentage = '0.769';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 69, "num_statements": 1} {"question": "PostgreSQL regression test 'stats_import': Write the SELECT query (example 157).", "schema": null, "sql": "SELECT pg_clear_extended_stats(schemaname => 'stats_import',\n relname => 'test',\n statistics_schemaname => 'stats_import',\n statistics_name => 'test_stat_clone',\n inherited => false);", "explanation": "Regression test for Stats Import in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT pg_clear_extended_stats(schemaname => 'stats_import',\n relname => 'test',\n statistics_schemaname => 'stats_import',\n statistics_name => 'test_stat_clone',\n inherited => false)) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 187, "num_statements": 1} {"question": "Count the number of oil wells in the Niger Delta and their total daily production", "schema": "CREATE TABLE oil_wells (well_id INT, location VARCHAR(20), daily_production FLOAT); INSERT INTO oil_wells (well_id, location, daily_production) VALUES (1, 'Niger Delta', 150.2), (2, 'Niger Delta', 160.3), (3, 'Niger Delta', 140.1);", "sql": "SELECT location, COUNT(*), SUM(daily_production) FROM oil_wells WHERE location = 'Niger Delta' GROUP BY location;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 113, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What episode number in the series had 2.77 million U.S. viewers?", "schema": "CREATE TABLE table_24319661_5 (no_in_series INTEGER, us_viewers__million_ VARCHAR)", "sql": "SELECT MAX(no_in_series) FROM table_24319661_5 WHERE us_viewers__million_ = '2.77';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 83, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What year was the start 33?", "schema": "CREATE TABLE table_name_97 (year INTEGER, start VARCHAR)", "sql": "SELECT SUM(year) FROM table_name_97 WHERE start = '33';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 55, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What country placed t6 with player Vijay Singh?", "schema": "CREATE TABLE table_name_91 (country VARCHAR, place VARCHAR, player VARCHAR)", "sql": "SELECT country FROM table_name_91 WHERE place = 't6' AND player = 'vijay singh';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 80, "num_statements": 1} {"question": "PostgreSQL regression test 'jsonb': Write the SELECT query (example 259).", "schema": null, "sql": "select '42'::jsonb #>> array['0'];", "explanation": "Regression test for Jsonb in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select '42'::jsonb #>> array['0']) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": true, "sql_length": 34, "num_statements": 1} {"question": "What is the total square footage of building projects per labor category?", "schema": "CREATE TABLE labor_statistics (labor_category VARCHAR(50), average_wage NUMERIC(10,2)); INSERT INTO labor_statistics (labor_category, average_wage) VALUES ('Carpenters', '35.56'), ('Electricians', '38.42'), ('Plumbers', '42.15'); CREATE TABLE project_data (project_id SERIAL PRIMARY KEY, labor_category VARCHAR(50), square_footage INTEGER); INSERT INTO project_data (project_id, labor_category, square_footage) VALUES (1, 'Carpenters', 15000), (2, 'Electricians', 20000), (3, 'Plumbers', 25000);", "sql": "SELECT labor_category, SUM(square_footage) FROM project_data GROUP BY labor_category;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": true, "sql_length": 85, "num_statements": 1} {"question": "pgTAP test for Trigger (assertion 3).", "schema": null, "sql": "CREATE TABLE public.users(\n nick text NOT NULL PRIMARY KEY,\n pass text NOT NULL\n);", "explanation": "SQL assertion from pgTAP test suite for Trigger.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "ddl_table", "is_postgresql_specific": false, "sql_length": 90, "num_statements": 1} {"question": "Update nutrition data for 'Green Earth' to reflect new certifications.", "schema": "CREATE TABLE NutritionData (SupplierID INT, Certification TEXT); INSERT INTO NutritionData (SupplierID, Certification) VALUES (1, 'Organic'), (2, 'Non-GMO');", "sql": "UPDATE NutritionData SET Certification = 'Biodynamic' WHERE SupplierID = (SELECT SupplierID FROM Suppliers WHERE SupplierName = 'Green Earth');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 143, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many different high assists results are there for the game played on February 24?", "schema": "CREATE TABLE table_23248940_9 (high_assists VARCHAR, date VARCHAR)", "sql": "SELECT COUNT(high_assists) FROM table_23248940_9 WHERE date = 'February 24';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 76, "num_statements": 1} {"question": "What is the minimum funding received by a startup founded by a person with a disability in the finance sector?", "schema": "CREATE TABLE startups(id INT, name TEXT, industry TEXT, founder_ability TEXT, funding FLOAT); INSERT INTO startups (id, name, industry, founder_ability, funding) VALUES (1, 'FinanceAbility', 'Finance', 'Disabled', 2000000);", "sql": "SELECT MIN(funding) FROM startups WHERE industry = 'Finance' AND founder_ability = 'Disabled';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 94, "num_statements": 1} {"question": "What is the total investment in agricultural innovation in South Asia in the past 3 years?", "schema": "CREATE TABLE investment (id INT, project TEXT, location TEXT, investment_amount INT, year INT); INSERT INTO investment (id, project, location, investment_amount, year) VALUES (1, 'Potato Seed Project', 'India', 200000, 2019), (2, 'Corn Seed Project', 'Pakistan', 300000, 2020), (3, 'Rice Seed Project', 'Bangladesh', 150000, 2018), (4, 'Wheat Seed Project', 'Sri Lanka', 250000, 2021);", "sql": "SELECT SUM(investment_amount) FROM investment WHERE location LIKE 'South%' AND year BETWEEN 2019 AND 2021;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 106, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the average crowd when the home team is north melbourne?", "schema": "CREATE TABLE table_name_31 (crowd INTEGER, home_team VARCHAR)", "sql": "SELECT AVG(crowd) FROM table_name_31 WHERE home_team = 'north melbourne';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 73, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the total number of years when Mika Miyazato won the silver?", "schema": "CREATE TABLE table_name_11 (year VARCHAR, silver VARCHAR)", "sql": "SELECT COUNT(year) FROM table_name_11 WHERE silver = 'mika miyazato';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 69, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What colors does David Langdon use?", "schema": "CREATE TABLE table_19624708_1 (colours VARCHAR, owner VARCHAR)", "sql": "SELECT colours FROM table_19624708_1 WHERE owner = 'David Langdon';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 67, "num_statements": 1} {"question": "What is the total quantity of materials used in production for each brand?", "schema": "CREATE TABLE brands (id INT, name VARCHAR(50)); CREATE TABLE materials_used (id INT, brand_id INT, material VARCHAR(50), quantity INT); INSERT INTO brands (id, name) VALUES (1, 'Brand A'), (2, 'Brand B'), (3, 'Brand C'); INSERT INTO materials_used (id, brand_id, material, quantity) VALUES (1, 1, 'Organic Cotton', 100), (2, 1, 'Recycled Polyester', 150), (3, 2, 'Organic Cotton', 200), (4, 3, 'Recycled Polyester', 125);", "sql": "SELECT b.name, SUM(mu.quantity) as total_quantity FROM brands b INNER JOIN materials_used mu ON b.id = mu.brand_id GROUP BY b.name;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 131, "num_statements": 1} {"question": "Delete all records from the military equipment table that are older than 10 years", "schema": "CREATE TABLE military_equipment (equipment_type VARCHAR(255), purchase_date DATE); INSERT INTO military_equipment (equipment_type, purchase_date) VALUES ('Tank', '2011-01-01'), ('Jet', '2012-01-01'), ('Submarine', '2005-01-01');", "sql": "DELETE FROM military_equipment WHERE purchase_date < '2011-01-01';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_delete", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "What is the average rating of products manufactured using ethical labor practices?", "schema": "CREATE TABLE Manufacturing (product_id INT, labor_practices TEXT); INSERT INTO Manufacturing (product_id, labor_practices) VALUES (1, 'ethical'), (2, 'unethical'), (3, 'ethical'); CREATE TABLE Ratings (product_id INT, rating FLOAT); INSERT INTO Ratings (product_id, rating) VALUES (1, 4.5), (1, 4.7), (2, 3.2), (3, 4.9), (3, 4.8);", "sql": "SELECT AVG(Ratings.rating) FROM Ratings JOIN Manufacturing ON Ratings.product_id = Manufacturing.product_id WHERE Manufacturing.labor_practices = 'ethical';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 156, "num_statements": 1} {"question": "What is the minimum sustainable material quantity for each garment type in the 'sustainable_materials' table?", "schema": "CREATE TABLE sustainable_materials (id INT, garment VARCHAR(20), material VARCHAR(20), quantity INT); INSERT INTO sustainable_materials (id, garment, material, quantity) VALUES (1, 'tshirt', 'cotton', 3000), (2, 'pants', 'wool', 2000), (3, 'dress', 'silk', 4000), (4, 'jacket', 'polyester', 5000), (5, 'shirt', 'cotton', 2500), (6, 'blouse', 'silk', 3000);", "sql": "SELECT garment, MIN(quantity) FROM sustainable_materials GROUP BY garment;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 74, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which Club Team has a Position of (d), a Nationality of canada, and a Player of andrew macwilliam?", "schema": "CREATE TABLE table_name_7 (club_team VARCHAR, player VARCHAR, position VARCHAR, nationality VARCHAR)", "sql": "SELECT club_team FROM table_name_7 WHERE position = '(d)' AND nationality = 'canada' AND player = 'andrew macwilliam';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 118, "num_statements": 1} {"question": "For each program, list the number of volunteers, total volunteer hours, and the average volunteer hours per volunteer.", "schema": "CREATE TABLE Programs (ProgramID int, Name varchar(50), Location varchar(50)); CREATE TABLE Volunteers (VolunteerID int, Name varchar(50), ProgramID int, Hours decimal(10,2)); INSERT INTO Programs (ProgramID, Name, Location) VALUES (1, 'Feeding America', 'USA'), (2, 'Habitat for Humanity', 'Canada'); INSERT INTO Volunteers (VolunteerID, Name, ProgramID, Hours) VALUES (1, 'Bob', 1, 10.00), (2, 'Sally', 1, 15.00), (3, 'John', 2, 20.00);", "sql": "SELECT P.Name, COUNT(V.VolunteerID) as VolunteerCount, SUM(V.Hours) as TotalHours, AVG(V.Hours) as AvgHoursPerVolunteer FROM Programs P JOIN Volunteers V ON P.ProgramID = V.ProgramID GROUP BY P.ProgramID, P.Name;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 212, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the horsepower for VIN code of h and engine for 351-2v cleveland v8", "schema": "CREATE TABLE table_name_94 (horsepower VARCHAR, vin_code VARCHAR, engine VARCHAR)", "sql": "SELECT horsepower FROM table_name_94 WHERE vin_code = 'h' AND engine = '351-2v cleveland v8';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 93, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'create_table_like' (example 3).", "schema": null, "sql": "CREATE TABLE ctlb (bb TEXT) INHERITS (ctla);", "explanation": "DDL from PostgreSQL core regression test for Create Table Like.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_table", "is_postgresql_specific": true, "sql_length": 44, "num_statements": 1} {"question": "What is the total number of libraries and parks in California, and how many of them are located in Los Angeles County?", "schema": "CREATE TABLE states (id INT, name VARCHAR(255)); INSERT INTO states (id, name) VALUES (1, 'California'); CREATE TABLE libraries (id INT, state_id INT, name VARCHAR(255)); INSERT INTO libraries (id, state_id) VALUES (1, 1), (2, 1), (3, 1); CREATE TABLE parks (id INT, state_id INT, name VARCHAR(255)); INSERT INTO parks (id, state_id) VALUES (1, 1), (2, 1), (3, 1); CREATE TABLE counties (id INT, state_id INT, name VARCHAR(255)); INSERT INTO counties (id, state_id, name) VALUES (1, 1, 'Los Angeles County');", "sql": "SELECT COUNT(libraries.id) + COUNT(parks.id) AS total_locations, COUNT(counties.name) AS la_county_locations FROM libraries INNER JOIN states ON libraries.state_id = states.id INNER JOIN parks ON libraries.state_id = parks.state_id INNER JOIN counties ON states.id = counties.state_id WHERE states.name = 'California' AND counties.name = 'Los Angeles County';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 359, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What are the distinct cross reference source system codes which are related to the master customer details 'Gottlieb, Becker and Wyman'?", "schema": "CREATE TABLE customer_master_index (master_customer_id VARCHAR, cmi_details VARCHAR); CREATE TABLE cmi_cross_references (source_system_code VARCHAR, master_customer_id VARCHAR)", "sql": "SELECT DISTINCT T2.source_system_code FROM customer_master_index AS T1 JOIN cmi_cross_references AS T2 ON T1.master_customer_id = T2.master_customer_id WHERE T1.cmi_details = 'Gottlieb , Becker and Wyman';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 206, "num_statements": 1} {"question": "What is the carbon sequestration capacity (in tons) for each species in the 'carbon_sequestration' table?", "schema": "CREATE TABLE carbon_sequestration (id INT, species VARCHAR(255), sequestration_rate FLOAT); INSERT INTO carbon_sequestration (id, species, sequestration_rate) VALUES (1, 'Oak', 2.5), (2, 'Maple', 2.3), (3, 'Pine', 2.8);", "sql": "SELECT species, sequestration_rate * 10 AS carbon_sequestration_capacity FROM carbon_sequestration;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 99, "num_statements": 1} {"question": "PostgreSQL regression test 'date': Write the SELECT query (example 162).", "schema": null, "sql": "SELECT * FROM pg_input_error_info('garbage', 'date');", "explanation": "Regression test for Date in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT * FROM pg_input_error_info('garbage', 'date')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 53, "num_statements": 1} {"question": "PL/pgSQL test: Plperl Shared (example 3).", "schema": null, "sql": "create function getme(key text) returns text language plperl as $$\n\n my $key = shift;\n return $_SHARED{$key};\n\n$$;", "explanation": "PL/pgSQL example from PostgreSQL source test for Plperl Shared.", "validation_query": null, "source": "plpgsql_source", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 116, "num_statements": 3} {"question": "Generate PostgreSQL SQL for: Which Administrative district has a Pre-1009 province of gwannae -do and a Post-1009 province of seohae -do?", "schema": "CREATE TABLE table_name_76 (administrative_district VARCHAR, pre_1009_province VARCHAR, post_1009_province VARCHAR)", "sql": "SELECT administrative_district FROM table_name_76 WHERE pre_1009_province = 'gwannae -do' AND post_1009_province = 'seohae -do';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 128, "num_statements": 1} {"question": "Find the combined biomass of all farmed species in Chile.", "schema": "CREATE TABLE FarmI (country VARCHAR(20), species VARCHAR(20), biomass FLOAT); INSERT INTO FarmI (country, species, biomass) VALUES ('Chile', 'Salmon', 300000); INSERT INTO FarmI (country, species, biomass) VALUES ('Chile', 'Cod', 150000); INSERT INTO FarmI (country, species, biomass) VALUES ('Chile', 'Tilapia', 100000);", "sql": "SELECT SUM(biomass) FROM FarmI WHERE country='Chile';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 53, "num_statements": 1} {"question": "What is the total biomass of all marine life in the Indian Ocean, restricted to species with a recorded biomass greater than 500?", "schema": "CREATE TABLE marine_life (species TEXT, ocean TEXT, biomass INT); INSERT INTO marine_life (species, ocean, biomass) VALUES ('Whale Shark', 'Indian Ocean', 2000), ('Manta Ray', 'Indian Ocean', 300), ('Tuna', 'Indian Ocean', 150), ('Dolphin', 'Indian Ocean', 100);", "sql": "SELECT SUM(biomass) FROM marine_life WHERE ocean = 'Indian Ocean' AND biomass > 500;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 84, "num_statements": 1} {"question": "What was the average methane emission in 2019?", "schema": "CREATE TABLE gas_emissions (year INT, methane_emissions FLOAT); INSERT INTO gas_emissions", "sql": "SELECT AVG(methane_emissions) FROM gas_emissions WHERE year = 2019;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 67, "num_statements": 1} {"question": "Update the \"amount\" of the \"ForestryProject1\" in the \"CarbonOffset\" table to 6000", "schema": "CREATE TABLE CarbonOffset (id INT, project_name VARCHAR(20), project_type VARCHAR(20), amount INT); INSERT INTO CarbonOffset (id, project_name, project_type, amount) VALUES (1, 'ForestryProject1', 'forestry', 5000), (2, 'RenewableEnergyProject1', 'renewable', 6000), (3, 'EnergyEfficiencyProject1', 'energy_efficiency', 4000);", "sql": "UPDATE CarbonOffset SET amount = 6000 WHERE project_name = 'ForestryProject1';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 78, "num_statements": 1} {"question": "What is the maximum number of military equipment items sold in a single transaction?", "schema": "CREATE TABLE sales (id INT, quantity INT); INSERT INTO sales (id, quantity) VALUES (1, 100), (2, 50), (3, 150);", "sql": "SELECT MAX(quantity) as max_quantity FROM sales;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 48, "num_statements": 1} {"question": "PostgreSQL regression test 'regproc': Write the SELECT query (example 123).", "schema": null, "sql": "SELECT * FROM pg_input_error_info('ng_catalog.||/', 'regoper');", "explanation": "Regression test for Regproc in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT * FROM pg_input_error_info('ng_catalog.||/', 'regoper')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 63, "num_statements": 1} {"question": "What is the minimum mass of a spacecraft manufactured by 'Space Explorers Ltd.'?", "schema": "CREATE TABLE spacecraft (id INT, name VARCHAR(255), manufacturer VARCHAR(255), mass FLOAT); INSERT INTO spacecraft (id, name, manufacturer, mass) VALUES (1, 'Voyager 1', 'Galactic Pioneers Inc.', 770.), (2, 'Voyager 2', 'Galactic Pioneers Inc.', 780.), (3, 'New Horizons', 'Space Explorers Ltd.', 1010.);", "sql": "SELECT MIN(mass) FROM spacecraft WHERE manufacturer = 'Space Explorers Ltd.';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 77, "num_statements": 1} {"question": "What is the combined total installed capacity for 'Wind' and 'Solar' technologies in all cities?", "schema": "CREATE TABLE renewable_energy_projects (city VARCHAR(50), technology VARCHAR(50), capacity FLOAT); INSERT INTO renewable_energy_projects (city, technology, capacity) VALUES ('CityA', 'Solar', 500.0), ('CityA', 'Wind', 600.0), ('CityB', 'Solar', 750.0), ('CityB', 'Wind', 800.0);", "sql": "SELECT SUM(capacity) FROM renewable_energy_projects WHERE technology IN ('Solar', 'Wind');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 90, "num_statements": 1} {"question": "What is the total number of public participation events in North American cities?", "schema": "CREATE TABLE City (id INT, name VARCHAR(255), region VARCHAR(255)); INSERT INTO City (id, name, region) VALUES (1, 'New York', 'North America'); INSERT INTO City (id, name, region) VALUES (2, 'Los Angeles', 'North America'); INSERT INTO City (id, name, region) VALUES (3, 'Toronto', 'North America'); CREATE TABLE PublicParticipation (id INT, city_id INT, event VARCHAR(255)); INSERT INTO PublicParticipation (id, city_id, event) VALUES (1, 1, 'Public Meeting'); INSERT INTO PublicParticipation (id, city_id, event) VALUES (2, 1, 'Community Workshop'); INSERT INTO PublicParticipation (id, city_id, event) VALUES (3, 2, 'Town Hall'); INSERT INTO PublicParticipation (id, city_id, event) VALUES (4, 3, 'Citizen Survey');", "sql": "SELECT COUNT(*) FROM PublicParticipation JOIN City ON PublicParticipation.city_id = City.id WHERE City.region = 'North America';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 128, "num_statements": 1} {"question": "pgTAP test for Resultset (assertion 70).", "schema": null, "sql": "INSERT INTO names (name) VALUES ('Kevin');", "explanation": "SQL assertion from pgTAP test suite for Resultset.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 42, "num_statements": 1} {"question": "What is the average number of employees for gold mines located in Canada?", "schema": "CREATE TABLE gold_mines (id INT, name VARCHAR(50), location VARCHAR(50), size INT, num_employees INT, co2_emissions INT); INSERT INTO gold_mines VALUES (1, 'Gold Mine 1', 'Canada', 450, 320, 22000); INSERT INTO gold_mines VALUES (2, 'Gold Mine 2', 'Canada', 600, 450, 28000); INSERT INTO gold_mines VALUES (3, 'Gold Mine 3', 'Mexico', 200, 180, 16000);", "sql": "SELECT AVG(num_employees) FROM gold_mines WHERE location = 'Canada';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 68, "num_statements": 1} {"question": "List the names of the top five countries with the highest visitor counts at heritage sites in 'Asia'.", "schema": "CREATE TABLE HeritageSitesAsia (SiteID INT PRIMARY KEY, SiteName VARCHAR(50), Country VARCHAR(50), VisitorCount INT); INSERT INTO HeritageSitesAsia (SiteID, SiteName, Country, VisitorCount) VALUES (1, 'Angkor Wat', 'Cambodia', 2500000), (2, 'Taj Mahal', 'India', 3000000);", "sql": "SELECT Country, SUM(VisitorCount) as Total FROM HeritageSitesAsia GROUP BY Country ORDER BY Total DESC LIMIT 5;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 111, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: what are the tries where the game was lost by 4?", "schema": "CREATE TABLE table_13564637_3 (tries_against VARCHAR, lost VARCHAR)", "sql": "SELECT tries_against FROM table_13564637_3 WHERE lost = '4';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 60, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'rules' (example 225).", "schema": null, "sql": "insert into rtest_nothn4 values (50, 'too small');", "explanation": "DML from PostgreSQL core regression test for Rules.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 50, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the first day cover cancellation for the University of Saskatchewan themed stamp?", "schema": "CREATE TABLE table_11900773_5 (first_day_cover_cancellation VARCHAR, theme VARCHAR)", "sql": "SELECT first_day_cover_cancellation FROM table_11900773_5 WHERE theme = 'University of Saskatchewan';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 101, "num_statements": 1} {"question": "What is the average age of audience members who attended performing arts events in urban areas of New York and Pennsylvania?", "schema": "CREATE TABLE Events (id INT, state VARCHAR(2), city VARCHAR(20), category VARCHAR(20), attendees INT, event_date DATE); INSERT INTO Events (id, state, city, category, attendees, event_date) VALUES (1, 'NY', 'New York', 'Theater', 500, '2022-01-01'), (2, 'PA', 'Philadelphia', 'Dance', 300, '2022-02-01'), (3, 'NY', 'Albany', 'Music', 400, '2022-03-01'); CREATE TABLE Audience (id INT, state VARCHAR(2), zip INT, age INT); INSERT INTO Audience (id, state, zip, age) VALUES (1, 'NY', 10000, 30), (2, 'PA', 19000, 40), (3, 'NY', 12000, 35); CREATE TABLE Zipcodes (zip INT, city VARCHAR(20), urban VARCHAR(5)); INSERT INTO Zipcodes (zip, city, urban) VALUES (10000, 'New York', 'yes'), (19000, 'Philadelphia', 'yes'), (12000, 'Albany', 'yes');", "sql": "SELECT AVG(Audience.age) FROM Events INNER JOIN Audience ON Events.state = Audience.state INNER JOIN Zipcodes ON Audience.zip = Zipcodes.zip WHERE urban = 'yes' AND Events.category IN ('Theater', 'Dance') AND Events.state IN ('NY', 'PA');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 238, "num_statements": 1} {"question": "What is the percentage of global Dysprosium production in 2019 that came from Africa?", "schema": "CREATE TABLE production (element VARCHAR(10), year INT, region VARCHAR(10), quantity INT); INSERT INTO production (element, year, region, quantity) VALUES ('Dysprosium', 2019, 'Africa', 320), ('Dysprosium', 2019, 'Asia', 1200), ('Dysprosium', 2019, 'North America', 400), ('Dysprosium', 2019, 'Europe', 500);", "sql": "SELECT (SUM(CASE WHEN region = 'Africa' THEN quantity ELSE 0 END) / SUM(quantity)) * 100 FROM production WHERE element = 'Dysprosium' AND year = 2019;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 150, "num_statements": 1} {"question": "List the names of ingredients and their sourcing countries, sorted by the sourcing date in descending order.", "schema": "CREATE TABLE ingredients (ingredient_id INT, name TEXT, sourcing_country TEXT, source_date DATE); INSERT INTO ingredients (ingredient_id, name, sourcing_country, source_date) VALUES (1, 'Water', 'China', '2021-01-01'), (2, 'Glycerin', 'France', '2021-02-15'), (3, 'Retinol', 'USA', '2020-12-10');", "sql": "SELECT name, sourcing_country, source_date FROM ingredients ORDER BY source_date DESC;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 86, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the 2013 for 2011 of 1r", "schema": "CREATE TABLE table_name_9 (Id VARCHAR)", "sql": "SELECT 2013 FROM table_name_9 WHERE 2011 = '1r';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 48, "num_statements": 1} {"question": "PostgreSQL regression test 'subselect': Write the SELECT query (example 154).", "schema": null, "sql": "select view_a from view_a;", "explanation": "Regression test for Subselect in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select view_a from view_a) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 26, "num_statements": 1} {"question": "Identify the astrophysics research having the highest number of citations.", "schema": "CREATE TABLE astrophysics_research(id INT, title VARCHAR(50), citations INT); INSERT INTO astrophysics_research(id, title, citations) VALUES (1, 'Stars and Galaxies', 150); INSERT INTO astrophysics_research(id, title, citations) VALUES (2, 'Black Holes', 200);", "sql": "SELECT title FROM astrophysics_research WHERE citations = (SELECT MAX(citations) FROM astrophysics_research);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 109, "num_statements": 1} {"question": "What is the total billing amount for cases in the 'Southern' region, excluding cases with a billing amount less than $5000?", "schema": "CREATE TABLE cases (id INT, region VARCHAR(10), billing_amount INT); INSERT INTO cases (id, region, billing_amount) VALUES (1, 'Eastern', 5000), (2, 'Western', 7000), (3, 'Eastern', 6000), (4, 'Southern', 8000), (5, 'Southern', 4000);", "sql": "SELECT SUM(billing_amount) FROM cases WHERE region = 'Southern' AND billing_amount > 5000;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 90, "num_statements": 1} {"question": "What is the oldest artifact in 'SiteJ'?", "schema": "CREATE TABLE SiteJ (id INT PRIMARY KEY, artifact_name VARCHAR(50), date_found DATE); INSERT INTO SiteJ (id, artifact_name, date_found) VALUES (1, 'Iron Axehead', '2017-07-22'), (2, 'Leather Pouch', '2016-11-03');", "sql": "SELECT artifact_name FROM (SELECT artifact_name, ROW_NUMBER() OVER (ORDER BY date_found ASC) rn FROM SiteJ) t WHERE rn = 1;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_window_function", "is_postgresql_specific": true, "sql_length": 123, "num_statements": 1} {"question": "What is the average mass of Mars rovers?", "schema": "CREATE TABLE mars_rovers (rover_name TEXT, rover_mass REAL); INSERT INTO mars_rovers (rover_name, rover_mass) VALUES ('Sojourner', 10.6), ('Spirit', 174), ('Opportunity', 174);", "sql": "SELECT AVG(rover_mass) FROM mars_rovers;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 40, "num_statements": 1} {"question": "What is the maximum acres of habitat preserved for each animal species?", "schema": "CREATE TABLE habitat_preservation (id INT, animal_id INT, location VARCHAR(50), acres FLOAT);INSERT INTO habitat_preservation (id, animal_id, location, acres) VALUES (1, 1, 'Asia', 10000), (2, 2, 'Africa', 15000), (3, 3, 'Africa', 5000);CREATE TABLE animal_population (id INT, species VARCHAR(50), population INT);INSERT INTO animal_population (id, species, population) VALUES (1, 'Tiger', 250), (2, 'Elephant', 500), (3, 'Giraffe', 300);", "sql": "SELECT ap.species, MAX(h.acres) FROM animal_population ap JOIN habitat_preservation h ON ap.id = h.animal_id GROUP BY ap.species;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 129, "num_statements": 1} {"question": "Update the capacities of all warehouses located in California", "schema": "CREATE TABLE warehouse (id INT, city VARCHAR(20), capacity INT); INSERT INTO warehouse (id, city, capacity) VALUES (1, 'Chicago', 1000), (2, 'Houston', 1500), (3, 'Miami', 800), (4, 'Los Angeles', 1200), (5, 'San Francisco', 1800); CREATE TABLE state (id INT, name VARCHAR(20)); INSERT INTO state (id, name) VALUES (1, 'California'), (2, 'Texas'), (3, 'Florida'); CREATE VIEW warehouse_state AS SELECT * FROM warehouse INNER JOIN state ON warehouse.id = state.id;", "sql": "UPDATE warehouse_state SET capacity = 1500 WHERE name IN ('California');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 72, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the Nationality / Opponent of rosario?", "schema": "CREATE TABLE table_name_64 (competition VARCHAR, nationality___opponent VARCHAR)", "sql": "SELECT competition FROM table_name_64 WHERE nationality___opponent = 'rosario';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 79, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What's the average amount of ties had when a team wins 6 and it's past the 2004 season?", "schema": "CREATE TABLE table_name_55 (ties INTEGER, wins VARCHAR, season VARCHAR)", "sql": "SELECT AVG(ties) FROM table_name_55 WHERE wins = 6 AND season > 2004;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 69, "num_statements": 1} {"question": "Which satellites were deployed by SpaceX in the year 2020?", "schema": "CREATE TABLE Satellites (name VARCHAR(50), manufacturer VARCHAR(50), launch_year INT); INSERT INTO Satellites (name, manufacturer, launch_year) VALUES ('Starlink 1', 'SpaceX', 2020), ('Starlink 2', 'SpaceX', 2020);", "sql": "SELECT name FROM Satellites WHERE manufacturer = 'SpaceX' AND launch_year = 2020;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 81, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: When 4 is the v-band what is the overall amount of ka-bands?", "schema": "CREATE TABLE table_186468_1 (ka_band VARCHAR, v_band VARCHAR)", "sql": "SELECT COUNT(ka_band) FROM table_186468_1 WHERE v_band = '4';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 61, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What league played in 2011?", "schema": "CREATE TABLE table_name_65 (league VARCHAR, year VARCHAR)", "sql": "SELECT league FROM table_name_65 WHERE year = 2011;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 51, "num_statements": 1} {"question": "What is the total cost of all road projects in the city of New York with a budget over $10 million?", "schema": "CREATE TABLE RoadProjects (id INT, name TEXT, city TEXT, budget DECIMAL(10,2));", "sql": "SELECT SUM(budget) FROM RoadProjects WHERE city = 'New York' AND budget > 10000000;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 83, "num_statements": 1} {"question": "Show an example of PostgreSQL CREATE FOREIGN TABLE (example 1).", "schema": null, "sql": "CREATE FOREIGN TABLE films ( code char(5) NOT NULL, title varchar(40) NOT NULL, did integer NOT NULL, date_prod date, kind varchar(10), len interval hour to minute ) SERVER film_server;", "explanation": "PostgreSQL CREATE FOREIGN TABLE command.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 185, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which 2008 Tournament has a 2007 of a and a 2010 of qf?", "schema": "CREATE TABLE table_name_46 (Id VARCHAR)", "sql": "SELECT 2008 FROM table_name_46 WHERE 2007 = 'a' AND 2010 = 'qf';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 64, "num_statements": 1} {"question": "Write the SQL definition from PostgreSQL contrib extension 'contrib' (item 45).", "schema": null, "sql": "CREATE OPERATOR CLASS timetz_ops\nDEFAULT FOR TYPE timetz USING gin\nAS\n OPERATOR 1 <,\n OPERATOR 2 <=,\n OPERATOR 3 =,\n OPERATOR 4 >=,\n OPERATOR 5 >,\n FUNCTION 1 timetz_cmp(timetz,timetz),\n FUNCTION 2 gin_extract_value_timetz(timetz, internal),\n FUNCTION 3 gin_extract_query_timetz(timetz, internal, int2, internal, internal),\n FUNCTION 4 gin_btree_consistent(internal, int2, anyelement, int4, internal, internal),\n FUNCTION 5 gin_compare_prefix_timetz(timetz,timetz,int2, internal),\nSTORAGE timetz;", "explanation": "SQL definition from the 'contrib' PostgreSQL contrib extension.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "ddl_advanced", "is_postgresql_specific": true, "sql_length": 664, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many places have an area of 409.41 (km2)?", "schema": "CREATE TABLE table_261222_1 (population__2010_ VARCHAR, area__km_2__ VARCHAR)", "sql": "SELECT COUNT(population__2010_) FROM table_261222_1 WHERE area__km_2__ = '409.41';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 82, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many heights does chad henne have?", "schema": "CREATE TABLE table_15017024_2 (height VARCHAR, player_name VARCHAR)", "sql": "SELECT COUNT(height) FROM table_15017024_2 WHERE player_name = 'Chad Henne';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 76, "num_statements": 1} {"question": "What is the total number of employees in the 'mining_operations' table, grouped by their job titles?", "schema": "CREATE TABLE mining_operations (id INT, name VARCHAR(50), job_title VARCHAR(50), department VARCHAR(50), experience INT); INSERT INTO mining_operations (id, name, job_title, department, experience) VALUES (1, 'John Doe', 'Mining Engineer', 'Operations', 7); INSERT INTO mining_operations (id, name, job_title, department, experience) VALUES (2, 'Jane Smith', 'Geologist', 'Exploration', 5);", "sql": "SELECT job_title, COUNT(*) as total_employees FROM mining_operations GROUP BY job_title;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 88, "num_statements": 1} {"question": "insert new exhibit data into 'exhibition_statistics'", "schema": "CREATE TABLE exhibition_statistics (exhibit_id INT, attendance INT);", "sql": "INSERT INTO exhibition_statistics (exhibit_id, attendance) VALUES (3, 600), (4, 800);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 85, "num_statements": 1} {"question": "Which exhibition received the most digital interactions in London?", "schema": "CREATE TABLE DigitalInteractions (id INT, city VARCHAR(50), exhibition VARCHAR(50), digital_interactions INT);", "sql": "SELECT exhibition, MAX(digital_interactions) FROM DigitalInteractions WHERE city = 'London' GROUP BY exhibition ORDER BY MAX(digital_interactions) DESC LIMIT 1;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 160, "num_statements": 1} {"question": "What is the percentage of organic cotton used in garments manufactured in Europe?", "schema": "CREATE TABLE materials (garment_id INT, material VARCHAR(255), organic BOOLEAN); INSERT INTO materials (garment_id, material, organic) VALUES (1, 'cotton', TRUE); INSERT INTO materials (garment_id, material, organic) VALUES (2, 'polyester', FALSE); INSERT INTO materials (garment_id, material, organic) VALUES (3, 'silk', FALSE);", "sql": "SELECT (COUNT(CASE WHEN organic THEN 1 END)/COUNT(*))*100 FROM materials WHERE material = 'cotton' AND garment_id IN (SELECT garment_id FROM garments WHERE manufacture_location LIKE 'Europe%');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 193, "num_statements": 1} {"question": "PostgreSQL Plpgsql: show example 4.", "schema": null, "sql": "CREATE FUNCTION sales_tax(subtotal real) RETURNS real AS $$ BEGIN RETURN subtotal * 0.06; END; $$ LANGUAGE plpgsql;", "explanation": "Example from PostgreSQL documentation on Plpgsql.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "plpgsql_function", "is_postgresql_specific": true, "sql_length": 115, "num_statements": 3} {"question": "Generate PostgreSQL SQL for: What is the sum of the area for the bahawalnagar district with a population more than 134,936?", "schema": "CREATE TABLE table_name_32 (city_area_km_2__ INTEGER, district VARCHAR, city_population__2009_ VARCHAR)", "sql": "SELECT SUM(city_area_km_2__) FROM table_name_32 WHERE district = 'bahawalnagar district' AND city_population__2009_ > 134 OFFSET 936;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 133, "num_statements": 1} {"question": "What is the maximum recycling rate for plastic in 2020 for each state?", "schema": "CREATE TABLE recycling_rates(year INT, state VARCHAR(255), plastic_recycling FLOAT, paper_recycling FLOAT, glass_recycling FLOAT); INSERT INTO recycling_rates VALUES (2020, 'California', 0.6, 0.7, 0.5), (2020, 'Texas', 0.5, 0.6, 0.4);", "sql": "SELECT MAX(plastic_recycling) AS max_plastic_recycling, state FROM recycling_rates WHERE year = 2020 GROUP BY state;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 116, "num_statements": 1} {"question": "Show the number of security incidents and their category by year", "schema": "CREATE TABLE incident_yearly (id INT, incident_date DATE, category VARCHAR(10)); INSERT INTO incident_yearly (id, incident_date, category) VALUES (1, '2021-01-01', 'Malware'), (2, '2021-01-15', 'Phishing'), (3, '2022-01-01', 'Insider Threat'), (4, '2022-01-01', 'DDoS'), (5, '2022-02-01', 'Phishing'), (6, '2023-03-01', 'Insider Threat');", "sql": "SELECT EXTRACT(YEAR FROM incident_date) as year, category, COUNT(*) as incidents FROM incident_yearly GROUP BY year, category;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 126, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which is the method under the event Jungle Fight 2?", "schema": "CREATE TABLE table_name_24 (method VARCHAR, event VARCHAR)", "sql": "SELECT method FROM table_name_24 WHERE event = 'jungle fight 2';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 64, "num_statements": 1} {"question": "Create a view named 'space_debris_view' showing all debris entries", "schema": "CREATE TABLE space_debris (id INT PRIMARY KEY, debris_id INT, debris_name VARCHAR(255), launch_date DATE, location VARCHAR(255), type VARCHAR(255)); CREATE VIEW space_debris_view AS SELECT * FROM space_debris;", "sql": "CREATE VIEW space_debris_view AS SELECT * FROM space_debris;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "ddl_view", "is_postgresql_specific": false, "sql_length": 60, "num_statements": 1} {"question": "Find the daily net transaction amount (total transaction value - total withdrawal value) for the past week, grouped by day and ordered by day in ascending order for customers in Europe?", "schema": "CREATE TABLE customer_europe_transactions (transaction_id INT, customer_id INT, transaction_value DECIMAL(10, 2), transaction_type VARCHAR(10), transaction_date DATE, customer_region VARCHAR(20)); INSERT INTO customer_europe_transactions (transaction_id, customer_id, transaction_value, transaction_type, transaction_date, customer_region) VALUES (1, 1, 12000, 'deposit', '2021-07-01', 'Europe'), (2, 2, -5000, 'withdrawal', '2021-07-01', 'Europe'), (3, 3, 8000, 'deposit', '2021-07-02', 'Europe');", "sql": "SELECT customer_region, transaction_date, SUM(CASE WHEN transaction_type = 'deposit' THEN transaction_value ELSE -transaction_value END) as daily_net_transaction_amount FROM customer_europe_transactions WHERE transaction_date BETWEEN DATE_SUB(CURRENT_DATE, INTERVAL 7 DAY) AND CURRENT_DATE AND customer_region = 'Europe' GROUP BY customer_region, transaction_date ORDER BY transaction_date ASC;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 394, "num_statements": 1} {"question": "What is the maximum number of intelligence operations conducted by the United States in a single year?", "schema": "CREATE TABLE intelligence_operations (country VARCHAR(255), year INT, num_operations INT); INSERT INTO intelligence_operations (country, year, num_operations) VALUES ('United States', 2015, 5000), ('United States', 2016, 6000), ('United Kingdom', 2015, 3000), ('United Kingdom', 2016, 3500);", "sql": "SELECT MAX(num_operations) FROM intelligence_operations WHERE country = 'United States';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 88, "num_statements": 1} {"question": "What is the total number of garments sold for each manufacturer, partitioned by the manufacturer and ordered by the total number of garments sold in descending order?", "schema": "CREATE TABLE Manufacturers (ManufacturerID INT, ManufacturerName VARCHAR(255));CREATE TABLE Garments (GarmentID INT, ManufacturerID INT);CREATE TABLE Sales (SaleID INT, GarmentID INT, SaleDate DATE, Quantity INT);", "sql": "SELECT m.ManufacturerName, COUNT(g.GarmentID) AS TotalGarments, ROW_NUMBER() OVER (PARTITION BY NULL ORDER BY COUNT(g.GarmentID) DESC) AS Rank FROM Manufacturers m JOIN Garments g ON m.ManufacturerID = g.ManufacturerID JOIN Sales s ON g.GarmentID = s.GarmentID GROUP BY m.ManufacturerName ORDER BY Rank;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "query_window_function", "is_postgresql_specific": true, "sql_length": 303, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the singer for joe wilkinson", "schema": "CREATE TABLE table_29135051_2 (singer_s_ VARCHAR, comedian VARCHAR)", "sql": "SELECT singer_s_ FROM table_29135051_2 WHERE comedian = 'Joe Wilkinson';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 72, "num_statements": 1} {"question": "What are the details of the policies that have not been reviewed in the last 90 days and the number of systems associated with each?", "schema": "CREATE TABLE policies (policy_id INT, policy_name VARCHAR(255), owner VARCHAR(255), review_date DATE, num_systems INT);CREATE TABLE policy_systems (policy_id INT, system_id INT);", "sql": "SELECT p.policy_name, p.owner, p.num_systems, COUNT(ps.system_id) as associated_systems FROM policies p LEFT JOIN policy_systems ps ON p.policy_id = ps.policy_id WHERE p.review_date < DATE_SUB(CURRENT_DATE, INTERVAL 90 DAY) GROUP BY p.policy_id;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 245, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the score of 1993?", "schema": "CREATE TABLE table_name_91 (score VARCHAR, year VARCHAR)", "sql": "SELECT score FROM table_name_91 WHERE year = '1993';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 52, "num_statements": 1} {"question": "Display the names of therapists who have conducted therapy sessions for patients diagnosed with 'Anxiety Disorder'", "schema": "CREATE TABLE therapists (therapist_id INT PRIMARY KEY, therapist_name TEXT, specialization TEXT); CREATE TABLE patients (patient_id INT PRIMARY KEY, patient_name TEXT, date_of_birth DATE, diagnosis TEXT); CREATE TABLE therapy_sessions (session_id INT PRIMARY KEY, patient_id INT, therapist_id INT, session_date DATE, session_duration TIME);", "sql": "SELECT therapists.therapist_name FROM therapists INNER JOIN (SELECT patients.patient_id, therapy_sessions.therapist_id FROM patients INNER JOIN therapy_sessions ON patients.patient_id = therapy_sessions.patient_id WHERE patients.diagnosis = 'Anxiety Disorder') AS therapy_sessions_filtered ON therapists.therapist_id = therapy_sessions_filtered.therapist_id;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 358, "num_statements": 1} {"question": "PostgreSQL regression test 'sqljson': Write the SELECT query (example 170).", "schema": null, "sql": "SELECT to_json(a) AS a, JSON_OBJECTAGG(k : v ABSENT ON NULL)\nOVER (ORDER BY k)\nFROM (VALUES (1,1), (1,null), (2,2)) a(k,v);", "explanation": "Regression test for Sqljson in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT to_json(a) AS a, JSON_OBJECTAGG(k : v ABSENT ON NULL)\nOVER (ORDER BY k)\nFROM (VALUES (1,1), (1,null), (2,2)) a(k,v)) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "intermediate", "category": "query_window_function", "is_postgresql_specific": true, "sql_length": 123, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'numeric' (example 22).", "schema": null, "sql": "INSERT INTO num_exp_mul VALUES (0,2,'0');", "explanation": "DML from PostgreSQL core regression test for Numeric.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 41, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Game smaller than 34, and a December smaller than 14, and a Score of 10 - 6 has what opponent?", "schema": "CREATE TABLE table_name_46 (opponent VARCHAR, score VARCHAR, game VARCHAR, december VARCHAR)", "sql": "SELECT opponent FROM table_name_46 WHERE game < 34 AND december < 14 AND score = '10 - 6';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 90, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What percentage did McCain get in Hamilton county?", "schema": "CREATE TABLE table_20799905_1 (mccain_percentage VARCHAR, county VARCHAR)", "sql": "SELECT mccain_percentage FROM table_20799905_1 WHERE county = 'HAMILTON';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 73, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'drop_if_exists' (example 3).", "schema": null, "sql": "CREATE TABLE test_exists (a int, b text);", "explanation": "DDL from PostgreSQL core regression test for Drop If Exists.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "ddl_table", "is_postgresql_specific": false, "sql_length": 41, "num_statements": 1} {"question": "PostgreSQL regression test 'tidrangescan': Write the SELECT query (example 15).", "schema": null, "sql": "SELECT ctid FROM tidrangescan WHERE ctid < '(0,0)';", "explanation": "Regression test for Tidrangescan in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT ctid FROM tidrangescan WHERE ctid < '(0,0)') AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 51, "num_statements": 1} {"question": "Count the number of skincare products that contain natural ingredients and have a price above the median?", "schema": "CREATE TABLE products(product_id INT, category VARCHAR(255), has_natural_ingredients BOOLEAN, price DECIMAL(5,2));INSERT INTO products (product_id, category, has_natural_ingredients, price) VALUES (1, 'Cleanser', true, 19.99), (2, 'Toner', false, 12.99), (3, 'Serum', true, 35.99), (4, 'Moisturizer', false, 24.99);", "sql": "SELECT COUNT(*) FROM products WHERE category = 'skincare' AND has_natural_ingredients = true AND price > (SELECT AVG(price) FROM products WHERE category = 'skincare');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 167, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'foreign_data' (example 177).", "schema": null, "sql": "CREATE FOREIGN TABLE ft_part_1_2 (a INT NOT NULL) SERVER s0;", "explanation": "DDL from PostgreSQL core regression test for Foreign Data.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 60, "num_statements": 1} {"question": "PostgreSQL regression test 'tstypes': Write the SELECT query (example 153).", "schema": null, "sql": "SELECT 'a:1 b:2'::tsvector @@ 'a <2> b'::tsquery AS \"false\";", "explanation": "Regression test for Tstypes in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT 'a:1 b:2'::tsvector @@ 'a <2> b'::tsquery AS \"false\") AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": true, "sql_length": 60, "num_statements": 1} {"question": "What are the total sales for each drug in Q3 2020?", "schema": "CREATE TABLE drugs (drug_id INT, drug_name TEXT); INSERT INTO drugs (drug_id, drug_name) VALUES (1001, 'Ibuprofen'), (1002, 'Paracetamol'), (1003, 'Aspirin'); CREATE TABLE sales (sale_id INT, drug_id INT, sale_date DATE, revenue FLOAT); INSERT INTO sales (sale_id, drug_id, sale_date, revenue) VALUES (1, 1001, '2020-07-05', 1800.0), (2, 1002, '2020-08-10', 2300.0), (3, 1003, '2020-09-15', 1400.0), (4, 1001, '2020-10-20', 1900.0), (5, 1002, '2020-11-25', 2400.0);", "sql": "SELECT drug_name, SUM(revenue) as total_sales FROM sales JOIN drugs ON sales.drug_id = drugs.drug_id WHERE sale_date BETWEEN '2020-07-01' AND '2020-09-30' GROUP BY drug_name;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 174, "num_statements": 1} {"question": "Insert new records for a game designer who has adopted virtual reality technology.", "schema": "CREATE TABLE GameDesigners (DesignerID INT, Name VARCHAR(30), VRAdoption BOOLEAN);", "sql": "INSERT INTO GameDesigners (DesignerID, Name, VRAdoption) VALUES (1, 'John Doe', TRUE);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 86, "num_statements": 1} {"question": "What is the total humanitarian assistance provided by country in the last 3 years?", "schema": "CREATE TABLE HumanitarianAssistance(Year INT, Country NVARCHAR(50), Amount DECIMAL(18,2));INSERT INTO HumanitarianAssistance(Year, Country, Amount) VALUES (2018, 'United States', 6500000000), (2018, 'Germany', 2000000000), (2019, 'United States', 7000000000), (2019, 'Germany', 2500000000), (2020, 'United States', 8000000000), (2020, 'Germany', 3000000000);", "sql": "SELECT Country, SUM(Amount) AS Total_Assistance FROM HumanitarianAssistance WHERE Year >= 2018 GROUP BY Country;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 112, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the Venue when the result was 88–87?", "schema": "CREATE TABLE table_name_78 (venue VARCHAR, result VARCHAR)", "sql": "SELECT venue FROM table_name_78 WHERE result = '88–87';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 55, "num_statements": 1} {"question": "What is the total cargo capacity of vessels registered in Spain?", "schema": "CREATE TABLE Vessels (ID VARCHAR(10), Name VARCHAR(20), Type VARCHAR(20), Cargo_Capacity FLOAT); INSERT INTO Vessels (ID, Name, Type, Cargo_Capacity) VALUES ('1', 'Vessel A', 'Cargo', 10000.0), ('2', 'Vessel B', 'Tanker', 15000.0); CREATE TABLE Registry (ID VARCHAR(10), Vessel_ID VARCHAR(10), Registered_Country VARCHAR(20)); INSERT INTO Registry (ID, Vessel_ID, Registered_Country) VALUES ('1', '1', 'USA'), ('2', '2', 'Spain');", "sql": "SELECT SUM(Cargo_Capacity) FROM Vessels INNER JOIN Registry ON Vessels.ID = Registry.Vessel_ID WHERE Registered_Country = 'Spain';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 130, "num_statements": 1} {"question": "What is the maximum weight of packages shipped between countries in the Middle East in the last week?", "schema": "CREATE TABLE package_middle_east (id INT, package_weight FLOAT, shipped_from VARCHAR(20), shipped_to VARCHAR(20), shipped_date DATE); INSERT INTO package_middle_east (id, package_weight, shipped_from, shipped_to, shipped_date) VALUES (1, 4.5, 'Saudi Arabia', 'UAE', '2022-02-03');", "sql": "SELECT MAX(package_weight) FROM package_middle_east WHERE shipped_from LIKE 'Middle East%' AND shipped_to LIKE 'Middle East%' AND shipped_date >= DATE_SUB(CURRENT_DATE, INTERVAL 1 WEEK);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 186, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the city for thailand", "schema": "CREATE TABLE table_name_97 (city VARCHAR, country VARCHAR)", "sql": "SELECT city FROM table_name_97 WHERE country = 'thailand';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 58, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who is the March playmate with an August playmate Gianna Amore?", "schema": "CREATE TABLE table_name_53 (march VARCHAR, august VARCHAR)", "sql": "SELECT march FROM table_name_53 WHERE august = 'gianna amore';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 62, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the french word for fribourg?", "schema": "CREATE TABLE table_name_72 (french VARCHAR, common_english VARCHAR)", "sql": "SELECT french FROM table_name_72 WHERE common_english = 'fribourg';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 67, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Find the name and capacity of products with price greater than 700 (in USD).", "schema": "CREATE TABLE Catalog_Contents (catalog_entry_name VARCHAR, capacity VARCHAR, price_in_dollars INTEGER)", "sql": "SELECT catalog_entry_name, capacity FROM Catalog_Contents WHERE price_in_dollars > 700;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 87, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was her highest Super G score with a Slalom score of 58 and an Overall larger than 2?", "schema": "CREATE TABLE table_name_43 (super_g INTEGER, slalom VARCHAR, overall VARCHAR)", "sql": "SELECT MAX(super_g) FROM table_name_43 WHERE slalom = '58' AND overall > 2;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 75, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which loss had a player lower than 18?", "schema": "CREATE TABLE table_name_63 (lost INTEGER, played INTEGER)", "sql": "SELECT MIN(lost) FROM table_name_63 WHERE played < 18;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 54, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'create_view' (example 128).", "schema": null, "sql": "create view view_of_joins_2a as select * from tbl1 join tbl1a using (a);", "explanation": "DDL from PostgreSQL core regression test for Create View.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "ddl_view", "is_postgresql_specific": false, "sql_length": 72, "num_statements": 1} {"question": "Which satellites were deployed before 2010 and after 2005?", "schema": "CREATE TABLE Satellite (ID INT, LaunchDate DATE);", "sql": "SELECT * FROM Satellite WHERE LaunchDate BETWEEN '2005-01-01' AND '2010-12-31';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 79, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'truncate' (example 135).", "schema": null, "sql": "CREATE TABLE truncparted1 PARTITION OF truncparted FOR VALUES IN (1);", "explanation": "DDL from PostgreSQL core regression test for Truncate.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_table", "is_postgresql_specific": false, "sql_length": 69, "num_statements": 1} {"question": "What is the maximum labor productivity in the coal mining industry in the state \"West Virginia\" for the year 2020?", "schema": "CREATE TABLE coal_mining_productivity (id INT, state TEXT, year INT, productivity FLOAT); INSERT INTO coal_mining_productivity (id, state, year, productivity) VALUES (1, 'West Virginia', 2018, 3.5); INSERT INTO coal_mining_productivity (id, state, year, productivity) VALUES (2, 'West Virginia', 2020, 4.2);", "sql": "SELECT MAX(productivity) FROM coal_mining_productivity WHERE state = 'West Virginia' AND year = 2020;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 101, "num_statements": 1} {"question": "Show the total revenue for each music genre in descending order.", "schema": "CREATE TABLE album_sales (id INT, album_name VARCHAR(50), artist_name VARCHAR(50), genre VARCHAR(20), revenue FLOAT);", "sql": "SELECT genre, SUM(revenue) AS total_revenue FROM album_sales GROUP BY genre ORDER BY total_revenue DESC;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 104, "num_statements": 1} {"question": "Show the number of unique users who listened to the 'Hip Hop' genre in France.", "schema": "CREATE TABLE listens (user_id INT, song_id INT, date DATE); CREATE TABLE songs (id INT, title VARCHAR(100), genre VARCHAR(50)); INSERT INTO listens (user_id, song_id, date) VALUES (1, 1, '2020-01-01'), (1, 2, '2020-01-02'), (2, 1, '2020-01-01'), (3, 3, '2020-01-03'), (3, 1, '2020-01-04'); INSERT INTO songs (id, title, genre) VALUES (1, 'Song1', 'Hip Hop'), (2, 'Song2', 'Pop'), (3, 'Song3', 'Hip Hop');", "sql": "SELECT COUNT(DISTINCT user_id) FROM listens JOIN songs ON listens.song_id = songs.id WHERE songs.genre = 'Hip Hop' AND listens.date BETWEEN '2020-01-01' AND '2020-12-31' AND listens.country = 'France';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 201, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many teams scored 616 points?", "schema": "CREATE TABLE table_13018091_1 (won VARCHAR, pts_for VARCHAR)", "sql": "SELECT COUNT(won) FROM table_13018091_1 WHERE pts_for = 616;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 60, "num_statements": 1} {"question": "pgTAP test for Index (assertion 18).", "schema": null, "sql": "SELECT * FROM check_test(\n has_index( 'public', 'sometab', 'idx_foo', 'name'::name ),\n true,\n 'has_index() hash index no desc',\n 'Index idx_foo should exist',\n ''\n);", "explanation": "SQL assertion from pgTAP test suite for Index.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 180, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What year was the result did not qualify?", "schema": "CREATE TABLE table_name_85 (year VARCHAR, result VARCHAR)", "sql": "SELECT year FROM table_name_85 WHERE result = 'did not qualify';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 64, "num_statements": 1} {"question": "What is the total number of articles published by 'The New York Times' and 'The Washington Post'?", "schema": "CREATE TABLE ny_times (article_id INT, title TEXT, publisher TEXT); INSERT INTO ny_times (article_id, title, publisher) VALUES (1, 'Article 1', 'The New York Times'), (2, 'Article 2', 'The New York Times'); CREATE TABLE wa_post (article_id INT, title TEXT, publisher TEXT); INSERT INTO wa_post (article_id, title, publisher) VALUES (3, 'Article 3', 'The Washington Post'), (4, 'Article 4', 'The Washington Post');", "sql": "SELECT COUNT(*) FROM ( (SELECT * FROM ny_times) UNION (SELECT * FROM wa_post) );", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 80, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: what's the date of birth with end of term being 5july1978", "schema": "CREATE TABLE table_12134383_1 (date_of_birth VARCHAR, end_of_term VARCHAR)", "sql": "SELECT date_of_birth FROM table_12134383_1 WHERE end_of_term = '5July1978';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 75, "num_statements": 1} {"question": "Update the position of the employee with ID 2 in the \"employee_records\" table", "schema": "CREATE TABLE employee_records (employee_id INT PRIMARY KEY, name TEXT, position TEXT, leaving_date DATE); INSERT INTO employee_records (employee_id, name, position, leaving_date) VALUES (1, 'John Doe', 'CTO', '2018-01-01'); INSERT INTO employee_records (employee_id, name, position, leaving_date) VALUES (2, 'Jane Smith', 'COO', '2019-05-15'); INSERT INTO employee_records (employee_id, name, position, leaving_date) VALUES (3, 'Alice Johnson', 'Data Analyst', '2020-03-20');", "sql": "UPDATE employee_records SET position = 'VP of Operations' WHERE employee_id = 2;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 80, "num_statements": 1} {"question": "What is the minimum age of sea ice in the Beaufort Sea in 2021?", "schema": "CREATE TABLE sea_ice_age (sea VARCHAR(50), year INT, age INT);", "sql": "SELECT MIN(age) FROM sea_ice_age WHERE sea = 'Beaufort Sea' AND year = 2021;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 76, "num_statements": 1} {"question": "PostgreSQL regression test 'regex': Write the SELECT query (example 30).", "schema": null, "sql": "select regexp_matches('ab', 'a(?<=a)b*(?<=b)c*');", "explanation": "Regression test for Regex in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select regexp_matches('ab', 'a(?<=a)b*(?<=b)c*')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 49, "num_statements": 1} {"question": "What is the total quantity of each textile material sourced from each country?", "schema": "CREATE TABLE Textiles (id INT, material VARCHAR(20), country VARCHAR(20), quantity INT); CREATE VIEW TextilesByCountryMaterial AS SELECT material, country, SUM(quantity) as total_quantity FROM Textiles GROUP BY material, country;", "sql": "SELECT material, country, total_quantity FROM TextilesByCountryMaterial;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 72, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which school/club has a pick smaller than 196, a round higher than 5 and has Blake Miller?", "schema": "CREATE TABLE table_name_25 (school_club_team VARCHAR, player VARCHAR, pick VARCHAR, round VARCHAR)", "sql": "SELECT school_club_team FROM table_name_25 WHERE pick < 196 AND round > 5 AND player = 'blake miller';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 102, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who were the finalists in the 1972–73 season?", "schema": "CREATE TABLE table_name_53 (finalists VARCHAR, season VARCHAR)", "sql": "SELECT finalists FROM table_name_53 WHERE season = '1972–73';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 61, "num_statements": 1} {"question": "Minimum how many solar panels are installed in buildings located in Australia?", "schema": "CREATE TABLE solar_panels (building_id INT, quantity INT); CREATE TABLE buildings (id INT, country TEXT);", "sql": "SELECT MIN(quantity) FROM solar_panels JOIN buildings ON solar_panels.building_id = buildings.id WHERE country = 'Australia';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 125, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: When North Melbourne is the Away team, what is the total number of the Crowd?", "schema": "CREATE TABLE table_name_46 (crowd VARCHAR, away_team VARCHAR)", "sql": "SELECT COUNT(crowd) FROM table_name_46 WHERE away_team = 'north melbourne';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 75, "num_statements": 1} {"question": "Which country has the most space debris?", "schema": "CREATE TABLE space_debris (debris_id INT, name VARCHAR(255), country VARCHAR(255), debris_type VARCHAR(255));", "sql": "SELECT country, COUNT(*) as total_debris FROM space_debris GROUP BY country ORDER BY total_debris DESC LIMIT 1;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 111, "num_statements": 1} {"question": "What is the total funding received by organizations in the Arctic region in the year 2022?", "schema": "CREATE TABLE funding (id INT PRIMARY KEY, name VARCHAR(255), amount INT, organization VARCHAR(255), region VARCHAR(255), year INT); INSERT INTO funding (id, name, amount, organization, region, year) VALUES (1, 'Funding A', 100000, 'National Science Foundation', 'Arctic', 2023), (2, 'Funding B', 200000, 'University of the Arctic', 'Arctic', 2022), (3, 'Funding C', 50000, 'Arctic Council', 'Antarctic', 2023);", "sql": "SELECT SUM(f.amount) AS total_funding FROM funding f WHERE f.region = 'Arctic' AND f.year = 2022;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 97, "num_statements": 1} {"question": "What is the total number of community health workers by gender?", "schema": "CREATE TABLE workers (worker_id INT, worker_name VARCHAR(50), gender VARCHAR(10), region_id INT);", "sql": "SELECT gender, COUNT(*) FROM workers GROUP BY gender;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 53, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: what's the first operational with programming being not programmable—single purpose", "schema": "CREATE TABLE table_13636_1 (first_operational VARCHAR, programming VARCHAR)", "sql": "SELECT first_operational FROM table_13636_1 WHERE programming = 'Not programmable—single purpose';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 98, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the mascot for the school in 32 Hendricks County?", "schema": "CREATE TABLE table_name_11 (mascot VARCHAR, county VARCHAR)", "sql": "SELECT mascot FROM table_name_11 WHERE county = '32 hendricks';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 63, "num_statements": 1} {"question": "What is the maximum fare for a trip on the Red Line?", "schema": "CREATE TABLE if not exists metro_lines (line_id serial primary key,name varchar(255));CREATE TABLE if not exists metro_stations (station_id serial primary key,name varchar(255),line_id int);CREATE TABLE if not exists routes (route_id serial primary key,line_id int,start_station_id int,end_station_id int);CREATE TABLE if not exists fares (fare_id serial primary key,route_id int,price decimal);", "sql": "SELECT MAX(f.price) FROM fares f JOIN routes r ON f.route_id = r.route_id JOIN metro_stations s ON r.start_station_id = s.station_id WHERE s.line_id = 1;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": true, "sql_length": 153, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'join' (example 9).", "schema": null, "sql": "INSERT INTO J1_TBL VALUES (7, 7, 'seven');", "explanation": "DML from PostgreSQL core regression test for Join.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 42, "num_statements": 1} {"question": "What is the total funding received by biotech startups in India, grouped by year?", "schema": "CREATE TABLE startups (id INT, name VARCHAR(255), country VARCHAR(255), funding FLOAT, date DATE); INSERT INTO startups (id, name, country, funding, date) VALUES (1, 'StartupA', 'India', 5000000, '2020-01-01'); INSERT INTO startups (id, name, country, funding, date) VALUES (2, 'StartupB', 'India', 7000000, '2019-01-01');", "sql": "SELECT country, YEAR(date) AS year, SUM(funding) FROM startups WHERE country = 'India' GROUP BY year, country;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 110, "num_statements": 1} {"question": "Update the 'LastEruption' date of the 'Eyjafjallajökull' volcano in the 'UnderwaterVolcanoes' table", "schema": "CREATE TABLE UnderwaterVolcanoes (VolcanoID INT, VolcanoName VARCHAR(255), Location VARCHAR(255), LastEruption DATE);", "sql": "UPDATE UnderwaterVolcanoes SET LastEruption = '2010-04-14' WHERE VolcanoName = 'Eyjafjallajökull';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 98, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many Connecticut home games were broadcast?", "schema": "CREATE TABLE table_28298589_4 (broadcast VARCHAR, home_team VARCHAR)", "sql": "SELECT COUNT(broadcast) FROM table_28298589_4 WHERE home_team = 'Connecticut';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 78, "num_statements": 1} {"question": "List all funding sources with amounts greater than $10,000", "schema": "CREATE TABLE FundingSources (FundingSourceID INT PRIMARY KEY, Name VARCHAR(100), Amount FLOAT, Date DATE); INSERT INTO FundingSources (FundingSourceID, Name, Amount, Date) VALUES (1, 'National Endowment for the Arts', 50000, '2021-12-15');", "sql": "SELECT * FROM FundingSources WHERE Amount > 10000;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 50, "num_statements": 1} {"question": "Show the names of all destinations that were marketed in 2020 and had more than 15000 tourists visiting in that year.", "schema": "CREATE TABLE marketing_campaigns (destination VARCHAR(20), year INT); CREATE TABLE tourism_stats (destination VARCHAR(20), year INT, tourists INT); INSERT INTO marketing_campaigns (destination, year) VALUES ('Japan', 2020), ('France', 2020), ('Germany', 2021), ('Italy', 2020); INSERT INTO tourism_stats (destination, year, tourists) VALUES ('Japan', 2020, 20000), ('Japan', 2021, 25000), ('France', 2020, 16000), ('France', 2021, 18000), ('Germany', 2021, 12000), ('Italy', 2020, 15000), ('Italy', 2021, 17000);", "sql": "SELECT destination FROM marketing_campaigns WHERE year = 2020 INTERSECT SELECT destination FROM tourism_stats WHERE tourists > 15000 AND year = 2020;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 149, "num_statements": 1} {"question": "What is the maximum number of items produced using fair labor practices in a single factory?", "schema": "CREATE TABLE FairLaborFactories (id INT, factory_name VARCHAR(50), items INT); INSERT INTO FairLaborFactories (id, factory_name, items) VALUES (1, 'GreenFactory', 2000), (2, 'EcoTextiles', 3000), (3, 'SustainableWeaves', 1000);", "sql": "SELECT MAX(items) FROM FairLaborFactories;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 42, "num_statements": 1} {"question": "What is the maximum budget allocated for a single cybersecurity project?", "schema": "CREATE TABLE CybersecurityBudget (id INT, project VARCHAR(100), budget FLOAT); INSERT INTO CybersecurityBudget (id, project, budget) VALUES (1, 'Project1', 250000.00); INSERT INTO CybersecurityBudget (id, project, budget) VALUES (2, 'Project2', 1000000.00);", "sql": "SELECT MAX(budget) FROM CybersecurityBudget;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 44, "num_statements": 1} {"question": "How many users have played 'GameA' and 'GameB'?", "schema": "CREATE TABLE users_games (user_id INT, game_id INT, played_at TIMESTAMP); INSERT INTO users_games (user_id, game_id, played_at) VALUES (1, 1, '2021-01-01 10:00:00'), (2, 1, '2021-01-02 11:00:00'), (3, 2, '2021-01-03 12:00:00'), (4, 2, '2021-01-04 13:00:00'), (5, 3, '2021-01-05 14:00:00');", "sql": "SELECT game_id, COUNT(DISTINCT user_id) as unique_users FROM users_games WHERE game_id IN (1, 2) GROUP BY game_id;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 114, "num_statements": 1} {"question": "Identify the regions with highest carbon sequestration in 2020.", "schema": "CREATE TABLE carbon_sequestration (year INT, region VARCHAR(255), sequestration FLOAT); INSERT INTO carbon_sequestration (year, region, sequestration) VALUES (2020, 'Region A', 1300.0), (2020, 'Region B', 1400.0), (2020, 'Region C', 1200.0);", "sql": "SELECT region FROM carbon_sequestration WHERE sequestration = (SELECT MAX(sequestration) FROM carbon_sequestration WHERE year = 2020);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 134, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'btree_gin' (example 12).", "schema": null, "sql": "SELECT * FROM test_float4 WHERE i<=1::float8 ORDER BY i;", "explanation": "Example query from the 'btree_gin' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 56, "num_statements": 1} {"question": "What's the name of the blockchain with the highest market capitalization?", "schema": "CREATE TABLE market_cap (id INT, blockchain_name VARCHAR(255), market_cap INT); INSERT INTO market_cap (id, blockchain_name, market_cap) VALUES (1, 'Bitcoin', 1000000000), (2, 'Ethereum', 500000000), (3, 'Ripple', 200000000);", "sql": "SELECT blockchain_name FROM market_cap WHERE market_cap = (SELECT MAX(market_cap) FROM market_cap);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 99, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What status of school is informatics international college?", "schema": "CREATE TABLE table_2589963_1 (status VARCHAR, institution VARCHAR)", "sql": "SELECT status FROM table_2589963_1 WHERE institution = 'Informatics International College';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 91, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the lowest rank for puerto rico?", "schema": "CREATE TABLE table_150340_3 (rank INTEGER, country VARCHAR)", "sql": "SELECT MIN(rank) FROM table_150340_3 WHERE country = 'Puerto Rico';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 67, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Among all the claims, what is the amount claimed in the claim with the least amount settled? List both the settlement amount and claim amount.", "schema": "CREATE TABLE Claims (Amount_Settled VARCHAR, Amount_Claimed VARCHAR)", "sql": "SELECT Amount_Settled, Amount_Claimed FROM Claims ORDER BY Amount_Settled LIMIT 1;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 82, "num_statements": 1} {"question": "How many articles were published per month in the 'technology' category over the last 2 years?", "schema": "CREATE TABLE articles (id INT, title TEXT, category TEXT, publish_date DATE); INSERT INTO articles (id, title, category, publish_date) VALUES (1, 'Article Title 1', 'technology', '2020-02-01'), (2, 'Article Title 2', 'technology', '2022-06-05');", "sql": "SELECT DATE_FORMAT(publish_date, '%Y-%m') AS pub_month, COUNT(*) AS num_articles FROM articles WHERE category = 'technology' AND publish_date >= NOW() - INTERVAL 2 YEAR GROUP BY pub_month ORDER BY pub_month;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 207, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the average number of wins of the year with less than 5 top 10s, a winning of $39,190 and less than 2 starts?", "schema": "CREATE TABLE table_name_17 (wins INTEGER, starts VARCHAR, top_10 VARCHAR, winnings VARCHAR)", "sql": "SELECT AVG(wins) FROM table_name_17 WHERE top_10 < 5 AND winnings = '$39,190' AND starts < 2;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 93, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the year with a Kurtis Kraft 500a chassis, and less than 1.5 points?", "schema": "CREATE TABLE table_name_10 (year INTEGER, chassis VARCHAR, points VARCHAR)", "sql": "SELECT SUM(year) FROM table_name_10 WHERE chassis = 'kurtis kraft 500a' AND points < 1.5;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 89, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is Score, when Date is \"13 March 1985\", and when Away Team is \"Millwall\"?", "schema": "CREATE TABLE table_name_67 (score VARCHAR, date VARCHAR, away_team VARCHAR)", "sql": "SELECT score FROM table_name_67 WHERE date = '13 march 1985' AND away_team = 'millwall';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 88, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What league has 2-3 as the away?", "schema": "CREATE TABLE table_name_18 (league VARCHAR, away VARCHAR)", "sql": "SELECT league FROM table_name_18 WHERE away = '2-3';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 52, "num_statements": 1} {"question": "What is the average price of 'Sustainable T-Shirts' sold in the 'Europe' region in Q1 2022?", "schema": "CREATE TABLE Sales (id INT, product VARCHAR(20), region VARCHAR(20), price DECIMAL(5,2), sale_date DATE); INSERT INTO Sales (id, product, region, price, sale_date) VALUES (1, 'Sustainable T-Shirt', 'Europe', 25.99, '2022-01-02'), (2, 'Regular T-Shirt', 'North America', 19.99, '2022-02-15'), (3, 'Sustainable T-Shirt', 'Europe', 27.49, '2022-03-28');", "sql": "SELECT AVG(price) FROM Sales WHERE product = 'Sustainable T-Shirt' AND region = 'Europe' AND sale_date BETWEEN '2022-01-01' AND '2022-03-31';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 141, "num_statements": 1} {"question": "What is the total investment in the retail sector for customers in all regions?", "schema": "CREATE TABLE investment_data (customer_id INT, sector VARCHAR(20), investment FLOAT); INSERT INTO investment_data (customer_id, sector, investment) VALUES (1, 'technology', 5000), (2, 'retail', 8000), (3, 'retail', 3000); CREATE TABLE customer_data (customer_id INT, name VARCHAR(20), region VARCHAR(10)); INSERT INTO customer_data (customer_id, name, region) VALUES (1, 'John Doe', 'east'), (2, 'Jane Smith', 'south'), (3, 'Mary Johnson', 'west');", "sql": "SELECT SUM(investment) FROM investment_data INNER JOIN customer_data ON investment_data.customer_id = customer_data.customer_id WHERE sector = 'retail';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 152, "num_statements": 1} {"question": "PL/pgSQL test: Plpython Types (example 87).", "schema": null, "sql": "SELECT * FROM test_type_conversion_array_int4(ARRAY[NULL,1]);", "explanation": "PL/pgSQL example from PostgreSQL source test for Plpython Types.", "validation_query": null, "source": "plpgsql_source", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": true, "sql_length": 61, "num_statements": 1} {"question": "Add a new row to the carbon_offset_programs table for a program with id 6, name 'Public Transportation', start_date 2024-01-01 and end_date 2026-12-31", "schema": "CREATE TABLE carbon_offset_programs (id INT, name TEXT, start_date DATE, end_date DATE);", "sql": "INSERT INTO carbon_offset_programs (id, name, start_date, end_date) VALUES (6, 'Public Transportation', '2024-01-01', '2026-12-31');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 132, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Find the names of nurses who are nursing an undergoing treatment.", "schema": "CREATE TABLE undergoes (AssistingNurse VARCHAR); CREATE TABLE nurse (name VARCHAR, EmployeeID VARCHAR)", "sql": "SELECT DISTINCT T2.name FROM undergoes AS T1 JOIN nurse AS T2 ON T1.AssistingNurse = T2.EmployeeID;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 99, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'pgcrypto' (example 4).", "schema": null, "sql": "SELECT digest('abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq', 'sha224');", "explanation": "Example query from the 'pgcrypto' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 84, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'btree_gist' (example 13).", "schema": null, "sql": "SELECT count(*) FROM datetmp WHERE a = '2001-02-13'::date;", "explanation": "Example query from the 'btree_gist' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 59, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is Language, when Content is Sport, when HDTV is No, and when Television Service is ESPN America?", "schema": "CREATE TABLE table_name_71 (language VARCHAR, television_service VARCHAR, content VARCHAR, hdtv VARCHAR)", "sql": "SELECT language FROM table_name_71 WHERE content = 'sport' AND hdtv = 'no' AND television_service = 'espn america';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 115, "num_statements": 1} {"question": "Which city generated the most revenue from hip-hop concerts in the last 2 years?", "schema": "CREATE TABLE Concerts (location VARCHAR(50), concert_genre VARCHAR(20), year INT, revenue FLOAT); INSERT INTO Concerts (location, concert_genre, year, revenue) VALUES ('New York', 'Hip-Hop', 2020, 45000.00), ('Atlanta', 'Hip-Hop', 2020, 55000.00), ('Los Angeles', 'Hip-Hop', 2021, 65000.00);", "sql": "SELECT location, MAX(revenue) FROM Concerts WHERE concert_genre = 'Hip-Hop' AND year BETWEEN (SELECT MAX(year) - 2 FROM Concerts) AND MAX(year) GROUP BY location;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 162, "num_statements": 1} {"question": "List all organic farming practices in descending order of their associated costs (in dollars) in the 'food_justice' schema?", "schema": "CREATE SCHEMA food_justice;CREATE TABLE organic_practices (id INT, practice VARCHAR(50), cost FLOAT);INSERT INTO food_justice.organic_practices (id, practice, cost) VALUES (1, 'Practice A', 150.5), (2, 'Practice B', 200.3), (3, 'Practice C', 250.0);", "sql": "SELECT practice, cost FROM food_justice.organic_practices ORDER BY cost DESC;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 77, "num_statements": 1} {"question": "Find the maximum number of points scored in a single game in NBA history?", "schema": "CREATE TABLE NBA_Matches (MatchID INT, HomeTeam VARCHAR(50), AwayTeam VARCHAR(50), HomeTeamScore INT, AwayTeamScore INT); INSERT INTO NBA_Matches (MatchID, HomeTeam, AwayTeam, HomeTeamScore, AwayTeamScore) VALUES (1, 'Chicago Bulls', 'Golden State Warriors', 142, 128);", "sql": "SELECT MAX(HomeTeamScore + AwayTeamScore) FROM NBA_Matches;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 59, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: who won on 29 may?", "schema": "CREATE TABLE table_name_32 (winner VARCHAR, date VARCHAR)", "sql": "SELECT winner FROM table_name_32 WHERE date = '29 may';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 55, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What were all the tries for when the try bonus was 8?", "schema": "CREATE TABLE table_13564637_5 (tries_for VARCHAR, try_bonus VARCHAR)", "sql": "SELECT tries_for FROM table_13564637_5 WHERE try_bonus = '8';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 61, "num_statements": 1} {"question": "What is the minimum age of attendees who visited the museum last year?", "schema": "CREATE TABLE MuseumAttendees (attendeeID INT, visitDate DATE, age INT); INSERT INTO MuseumAttendees (attendeeID, visitDate, age) VALUES (1, '2022-02-03', 35), (2, '2022-08-17', 42), (3, '2022-12-25', 28);", "sql": "SELECT MIN(age) FROM MuseumAttendees WHERE visitDate >= '2022-01-01' AND visitDate <= '2022-12-31';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 99, "num_statements": 1} {"question": "How many species are present in each region?", "schema": "CREATE TABLE species (species_id INT, species_name TEXT);CREATE TABLE regions (region_id INT, region_name TEXT);CREATE TABLE species_regions (species_region_id INT, species_id INT, region_id INT); INSERT INTO species (species_id, species_name) VALUES (1, 'Oak'), (2, 'Pine'), (3, 'Maple'); INSERT INTO regions (region_id, region_name) VALUES (1, 'Region A'), (2, 'Region B'); INSERT INTO species_regions (species_region_id, species_id, region_id) VALUES (1, 1, 1), (2, 2, 1), (3, 3, 2);", "sql": "SELECT r.region_id, r.region_name, COUNT(sr.species_id) FROM regions AS r JOIN species_regions AS sr ON r.region_id = sr.region_id GROUP BY r.region_id, r.region_name;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 167, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the Candlelight Records Catalog of Candle053tin format?", "schema": "CREATE TABLE table_name_93 (format VARCHAR, label VARCHAR, catalog VARCHAR)", "sql": "SELECT format FROM table_name_93 WHERE label = 'candlelight records' AND catalog = 'candle053tin';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 98, "num_statements": 1} {"question": "What is the name and type of all satellites launched by spacecraft that were launched after the year 2010?", "schema": "CREATE TABLE Spacecraft (id INT, name VARCHAR(50), country VARCHAR(50), launch_date DATE); INSERT INTO Spacecraft (id, name, country, launch_date) VALUES (1, 'Falcon 9', 'USA', '2010-06-04'); INSERT INTO Spacecraft (id, name, country, launch_date) VALUES (2, 'Soyuz-FG', 'Russia', '2001-11-02'); INSERT INTO Spacecraft (id, name, country, launch_date) VALUES (3, 'Long March 3B', 'China', '1996-02-19'); CREATE TABLE Satellites (id INT, name VARCHAR(50), type VARCHAR(50), spacecraft_id INT); INSERT INTO Satellites (id, name, type, spacecraft_id) VALUES (1, 'TESS', 'Observation', 1); INSERT INTO Satellites (id, name, type, spacecraft_id) VALUES (2, 'MetOp-C', 'Weather', 2); INSERT INTO Satellites (id, name, type, spacecraft_id) VALUES (3, 'Chinasat 18', 'Communication', 3);", "sql": "SELECT s.name, s.type FROM Satellites s JOIN Spacecraft sp ON s.spacecraft_id = sp.id WHERE sp.launch_date > '2010-01-01';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 122, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the score on november 21?", "schema": "CREATE TABLE table_name_2 (score VARCHAR, date VARCHAR)", "sql": "SELECT score FROM table_name_2 WHERE date = 'november 21';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 58, "num_statements": 1} {"question": "Show the revenue for each product category", "schema": "CREATE TABLE products (product_id INT, product_name VARCHAR(255), category VARCHAR(255)); INSERT INTO products (product_id, product_name, category) VALUES (1, 'Apples', 'Fruit'), (2, 'Bananas', 'Fruit'), (3, 'Milk', 'Dairy'); CREATE TABLE sales (sale_id INT, product_id INT, revenue INT); INSERT INTO sales (sale_id, product_id, revenue) VALUES (1, 1, 100), (2, 2, 50), (3, 3, 200);", "sql": "SELECT products.category, SUM(sales.revenue) FROM sales INNER JOIN products ON sales.product_id = products.product_id GROUP BY products.category;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 145, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: I want the player for NHL team for mighty ducks of anaheim", "schema": "CREATE TABLE table_name_89 (player VARCHAR, nhl_team VARCHAR)", "sql": "SELECT player FROM table_name_89 WHERE nhl_team = 'mighty ducks of anaheim';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 76, "num_statements": 1} {"question": "Update the genre of the genre with id 2 to 'New Genre' in the 'genres' table", "schema": "CREATE TABLE genres (id INT, genre TEXT);", "sql": "UPDATE genres SET genre = 'New Genre' WHERE id = 2;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 51, "num_statements": 1} {"question": "List all organizations that have received grants in the Climate Change sector, in alphabetical order?", "schema": "CREATE TABLE Organizations (OrgID INT, OrgName TEXT, Sector TEXT); CREATE TABLE Grants (GrantID INT, GrantName TEXT, OrgID INT, Sector TEXT, Amount DECIMAL);", "sql": "SELECT OrgName FROM Organizations JOIN Grants ON Organizations.OrgID = Grants.OrgID WHERE Sector = 'Climate Change' ORDER BY OrgName;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 133, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'create_am' (example 105).", "schema": null, "sql": "CREATE TABLE am_partitioned_1 PARTITION OF am_partitioned\n FOR VALUES WITH (MODULUS 10, REMAINDER 1);", "explanation": "DDL from PostgreSQL core regression test for Create Am.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_table", "is_postgresql_specific": false, "sql_length": 102, "num_statements": 1} {"question": "How many research grants were awarded to each department in the past two years, and what was the total amount awarded?", "schema": "CREATE TABLE research_grants (id INT, department TEXT, amount INT, year INT); INSERT INTO research_grants (id, department, amount, year) VALUES (8, 'Computer Science', 6000, 2021); INSERT INTO research_grants (id, department, amount, year) VALUES (9, 'Mathematics', 4000, 2020);", "sql": "SELECT department, COUNT(*) as num_grants, SUM(amount) as total_amount FROM research_grants WHERE year BETWEEN 2020 AND 2021 GROUP BY department;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 145, "num_statements": 1} {"question": "What is the percentage of crimes committed by juveniles compared to adults?", "schema": "CREATE TABLE crimes (age VARCHAR(255), count INT); INSERT INTO crimes (age, count) VALUES ('Juvenile', 25), ('Adult', 75);", "sql": "SELECT age, count, 100.0 * count / SUM(count) OVER () FROM crimes;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_window_function", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "PostgreSQL regression test 'timestamptz': Write the SELECT query (example 63).", "schema": null, "sql": "SELECT '19970210 173201' AT TIME ZONE 'America/New_York';", "explanation": "Regression test for Timestamptz in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT '19970210 173201' AT TIME ZONE 'America/New_York') AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 57, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: List all the log ids and their descriptions from the problem logs.", "schema": "CREATE TABLE problem_log (problem_log_id VARCHAR, log_entry_description VARCHAR)", "sql": "SELECT problem_log_id, log_entry_description FROM problem_log;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 62, "num_statements": 1} {"question": "Show the total sales for each supplier country, from the sales_fact table, grouped by supplier_country.", "schema": "CREATE TABLE order_fact (order_id INT, sale_id INT, supplier_id INT, order_date DATE);", "sql": "SELECT s.supplier_country, SUM(sf.sale_quantity * sf.sale_price) as total_sales FROM sales_fact sf JOIN order_fact o ON sf.sale_id = o.sale_id JOIN supplier_dim s ON o.supplier_id = s.supplier_id GROUP BY s.supplier_country;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 224, "num_statements": 1} {"question": "What is the change in temperature for each Arctic research station over the last year?", "schema": "CREATE TABLE temperature_data (station VARCHAR(255), year INT, temperature FLOAT);", "sql": "SELECT station, (temperature - LAG(temperature) OVER (PARTITION BY station ORDER BY year)) AS temperature_change FROM temperature_data WHERE year BETWEEN 2022 AND 2022;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "query_window_function", "is_postgresql_specific": true, "sql_length": 169, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What are all the policy types of the customer named \"Dayana Robel\"?", "schema": "CREATE TABLE customers (customer_id VARCHAR, customer_name VARCHAR); CREATE TABLE available_policies (policy_type_code VARCHAR, policy_id VARCHAR); CREATE TABLE customers_policies (customer_id VARCHAR, policy_id VARCHAR)", "sql": "SELECT DISTINCT t3.policy_type_code FROM customers AS t1 JOIN customers_policies AS t2 ON t1.customer_id = t2.customer_id JOIN available_policies AS t3 ON t2.policy_id = t3.policy_id WHERE t1.customer_name = 'Dayana Robel';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 223, "num_statements": 1} {"question": "Who has the highest number of career home runs among players from Japan in Major League Baseball?", "schema": "CREATE TABLE mlb_players (player_name VARCHAR(100), country VARCHAR(50), home_runs INT); INSERT INTO mlb_players VALUES ('Hideki Matsui', 'Japan', 175), ('Ichiro Suzuki', 'Japan', 117), ('Masahiro Tanaka', 'Japan', 1), ('Shohei Ohtani', 'Japan', 71);", "sql": "SELECT player_name, home_runs FROM mlb_players WHERE country = 'Japan' ORDER BY home_runs DESC LIMIT 1;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 103, "num_statements": 1} {"question": "What is the total funding received by startups founded by individuals with disabilities in the fintech sector?", "schema": "CREATE TABLE startups (id INT, name TEXT, industry TEXT, founding_date DATE, founders TEXT, funding FLOAT); INSERT INTO startups (id, name, industry, founding_date, founders, funding) VALUES (1, 'FintechForAll', 'Fintech', '2020-01-01', 'Individuals with disabilities', 3000000.0);", "sql": "SELECT SUM(funding) FROM startups WHERE founders = 'Individuals with disabilities' AND industry = 'Fintech';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 108, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which supercharger gear ratio has a Octane rating of 68?", "schema": "CREATE TABLE table_name_22 (supercharger_gear_ratio VARCHAR, octane_rating VARCHAR)", "sql": "SELECT supercharger_gear_ratio FROM table_name_22 WHERE octane_rating = '68';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 77, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: List the name of all rooms sorted by their prices.", "schema": "CREATE TABLE Rooms (roomName VARCHAR, basePrice VARCHAR)", "sql": "SELECT roomName FROM Rooms ORDER BY basePrice;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 46, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Operation of find-min has what binary?", "schema": "CREATE TABLE table_name_98 (binary VARCHAR, operation VARCHAR)", "sql": "SELECT binary FROM table_name_98 WHERE operation = 'find-min';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 62, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the air date for the episode with production code ad1c13?", "schema": "CREATE TABLE table_name_64 (original_air_date VARCHAR, production_code VARCHAR)", "sql": "SELECT original_air_date FROM table_name_64 WHERE production_code = 'ad1c13';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 77, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How much Frequency MHz has a Call sign of w264bg?", "schema": "CREATE TABLE table_name_95 (frequency_mhz VARCHAR, call_sign VARCHAR)", "sql": "SELECT COUNT(frequency_mhz) FROM table_name_95 WHERE call_sign = 'w264bg';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 74, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'test_decoding' (example 118).", "schema": null, "sql": "INSERT INTO tr_sub(path) VALUES ('2-top-1...--#1');", "explanation": "Example query from the 'test_decoding' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 51, "num_statements": 1} {"question": "Write the SQL definition from PostgreSQL contrib extension 'sepgsql' (item 36).", "schema": null, "sql": "CREATE FUNCTION customer_credit(int) RETURNS text\n AS 'SELECT regexp_replace(ccredit, ''-[0-9]+$'', ''-????'') FROM customer WHERE cid = $1'\n LANGUAGE sql;", "explanation": "SQL definition from the 'sepgsql' PostgreSQL contrib extension.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 161, "num_statements": 1} {"question": "Show total sales revenue for recycled products in the last month.", "schema": "CREATE TABLE products (product_id INT, name VARCHAR(255), recycled_materials BOOLEAN); INSERT INTO products (product_id, name, recycled_materials) VALUES (1, 'Recycled Notebook', TRUE), (2, 'Plastic Phone Case', FALSE); CREATE TABLE sales (sale_id INT, product_id INT, sale_price DECIMAL(10, 2), sale_date DATE); INSERT INTO sales (sale_id, product_id, sale_price, sale_date) VALUES (1, 1, 12.99, '2022-12-05'), (2, 2, 5.99, '2022-11-10'), (3, 1, 12.99, '2022-12-20');", "sql": "SELECT SUM(sale_price) FROM products JOIN sales ON products.product_id = sales.product_id WHERE recycled_materials = TRUE AND sale_date BETWEEN '2022-12-01' AND '2022-12-31';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 174, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'citext' (example 432).", "schema": null, "sql": "SELECT *\n FROM citext_matview m\n FULL JOIN citext_table t ON (t.id = m.id AND t *= m)\n WHERE t.id IS NULL OR m.id IS NULL;", "explanation": "Example query from the 'citext' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 125, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the lowest car number sponsored by UPS before 2001?", "schema": "CREATE TABLE table_name_22 (car__number INTEGER, sponsor VARCHAR, season VARCHAR)", "sql": "SELECT MIN(car__number) FROM table_name_22 WHERE sponsor = 'ups' AND season < 2001;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 83, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who is the winning driver of the race on 2 June with a.z.k./roc-compétition a.z.k./roc-compétition as the winning team?", "schema": "CREATE TABLE table_name_15 (winning_driver VARCHAR, winning_team VARCHAR, date VARCHAR)", "sql": "SELECT winning_driver FROM table_name_15 WHERE winning_team = 'a.z.k./roc-compétition a.z.k./roc-compétition' AND date = '2 june';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 130, "num_statements": 1} {"question": "Write the PL/pgSQL object from PostgreSQL regression test 'triggers' (example 575).", "schema": null, "sql": "-- Test trigger renaming on partitioned tables\ncreate table grandparent (id int, primary key (id)) partition by range (id);", "explanation": "PL/pgSQL object from PostgreSQL core test for Triggers.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "ddl_table", "is_postgresql_specific": true, "sql_length": 123, "num_statements": 1} {"question": "Get the average mass (in kg) of spacecraft by country of origin.", "schema": "CREATE TABLE spacecraft_mass_kg (spacecraft_name VARCHAR(50), country VARCHAR(50), mass FLOAT);", "sql": "SELECT country, AVG(mass) as avg_mass_kg FROM spacecraft_mass_kg GROUP BY country;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 82, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: what is the salary and name of the employee who has the most number of aircraft certificates?", "schema": "CREATE TABLE Certificate (eid VARCHAR); CREATE TABLE Employee (name VARCHAR, salary VARCHAR, eid VARCHAR)", "sql": "SELECT T1.name, T1.salary FROM Employee AS T1 JOIN Certificate AS T2 ON T1.eid = T2.eid GROUP BY T1.eid ORDER BY COUNT(*) DESC LIMIT 1;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 135, "num_statements": 1} {"question": "What is the total number of artworks by female artists in the 'Impressionist' movement?", "schema": "CREATE TABLE Artists (ArtistID INT, Name VARCHAR(50), Gender VARCHAR(10), Nationality VARCHAR(50), ArtMovement VARCHAR(50)); INSERT INTO Artists (ArtistID, Name, Gender, Nationality, ArtMovement) VALUES (1, 'Claude Monet', 'Male', 'French', 'Impressionist'); INSERT INTO Artists (ArtistID, Name, Gender, Nationality, ArtMovement) VALUES (2, 'Berthe Morisot', 'Female', 'French', 'Impressionist');", "sql": "SELECT COUNT(*) FROM Artists WHERE Gender = 'Female' AND ArtMovement = 'Impressionist';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 87, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many years has 2 points?", "schema": "CREATE TABLE table_name_85 (year INTEGER, points VARCHAR)", "sql": "SELECT SUM(year) FROM table_name_85 WHERE points = 2;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 53, "num_statements": 1} {"question": "PL/pgSQL test: Plpgsql Simple (example 11).", "schema": null, "sql": "-- make sure flushing local caches changes nothing\n\\c -\n\nselect simplecaller();", "explanation": "PL/pgSQL example from PostgreSQL source test for Plpgsql Simple.", "validation_query": null, "source": "plpgsql_source", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 79, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is Score, when Date is \"2000-05-23\"?", "schema": "CREATE TABLE table_name_93 (score VARCHAR, date VARCHAR)", "sql": "SELECT score FROM table_name_93 WHERE date = '2000-05-23';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 58, "num_statements": 1} {"question": "What is the total area of all community gardens in New York City?", "schema": "CREATE TABLE community_gardens (garden_id INT, name TEXT, location TEXT, area REAL, city TEXT, state TEXT, zip_code TEXT); INSERT INTO community_gardens (garden_id, name, location, area, city, state, zip_code) VALUES (1, 'Green Oasis', '123 Main St', 0.25, 'New York', 'NY', '10001');", "sql": "SELECT SUM(area) FROM community_gardens WHERE city = 'New York' AND state = 'NY';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 81, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'copy2' (example 194).", "schema": null, "sql": "CREATE TABLE rls_t1 (a int, b int, c int);", "explanation": "DDL from PostgreSQL core regression test for Copy2.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_table", "is_postgresql_specific": false, "sql_length": 42, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'create_index' (example 227).", "schema": null, "sql": "INSERT INTO concur_heap VALUES ('a','b');", "explanation": "DML from PostgreSQL core regression test for Create Index.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 42, "num_statements": 1} {"question": "How many games were sold in each region in the 'sales' schema?", "schema": "CREATE TABLE sales.games (id INT, title VARCHAR(50), region VARCHAR(20), units_sold INT);", "sql": "SELECT region, SUM(units_sold) AS total_units_sold FROM sales.games GROUP BY region;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 84, "num_statements": 1} {"question": "Show companies with a diversity score above the industry average", "schema": "CREATE TABLE companies (id INT, name VARCHAR(50), industry VARCHAR(50), diversity_score DECIMAL(3,2)); INSERT INTO companies VALUES (1, 'Acme Corp', 'Technology', 0.85); INSERT INTO companies VALUES (2, 'Beta Inc', 'Retail', 0.70);", "sql": "SELECT companies.name, companies.diversity_score FROM companies INNER JOIN (SELECT industry, AVG(diversity_score) AS industry_avg FROM companies GROUP BY industry) AS industry_averages ON companies.industry = industry_averages.industry WHERE companies.diversity_score > industry_averages.industry_avg;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 301, "num_statements": 1} {"question": "Delete records in the menu_items table where the item_name is 'Chicken Caesar Salad' AND price is greater than 12.00", "schema": "CREATE TABLE menu_items (item_name VARCHAR(255), price DECIMAL(5,2));", "sql": "DELETE FROM menu_items WHERE item_name = 'Chicken Caesar Salad' AND price > 12.00;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_delete", "is_postgresql_specific": false, "sql_length": 82, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the highest Pick # for the College of Simon Fraser?", "schema": "CREATE TABLE table_name_14 (pick__number INTEGER, college VARCHAR)", "sql": "SELECT MAX(pick__number) FROM table_name_14 WHERE college = 'simon fraser';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 75, "num_statements": 1} {"question": "Find the number of unique IP addresses involved in each type of attack, for the last month?", "schema": "CREATE TABLE attacks (attack_type VARCHAR(255), ip_address VARCHAR(255), attack_date DATE); INSERT INTO attacks (attack_type, ip_address, attack_date) VALUES ('DDOS', '192.168.1.1', '2022-06-01'), ('DDOS', '192.168.1.2', '2022-06-01'), ('Phishing', '192.168.1.3', '2022-06-02'), ('Phishing', '192.168.1.4', '2022-06-02'), ('Phishing', '192.168.1.5', '2022-06-02'), ('Malware', '192.168.1.6', '2022-06-03');", "sql": "SELECT attack_type, COUNT(DISTINCT ip_address) as unique_ip_addresses FROM attacks WHERE attack_date >= DATEADD(month, -1, GETDATE()) GROUP BY attack_type;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 155, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the phone and email for customer with first name Aniyah and last name Feest?", "schema": "CREATE TABLE Customers (customer_phone VARCHAR, customer_email VARCHAR, customer_first_name VARCHAR, customer_last_name VARCHAR)", "sql": "SELECT customer_phone, customer_email FROM Customers WHERE customer_first_name = 'Aniyah' AND customer_last_name = 'Feest';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 123, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'xml' (example 137).", "schema": null, "sql": "CREATE VIEW xmlview4 AS SELECT xmlelement(name employee, xmlforest(name, age, salary as pay)) FROM emp;", "explanation": "DDL from PostgreSQL core regression test for Xml.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "ddl_view", "is_postgresql_specific": false, "sql_length": 103, "num_statements": 1} {"question": "What is the total number of animals adopted by each community education program?", "schema": "CREATE TABLE community_education_programs (id INT, name VARCHAR(255), city VARCHAR(255)); INSERT INTO community_education_programs (id, name, city) VALUES (1, 'Wildlife Wonders', 'San Francisco'), (2, 'Nature Nurturers', 'New York'), (3, 'Eco Explorers', 'Los Angeles'); CREATE TABLE animal_adoptions (id INT, program_id INT, animal_id INT);", "sql": "SELECT cep.name, COUNT(aa.id) AS total_adoptions FROM community_education_programs cep JOIN animal_adoptions aa ON cep.id = aa.program_id GROUP BY cep.name;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 156, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the sum of all crowds present at the Glenferrie Oval venue?", "schema": "CREATE TABLE table_name_35 (crowd INTEGER, venue VARCHAR)", "sql": "SELECT SUM(crowd) FROM table_name_35 WHERE venue = 'glenferrie oval';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 69, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'aggregates' (example 542).", "schema": null, "sql": "create table agg_group_2 as\nselect * from\n (values (100), (300), (500)) as r(a),\n lateral (\n select (g/2)::numeric as c1,\n array_agg(g::numeric) as c2,\n\t count(*) as c3\n from agg_data_2k\n where g < r.a\n group by g/2) as s;", "explanation": "DDL from PostgreSQL core regression test for Aggregates.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "ddl_table", "is_postgresql_specific": true, "sql_length": 248, "num_statements": 1} {"question": "Write the SQL definition from PostgreSQL contrib extension 'contrib' (item 469).", "schema": null, "sql": "CREATE FUNCTION make_valid(ismn13)\n\tRETURNS ismn13\n\tAS 'MODULE_PATHNAME'\n\tLANGUAGE C\n\tIMMUTABLE STRICT\n\tPARALLEL SAFE;", "explanation": "SQL definition from the 'contrib' PostgreSQL contrib extension.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 118, "num_statements": 1} {"question": "How many natural disasters were reported in the state of Florida in 2019?", "schema": "CREATE TABLE natural_disasters (id INT, state VARCHAR(255), year INT, number_of_disasters INT); INSERT INTO natural_disasters (id, state, year, number_of_disasters) VALUES (1, 'Florida', 2019, 50), (2, 'California', 2019, 75);", "sql": "SELECT SUM(number_of_disasters) FROM natural_disasters WHERE state = 'Florida' AND year = 2019;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 95, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'earthdistance' (example 30).", "schema": null, "sql": "SELECT cube_ll_coord(ll_to_earth(90,180),1)::numeric(20,5),\n cube_ll_coord(ll_to_earth(90,180),2)::numeric(20,5),\n cube_ll_coord(ll_to_earth(90,180),3)::numeric(20,5);", "explanation": "Example query from the 'earthdistance' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "query_select", "is_postgresql_specific": false, "sql_length": 167, "num_statements": 1} {"question": "How many security incidents were reported in the education sector in the last month?", "schema": "CREATE TABLE incidents (incident_id INT, incident_date DATE, incident_sector VARCHAR(255));", "sql": "SELECT COUNT(*) FROM incidents WHERE incident_sector = 'Education' AND incident_date >= DATEADD(month, -1, GETDATE());", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 118, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the score against fabrice martin?", "schema": "CREATE TABLE table_name_66 (score VARCHAR, opponent VARCHAR)", "sql": "SELECT score FROM table_name_66 WHERE opponent = 'fabrice martin';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: All players are position mark polak.", "schema": "CREATE TABLE table_1013129_10 (position VARCHAR, player VARCHAR)", "sql": "SELECT position FROM table_1013129_10 WHERE player = 'Mark Polak';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "Insert new record of military equipment sale to 'South America' in 2021", "schema": "CREATE TABLE military_sales_3 (id INT, region VARCHAR, year INT, value FLOAT);", "sql": "INSERT INTO military_sales_3 (id, region, year, value) VALUES (1, 'South America', 2021, 500000);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 97, "num_statements": 1} {"question": "What is the average production quantity for wells in the 'onshore' category?", "schema": "CREATE TABLE wells (id INT, name VARCHAR(255), category VARCHAR(255), production_quantity INT); INSERT INTO wells (id, name, category, production_quantity) VALUES (1, 'Well A', 'onshore', 1000), (2, 'Well B', 'offshore', 2000), (3, 'Well C', 'onshore', 1500);", "sql": "SELECT AVG(production_quantity) FROM wells WHERE category = 'onshore';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 70, "num_statements": 1} {"question": "List the names and prices of menu items that are not offered at any restaurant located in 'New York'.", "schema": "CREATE TABLE restaurants (restaurant_id INT, name VARCHAR(255), cuisine VARCHAR(255), location VARCHAR(255)); INSERT INTO restaurants (restaurant_id, name, cuisine, location) VALUES (1, 'Big Burger', 'American', 'New York'); INSERT INTO restaurants (restaurant_id, name, cuisine, location) VALUES (2, 'Sushi Hana', 'Japanese', 'California'); INSERT INTO restaurants (restaurant_id, name, cuisine, location) VALUES (3, 'Taco Time', 'Mexican', 'Texas'); CREATE TABLE menu_items (menu_item_id INT, name VARCHAR(255), price DECIMAL(5,2), restaurant_id INT); INSERT INTO menu_items (menu_item_id, name, price, restaurant_id) VALUES (1, 'Big Burger', 12.99, 1); INSERT INTO menu_items (menu_item_id, name, price, restaurant_id) VALUES (2, 'Chicken Teriyaki', 15.99, 2); INSERT INTO menu_items (menu_item_id, name, price, restaurant_id) VALUES (3, 'Garden Salad', 7.99, 1); INSERT INTO menu_items (menu_item_id, name, price, restaurant_id) VALUES (4, 'Sushi Roll', 18.99, 2); INSERT INTO menu_items (menu_item_id, name, price, restaurant_id) VALUES (5, 'Taco', 6.99, 3); INSERT INTO menu_items (menu_item_id, name, price, restaurant_id) VALUES (6, 'Nachos', 8.99, 3); INSERT INTO menu_items (menu_item_id, name, price, restaurant_id) VALUES (7, 'Pizza', 10.99, NULL);", "sql": "SELECT name, price FROM menu_items WHERE menu_item_id NOT IN (SELECT menu_items.restaurant_id FROM menu_items JOIN restaurants ON menu_items.restaurant_id = restaurants.restaurant_id WHERE restaurants.location = 'New York') AND restaurant_id IS NULL;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 250, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who was the opponent in which the bout ended in round 2?", "schema": "CREATE TABLE table_name_71 (opponent VARCHAR, round VARCHAR)", "sql": "SELECT opponent FROM table_name_71 WHERE round = 2;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 51, "num_statements": 1} {"question": "Who is the fastest athlete in each sport?", "schema": "CREATE TABLE athletes (sport VARCHAR(50), name VARCHAR(50), time FLOAT); INSERT INTO athletes (sport, name, time) VALUES ('Swimming', 'Smith', 23.45), ('Swimming', 'Jones', 22.67), ('Athletics', 'Brown', 9.87), ('Athletics', 'Williams', 9.81);", "sql": "SELECT sport, name, MIN(time) AS fastest_time FROM athletes GROUP BY sport;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 77, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the time of the 100m freestyle event?", "schema": "CREATE TABLE table_name_54 (time VARCHAR, event VARCHAR)", "sql": "SELECT time FROM table_name_54 WHERE event = '100m freestyle';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 62, "num_statements": 1} {"question": "Write the PL/pgSQL object from PostgreSQL regression test 'privileges' (example 206).", "schema": null, "sql": "END$$;\nCREATE MATERIALIZED VIEW sro_index_mv AS SELECT 1 AS c;\nCREATE UNIQUE INDEX ON sro_index_mv (c) WHERE unwanted_grant_nofail(1) > 0;\n\\c -\nREFRESH MATERIALIZED VIEW CONCURRENTLY sro_index_mv;\nREFRESH MATERIALIZED VIEW sro_index_mv;\n\nDROP OWNED BY regress_sro_user;\nDROP ROLE regress_sro_user;\n\n\n-- Admin options\n\nSET SESSION AUTHORIZATION regress_priv_user4;\nCREATE FUNCTION dogrant_ok() RETURNS void LANGUAGE sql SECURITY DEFINER AS\n\t'GRANT regress_priv_group2 TO regress_priv_user5';\nGRANT regress_priv_group2 TO regress_priv_user5; -- ok: had ADMIN OPTION\nSET ROLE regress_priv_group2;\nGRANT regress_priv_group2 TO regress_priv_user5; -- fails: SET ROLE suspended privilege\n\nSET SESSION AUTHORIZATION regress_priv_user1;\nGRANT regress_priv_group2 TO regress_priv_user5; -- fails: no ADMIN OPTION\nSELECT dogrant_ok();\t\t\t-- ok: SECURITY DEFINER conveys ADMIN\nSET ROLE regress_priv_group2;\nGRANT regress_priv_group2 TO regress_priv_user5; -- fails: SET ROLE did not help\n\nSET SESSION AUTHORIZATION regress_priv_group2;\nGRANT regress_priv_group2 TO regress_priv_user5; -- fails: no self-admin\n\nSET SESSION AUTHORIZATION regress_priv_user4;\nDROP FUNCTION dogrant_ok();\nREVOKE regress_priv_group2 FROM regress_priv_user5;\n\n\n-- has_sequence_privilege tests\n\\c -\n\nCREATE SEQUENCE x_seq;\n\nGRANT USAGE on x_seq to regress_priv_user2;\n\nSELECT has_sequence_privilege('regress_priv_user1', 'atest1', 'SELECT');\nSELECT has_sequence_privilege('regress_priv_user1', 'x_seq', 'INSERT');\nSELECT has_sequence_privilege('regress_priv_user1', 'x_seq', 'SELECT');\n\nSET SESSION AUTHORIZATION regress_priv_user2;\n\nSELECT has_sequence_privilege('x_seq', 'USAGE');\n\n-- largeobject privilege tests\n\\c -\nSET SESSION AUTHORIZATION regress_priv_user1;\n\nSELECT lo_create(1001);\nSELECT lo_create(1002);\nSELECT lo_create(1003);\nSELECT lo_create(1004);\nSELECT lo_create(1005);\n\nGRANT ALL ON LARGE OBJECT 1001 TO PUBLIC;\nGRANT SELECT ON LARGE OBJECT 1003 TO regress_priv_user2;\nGRANT SELECT,UPDATE ON LARGE OBJECT 1004 TO regress_priv_user2;\nGRANT ALL ON LARGE OBJECT 1005 TO regress_priv_user2;\nGRANT SELECT ON LARGE OBJECT 1005 TO regress_priv_user2 WITH GRANT OPTION;\n\nGRANT SELECT, INSERT ON LARGE OBJECT 1001 TO PUBLIC;\t-- to be failed\nGRANT SELECT, UPDATE ON LARGE OBJECT 1001 TO nosuchuser;\t-- to be failed\nGRANT SELECT, UPDATE ON LARGE OBJECT 999 TO PUBLIC;\t-- to be failed\n\n\\c -\nSET SESSION AUTHORIZATION regress_priv_user2;\n\nSELECT lo_create(2001);\nSELECT lo_create(2002);\n\nSELECT loread(lo_open(1001, x'20000'::int), 32);\t-- allowed, for now\nSELECT lowrite(lo_open(1001, x'40000'::int), 'abcd');\t-- fail, wrong mode\n\nSELECT loread(lo_open(1001, x'40000'::int), 32);\nSELECT loread(lo_open(1002, x'40000'::int), 32);\t-- to be denied\nSELECT loread(lo_open(1003, x'40000'::int), 32);\nSELECT loread(lo_open(1004, x'40000'::int), 32);\n\nSELECT lowrite(lo_open(1001, x'20000'::int), 'abcd');\nSELECT lowrite(lo_open(1002, x'20000'::int), 'abcd');\t-- to be denied\nSELECT lowrite(lo_open(1003, x'20000'::int), 'abcd');\t-- to be denied\nSELECT lowrite(lo_open(1004, x'20000'::int), 'abcd');\n\nGRANT SELECT ON LARGE OBJECT 1005 TO regress_priv_user3;\nGRANT UPDATE ON LARGE OBJECT 1006 TO regress_priv_user3;\t-- to be denied\nREVOKE ALL ON LARGE OBJECT 2001, 2002 FROM PUBLIC;\nGRANT ALL ON LARGE OBJECT 2001 TO regress_priv_user3;\n\nSELECT lo_unlink(1001);\t\t-- to be denied\nSELECT lo_unlink(2002);\n\n\\c -\n-- confirm ACL setting\nSELECT oid, pg_get_userbyid(lomowner) ownername, lomacl FROM pg_largeobject_metadata WHERE oid >= 1000 AND oid < 3000 ORDER BY oid;\n\nSET SESSION AUTHORIZATION regress_priv_user3;\n\nSELECT loread(lo_open(1001, x'40000'::int), 32);\nSELECT loread(lo_open(1003, x'40000'::int), 32);\t-- to be denied\nSELECT loread(lo_open(1005, x'40000'::int), 32);\n\nSELECT lo_truncate(lo_open(1005, x'20000'::int), 10);\t-- to be denied\nSELECT lo_truncate(lo_open(2001, x'20000'::int), 10);\n\n\\c -\n-- confirm role with privileges of pg_read_all_data can read large objects\nSET SESSION AUTHORIZATION regress_priv_user6;\n\nSELECT loread(lo_open(1002, x'40000'::int), 32);\nSELECT lo_get(1002);\nSELECT lowrite(lo_open(1002, x'20000'::int), 'abcd');\t-- to be denied\nSELECT lo_put(1002, 1, 'abcd');\t\t\t\t\t\t\t-- to be denied\nSELECT lo_truncate(lo_open(1002, x'20000'::int), 0);\t-- to be denied\nSELECT lo_unlink(1002);\t\t\t\t\t\t\t\t\t-- to be denied\n\n\\c -\n-- confirm role with privileges of pg_write_all_data can write large objects\nGRANT SELECT ON LARGE OBJECT 1002 TO regress_priv_user7;\nSET SESSION AUTHORIZATION regress_priv_user7;\n\nSELECT lowrite(lo_open(1002, x'20000'::int), 'abcd');\nSELECT lo_put(1002, 1, 'abcd');\nSELECT lo_truncate(lo_open(1002, x'20000'::int), 0);\nSELECT lo_unlink(1002);\t\t\t\t\t\t\t\t\t-- to be denied\n\n-- has_largeobject_privilege function\n\n-- superuser\n\\c -\nSELECT has_largeobject_privilege(1001, 'SELECT');\nSELECT has_largeobject_privilege(1002, 'SELECT');\nSELECT has_largeobject_privilege(1003, 'SELECT');\nSELECT has_largeobject_privilege(1004, 'SELECT');\n\nSELECT has_largeobject_privilege(1001, 'UPDATE');\nSELECT has_largeobject_privilege(1002, 'UPDATE');\nSELECT has_largeobject_privilege(1003, 'UPDATE');\nSELECT has_largeobject_privilege(1004, 'UPDATE');\n\n-- not-existing large object\nSELECT has_largeobject_privilege(9999, 'SELECT');\t-- NULL\n\n-- non-superuser\nSET SESSION AUTHORIZATION regress_priv_user2;\nSELECT has_largeobject_privilege(1001, 'SELECT');\nSELECT has_largeobject_privilege(1002, 'SELECT');\t-- false\nSELECT has_largeobject_privilege(1003, 'SELECT');\nSELECT has_largeobject_privilege(1004, 'SELECT');\n\nSELECT has_largeobject_privilege(1001, 'UPDATE');\nSELECT has_largeobject_privilege(1002, 'UPDATE');\t-- false\nSELECT has_largeobject_privilege(1003, 'UPDATE');\t-- false\nSELECT has_largeobject_privilege(1004, 'UPDATE');\n\nSELECT has_largeobject_privilege('regress_priv_user3', 1001, 'SELECT');\nSELECT has_largeobject_privilege('regress_priv_user3', 1003, 'SELECT');\t-- false\nSELECT has_largeobject_privilege('regress_priv_user3', 1005, 'SELECT');\n\nSELECT has_largeobject_privilege('regress_priv_user3', 1005, 'UPDATE');\t-- false\nSELECT has_largeobject_privilege('regress_priv_user3', 2001, 'UPDATE');\n\n-- compatibility mode in largeobject permission\n\\c -\nSET lo_compat_privileges = false;\t-- default setting\nSET SESSION AUTHORIZATION regress_priv_user4;\n\nSELECT has_largeobject_privilege(1002, 'SELECT'); -- false\nSELECT has_largeobject_privilege(1002, 'UPDATE'); -- false\n\nSELECT loread(lo_open(1002, x'40000'::int), 32);\t-- to be denied\nSELECT lowrite(lo_open(1002, x'20000'::int), 'abcd');\t-- to be denied\nSELECT lo_truncate(lo_open(1002, x'20000'::int), 10);\t-- to be denied\nSELECT lo_put(1002, 1, 'abcd');\t\t\t\t-- to be denied\nSELECT lo_unlink(1002);\t\t\t\t\t-- to be denied\nSELECT lo_export(1001, '/dev/null');\t\t\t-- to be denied\nSELECT lo_import('/dev/null');\t\t\t\t-- to be denied\nSELECT lo_import('/dev/null', 2003);\t\t\t-- to be denied\n\n\\c -\nSET lo_compat_privileges = true;\t-- compatibility mode\nSET SESSION AUTHORIZATION regress_priv_user4;\n\nSELECT has_largeobject_privilege(1002, 'SELECT'); -- true\nSELECT has_largeobject_privilege(1002, 'UPDATE'); -- true\n\nSELECT loread(lo_open(1002, x'40000'::int), 32);\nSELECT lowrite(lo_open(1002, x'20000'::int), 'abcd');\nSELECT lo_truncate(lo_open(1002, x'20000'::int), 10);\nSELECT lo_unlink(1002);\nSELECT lo_export(1001, '/dev/null');\t\t\t-- to be denied\n\n-- don't allow unpriv users to access pg_largeobject contents\n\\c -\nSELECT * FROM pg_largeobject LIMIT 0;\n\nSET SESSION AUTHORIZATION regress_priv_user1;\nSELECT * FROM pg_largeobject LIMIT 0;\t\t\t-- to be denied\n\n-- pg_signal_backend can't signal superusers\nRESET SESSION AUTHORIZATION;\nBEGIN;\nCREATE OR REPLACE FUNCTION terminate_nothrow(pid int) RETURNS bool\n\tLANGUAGE plpgsql SECURITY DEFINER SET client_min_messages = error AS $$\nBEGIN\n\tRETURN pg_terminate_backend($1);", "explanation": "PL/pgSQL object from PostgreSQL core test for Privileges.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "plpgsql_function", "is_postgresql_specific": true, "sql_length": 7758, "num_statements": 132} {"question": "Generate PostgreSQL SQL for: What is the highest number of losses with 23 points and 22 plays?", "schema": "CREATE TABLE table_name_58 (lost INTEGER, points VARCHAR, played VARCHAR)", "sql": "SELECT MAX(lost) FROM table_name_58 WHERE points = 23 AND played > 22;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 70, "num_statements": 1} {"question": "What is the total number of mental health parity violation incidents by state?", "schema": "CREATE TABLE mental_health_parity (state VARCHAR(2), incidents INT); INSERT INTO mental_health_parity (state, incidents) VALUES ('CA', 120), ('NY', 150), ('TX', 80);", "sql": "SELECT state, SUM(incidents) FROM mental_health_parity GROUP BY state;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 70, "num_statements": 1} {"question": "Display the names and fairness scores of models that have a higher fairness score than the average fairness score.", "schema": "CREATE TABLE model_fairness (model_id INT, fairness_score DECIMAL(3,2)); INSERT INTO model_fairness (model_id, fairness_score) VALUES (1, 0.85), (2, 0.70), (3, 0.92), (4, 0.68), (5, 0.89);", "sql": "SELECT model_id, fairness_score FROM model_fairness WHERE fairness_score > (SELECT AVG(fairness_score) FROM model_fairness);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 124, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: I want the total number of rank for Grid less than 20 and dick rathmann and Qual more than 130.92", "schema": "CREATE TABLE table_name_73 (rank VARCHAR, qual VARCHAR, grid VARCHAR, driver VARCHAR)", "sql": "SELECT COUNT(rank) FROM table_name_73 WHERE grid < 20 AND driver = 'dick rathmann' AND qual > 130.92;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 101, "num_statements": 1} {"question": "PostgreSQL Plpgsql: show example 3.", "schema": null, "sql": "quantity integer DEFAULT 32; url varchar := 'http://mysite.com'; transaction_time CONSTANT timestamp with time zone := now();", "explanation": "Example from PostgreSQL documentation on Plpgsql.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 125, "num_statements": 3} {"question": "Generate PostgreSQL SQL for: What is the abbreviation of the district of Mahbubnagar?", "schema": "CREATE TABLE table_1610301_1 (code VARCHAR, district VARCHAR)", "sql": "SELECT code FROM table_1610301_1 WHERE district = 'Mahbubnagar';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 64, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the score of the away team that played Essendon?", "schema": "CREATE TABLE table_name_98 (away_team VARCHAR, home_team VARCHAR)", "sql": "SELECT away_team AS score FROM table_name_98 WHERE home_team = 'essendon';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 74, "num_statements": 1} {"question": "What are the names and annual revenues of all cultural heritage sites located in Tokyo, Japan?", "schema": "CREATE TABLE Cultural_Heritage_Sites (id INT, name VARCHAR(255), location VARCHAR(255), year_established INT, PRIMARY KEY(id)); INSERT INTO Cultural_Heritage_Sites (id, name, location, year_established) VALUES (1, 'Todai-ji Temple', 'Nara, Japan', 745);", "sql": "SELECT c.name, c.annual_revenue FROM Cultural_Heritage_Sites c WHERE c.location = 'Tokyo, Japan';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 97, "num_statements": 1} {"question": "Write the SQL definition from PostgreSQL contrib extension 'contrib' (item 82).", "schema": null, "sql": "CREATE OPERATOR CLASS gin__int_ops\nFOR TYPE _int4 USING gin\nAS\n\tOPERATOR\t3\t&&,\n\tOPERATOR\t6\t= (anyarray, anyarray),\n\tOPERATOR\t7\t@>,\n\tOPERATOR\t8\t<@,\n\tOPERATOR\t13\t@,\n\tOPERATOR\t14\t~,\n\tOPERATOR\t20\t@@ (_int4, query_int),\n\tFUNCTION\t1\tbtint4cmp (int4, int4),\n\tFUNCTION\t2\tginarrayextract (anyarray, internal, internal),\n\tFUNCTION\t3\tginint4_queryextract (_int4, internal, int2, internal, internal, internal, internal),\n\tFUNCTION\t4\tginint4_consistent (internal, int2, _int4, int4, internal, internal, internal, internal),\n\tSTORAGE\t\tint4;", "explanation": "SQL definition from the 'contrib' PostgreSQL contrib extension.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "ddl_advanced", "is_postgresql_specific": true, "sql_length": 526, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What team got a draft pick player from McGill?", "schema": "CREATE TABLE table_25085059_3 (cfl_team VARCHAR, college VARCHAR)", "sql": "SELECT cfl_team FROM table_25085059_3 WHERE college = 'McGill';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 63, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the bleeding time during the prolonged partial thromboplastin time in which the prothrombin time is unaffected?", "schema": "CREATE TABLE table_238124_1 (bleeding_time VARCHAR, partial_thromboplastin_time VARCHAR, prothrombin_time VARCHAR)", "sql": "SELECT bleeding_time FROM table_238124_1 WHERE partial_thromboplastin_time = 'Prolonged' AND prothrombin_time = 'Unaffected';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 125, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What method has randy couture as the opponent?", "schema": "CREATE TABLE table_name_15 (method VARCHAR, opponent VARCHAR)", "sql": "SELECT method FROM table_name_15 WHERE opponent = 'randy couture';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "Delete records in landfill_capacity table where capacity is greater than 30000 tons", "schema": "CREATE TABLE landfill_capacity (location VARCHAR(50), capacity INT);", "sql": "DELETE FROM landfill_capacity WHERE capacity > 30000;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_delete", "is_postgresql_specific": false, "sql_length": 53, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Find the maximum and total number of followers of all users.", "schema": "CREATE TABLE user_profiles (followers INTEGER)", "sql": "SELECT MAX(followers), SUM(followers) FROM user_profiles;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 57, "num_statements": 1} {"question": "Find the total revenue for each music genre in the current month.", "schema": "CREATE TABLE music_revenue (song VARCHAR(255), genre VARCHAR(255), revenue INT, revenue_date DATE); INSERT INTO music_revenue (song, genre, revenue, revenue_date) VALUES ('Song1', 'Genre1', 5000000, '2022-02-01'), ('Song2', 'Genre2', 7000000, '2022-02-02'), ('Song3', 'Genre1', 6000000, '2022-02-03'), ('Song4', 'Genre2', 8000000, '2022-02-04'); ALTER TABLE music_revenue ADD CONSTRAINT chk_revenue_date CHECK (revenue_date >= DATEADD(month, DATEDIFF(month, 0, GETDATE()), 0));", "sql": "SELECT genre, SUM(revenue) as total_revenue FROM music_revenue GROUP BY genre ORDER BY total_revenue DESC;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 106, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many goals were scored when they had over 28 points and played over 18 games?", "schema": "CREATE TABLE table_name_33 (goals_scored INTEGER, points VARCHAR, played VARCHAR)", "sql": "SELECT MAX(goals_scored) FROM table_name_33 WHERE points > 28 AND played > 18;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 78, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many goals were scored by Eugene Galeković?", "schema": "CREATE TABLE table_name_54 (goals VARCHAR, player VARCHAR)", "sql": "SELECT goals FROM table_name_54 WHERE player = 'eugene galeković';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "PostgreSQL regression test 'aggregates': Write the SELECT query (example 283).", "schema": null, "sql": "select aggfstr(distinct a,b,c order by b)\n from (values (1,3,'foo'),(0,null,null),(2,2,'bar'),(3,1,'baz')) v(a,b,c),\n generate_series(1,3) i;", "explanation": "Regression test for Aggregates in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select aggfstr(distinct a,b,c order by b)\n from (values (1,3,'foo'),(0,null,null),(2,2,'bar'),(3,1,'baz')) v(a,b,c),\n generate_series(1,3) i) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 148, "num_statements": 1} {"question": "What is the total quantity of organic cotton and recycled polyester used in our sustainable clothing line?", "schema": "CREATE TABLE Fabrics (FabricID INT, FabricName TEXT, IsSustainable BOOLEAN, Quantity INT); INSERT INTO Fabrics (FabricID, FabricName, IsSustainable, Quantity) VALUES (1, 'Organic Cotton', TRUE, 1000), (2, 'Recycled Polyester', TRUE, 800), (3, 'Hemp', TRUE, 700), (4, 'Polyester', FALSE, 500);", "sql": "SELECT SUM(Quantity) FROM Fabrics WHERE (FabricName = 'Organic Cotton' OR FabricName = 'Recycled Polyester') AND IsSustainable = TRUE;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 134, "num_statements": 1} {"question": "List all community policing initiatives in the state of Texas.", "schema": "CREATE TABLE community_policing (id INT, state VARCHAR(20), initiative VARCHAR(50)); INSERT INTO community_policing (id, state, initiative) VALUES (1, 'Texas', 'Neighborhood Watch'), (2, 'California', 'Coffee with a Cop'), (3, 'Texas', 'Citizens Police Academy');", "sql": "SELECT initiative FROM community_policing WHERE state = 'Texas';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 64, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is every year when the reporters were Lesley Visser and Robin Roberts?", "schema": "CREATE TABLE table_22654139_3 (year VARCHAR, reporters VARCHAR)", "sql": "SELECT year FROM table_22654139_3 WHERE reporters = 'Lesley Visser and Robin Roberts';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 86, "num_statements": 1} {"question": "Show a SQL definition from the citus project (pg13, item 25).", "schema": null, "sql": "-- row suffix notation works fine\nCREATE TABLE ab (a int, b int);", "explanation": "SQL definition from the open-source citus PostgreSQL project.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_table", "is_postgresql_specific": false, "sql_length": 65, "num_statements": 1} {"question": "PostgreSQL regression test 'float8': Write the SELECT query (example 121).", "schema": null, "sql": "SELECT tanh(float8 'infinity');", "explanation": "Regression test for Float8 in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT tanh(float8 'infinity')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 31, "num_statements": 1} {"question": "Find the number of defense contracts awarded per quarter in the 'defense_contracts' table", "schema": "CREATE TABLE defense_contracts (contract_id INT, company_name VARCHAR(100), contract_value DECIMAL(10, 2), contract_date DATE);", "sql": "SELECT EXTRACT(QUARTER FROM contract_date) as quarter, COUNT(*) as num_contracts FROM defense_contracts GROUP BY quarter;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 121, "num_statements": 1} {"question": "List all green building materials and the number of each material used in a specific city, for projects in London.", "schema": "CREATE TABLE GreenBuildingMaterials (MaterialID INT, MaterialName VARCHAR(50));CREATE TABLE GreenBuildingMaterialsUsage (UsageID INT, MaterialID INT, CityID INT, ProjectID INT);", "sql": "SELECT GreenBuildingMaterials.MaterialName, COUNT(GreenBuildingMaterialsUsage.UsageID) FROM GreenBuildingMaterials INNER JOIN GreenBuildingMaterialsUsage ON GreenBuildingMaterials.MaterialID = GreenBuildingMaterialsUsage.MaterialID WHERE GreenBuildingMaterialsUsage.CityID = 2 GROUP BY GreenBuildingMaterials.MaterialName;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 322, "num_statements": 1} {"question": "Show the total claim amounts for policyholders in California who made claims in the last 3 months.", "schema": "CREATE TABLE Claims (ClaimID INT, PolicyID INT, ClaimAmount DECIMAL(5,2), ClaimDate DATE); INSERT INTO Claims (ClaimID, PolicyID, ClaimAmount, ClaimDate) VALUES (1, 1, 500, '2020-01-01'); INSERT INTO Claims (ClaimID, PolicyID, ClaimAmount, ClaimDate) VALUES (2, 1, 750, '2020-02-01'); INSERT INTO Claims (ClaimID, PolicyID, ClaimAmount, ClaimDate) VALUES (3, 2, 400, '2020-03-01'); CREATE TABLE Policyholders (PolicyID INT, PolicyholderName VARCHAR(50), State VARCHAR(2)); INSERT INTO Policyholders (PolicyID, PolicyholderName, State) VALUES (1, 'Maria Rodriguez', 'California'); INSERT INTO Policyholders (PolicyID, PolicyholderName, State) VALUES (2, 'David Kim', 'California');", "sql": "SELECT PolicyholderName, SUM(ClaimAmount) FROM Claims JOIN Policyholders ON Claims.PolicyID = Policyholders.PolicyID WHERE Policyholders.State = 'California' AND ClaimDate >= DATEADD(month, -3, CURRENT_DATE) GROUP BY PolicyholderName;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 234, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'pg_visibility' (example 43).", "schema": null, "sql": "select pg_check_frozen('test_foreign_table');", "explanation": "Example query from the 'pg_visibility' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 45, "num_statements": 1} {"question": "PostgreSQL regression test 'partition_info': Write the SELECT query (example 60).", "schema": null, "sql": "SELECT * FROM pg_partition_tree('ptif_test_matview');", "explanation": "Regression test for Partition Info in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT * FROM pg_partition_tree('ptif_test_matview')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 53, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'path' (example 10).", "schema": null, "sql": "INSERT INTO PATH_TBL VALUES ('( 11,12,13,14) ');", "explanation": "DML from PostgreSQL core regression test for Path.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 48, "num_statements": 1} {"question": "What is the total number of hours volunteered in all programs?", "schema": "CREATE TABLE volunteers (id INT, name TEXT, program TEXT, hours INT); INSERT INTO volunteers (id, name, program, hours) VALUES (1, 'John Doe', 'Food Distribution', 10), (2, 'Jane Smith', 'Education Support', 20);", "sql": "SELECT SUM(hours) FROM volunteers;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 34, "num_statements": 1} {"question": "What is the explainability score for each AI safety concern?", "schema": "CREATE TABLE AISafety (id INT, concern VARCHAR(255), explainability_score DECIMAL(5,2)); INSERT INTO AISafety (id, concern, explainability_score) VALUES (1, 'Data Privacy', 78.91), (2, 'Unintended Consequences', 65.23), (3, 'Bias', 82.34);", "sql": "SELECT concern, AVG(explainability_score) as avg_explainability_score FROM AISafety GROUP BY concern;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 101, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the 2000 value if the 1998 value is 1.5?", "schema": "CREATE TABLE table_name_5 (Id VARCHAR)", "sql": "SELECT 2000 FROM table_name_5 WHERE 1998 = '1.5';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 49, "num_statements": 1} {"question": "What is the earliest date a 'subway' station was cleaned?", "schema": "CREATE TABLE public.cleaning (cleaning_id SERIAL PRIMARY KEY, cleaning_type VARCHAR(20), cleaning_date DATE, station_id INTEGER, FOREIGN KEY (station_id) REFERENCES public.station(station_id)); INSERT INTO public.cleaning (cleaning_type, cleaning_date, station_id) VALUES ('routine cleaning', '2022-03-03', 1), ('deep cleaning', '2022-03-15', 2);", "sql": "SELECT MIN(cleaning_date) FROM public.cleaning INNER JOIN public.station ON public.cleaning.station_id = public.station.station_id WHERE route_type = 'subway';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": true, "sql_length": 159, "num_statements": 1} {"question": "Which country has the most hotels in the 'Global_Hotels' table?", "schema": "CREATE TABLE Global_Hotels (country VARCHAR(50), hotel_count INT); INSERT INTO Global_Hotels (country, hotel_count) VALUES ('USA', 5000), ('Canada', 1000), ('Mexico', 2000);", "sql": "SELECT country, MAX(hotel_count) FROM Global_Hotels;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 52, "num_statements": 1} {"question": "What is the average monthly donation per donor for the last 12 months?", "schema": "CREATE TABLE donors (id INT, name VARCHAR(255)); CREATE TABLE donations (id INT, donor_id INT, donation_date DATE, amount DECIMAL(10, 2));", "sql": "SELECT AVG(donations.amount) FROM donations JOIN donors ON donations.donor_id = donors.id WHERE donations.donation_date BETWEEN DATE_SUB(CURRENT_DATE, INTERVAL 12 MONTH) AND CURRENT_DATE GROUP BY EXTRACT(MONTH FROM donations.donation_date);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 240, "num_statements": 1} {"question": "Show a SQL definition from the zombodb project (zombodb--3000.1.3--3000.1.4, item 2).", "schema": null, "sql": "CREATE FUNCTION zdb.schema_version() RETURNS text LANGUAGE sql AS $$\nSELECT '3000.1.4 (759bfe45fe5b8ee13cdb0100dc49eff9e6dd116a)'\n$$;", "explanation": "SQL definition from the open-source zombodb PostgreSQL project.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 133, "num_statements": 1} {"question": "What is the total water consumption (in gallons) for each water usage category in the state of Florida in 2021?", "schema": "CREATE TABLE states (id INT, name VARCHAR(255)); INSERT INTO states (id, name) VALUES (1, 'Florida'); CREATE TABLE water_usage_categories (id INT, name VARCHAR(255)); INSERT INTO water_usage_categories (id, name) VALUES (1, 'Residential'), (2, 'Commercial'), (3, 'Industrial'); CREATE TABLE water_consumption (category_id INT, state_id INT, consumption INT, date DATE); INSERT INTO water_consumption (category_id, state_id, consumption, date) VALUES (1, 1, 5000, '2021-01-01'), (1, 1, 5500, '2021-01-02'), (2, 1, 4000, '2021-01-01'), (2, 1, 4300, '2021-01-02'), (3, 1, 6000, '2021-01-01'), (3, 1, 6500, '2021-01-02');", "sql": "SELECT category_id, state_id, SUM(consumption) as total_consumption FROM water_consumption WHERE state_id = 1 AND date BETWEEN '2021-01-01' AND '2021-12-31' GROUP BY category_id, state_id;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 188, "num_statements": 1} {"question": "What is the average horsepower of sports cars released between 2015 and 2020?", "schema": "CREATE TABLE SportsCars (Id INT PRIMARY KEY, Name VARCHAR(100), Year INT, Horsepower INT); INSERT INTO SportsCars (Id, Name, Year, Horsepower) VALUES (1, 'Ferrari 488', 2015, 661), (2, 'Porsche 911 GT3', 2017, 500), (3, 'Audi R8 V10', 2016, 610), (4, 'McLaren 720S', 2017, 720), (5, 'Lamborghini Huracan', 2014, 602), (6, 'Chevrolet Corvette Z06', 2015, 650), (7, 'Mercedes-AMG GT R', 2018, 577), (8, 'Jaguar F-Type SVR', 2016, 575), (9, 'Nissan GT-R Nismo', 2017, 600), (10, 'Aston Martin DB11 V12', 2017, 600);", "sql": "SELECT AVG(Horsepower) FROM SportsCars WHERE Year BETWEEN 2015 AND 2020 AND Name LIKE '%Sports%';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 97, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is Player, when Score is \"76-73-73=222\"?", "schema": "CREATE TABLE table_name_60 (player VARCHAR, score VARCHAR)", "sql": "SELECT player FROM table_name_60 WHERE score = 76 - 73 - 73 = 222;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: what is the location for saturday, april 21", "schema": "CREATE TABLE table_27893892_2 (game_site VARCHAR, date VARCHAR)", "sql": "SELECT game_site FROM table_27893892_2 WHERE date = 'Saturday, April 21';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 73, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the result from 2003 from the US Open?", "schema": "CREATE TABLE table_name_80 (tournament VARCHAR)", "sql": "SELECT 2003 FROM table_name_80 WHERE tournament = 'us open';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 60, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many wins did Andrew Ranger have?", "schema": "CREATE TABLE table_23239946_3 (wins VARCHAR, driver VARCHAR)", "sql": "SELECT COUNT(wins) FROM table_23239946_3 WHERE driver = 'Andrew Ranger';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 72, "num_statements": 1} {"question": "What is the total number of art pieces in the museums of Australia?", "schema": "CREATE TABLE museums (id INT, name VARCHAR(50), location VARCHAR(50), num_pieces INT); INSERT INTO museums (id, name, location, num_pieces) VALUES (1, 'Museum 1', 'Australia', 5000), (2, 'Museum 2', 'United States', 7000), (3, 'Museum 3', 'Canada', 3000);", "sql": "SELECT SUM(num_pieces) FROM museums WHERE location = 'Australia';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 65, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the result when then opponents conference was Mac (east)?", "schema": "CREATE TABLE table_28418916_3 (result VARCHAR, opponents_conference VARCHAR)", "sql": "SELECT result FROM table_28418916_3 WHERE opponents_conference = 'MAC (East)';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 78, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the lowest lap with the ranking of 19?", "schema": "CREATE TABLE table_name_99 (laps INTEGER, start VARCHAR)", "sql": "SELECT MIN(laps) FROM table_name_99 WHERE start = '19';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 55, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the 2006 when the 2013 is 2r, and a Tournament was the us open?", "schema": "CREATE TABLE table_name_91 (tournament VARCHAR)", "sql": "SELECT 2006 FROM table_name_91 WHERE 2013 = '2r' AND tournament = 'us open';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 76, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What platform came out before 2009 with the game wii sports?", "schema": "CREATE TABLE table_name_38 (platform_s_ VARCHAR, year VARCHAR, game VARCHAR)", "sql": "SELECT platform_s_ FROM table_name_38 WHERE year < 2009 AND game = 'wii sports';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 80, "num_statements": 1} {"question": "Which countries had the highest and lowest museum attendance in the last 3 years?", "schema": "CREATE TABLE Museums (MuseumID INT, MuseumName VARCHAR(50), Country VARCHAR(50)); CREATE TABLE Attendance (AttendanceID INT, MuseumID INT, Year INT, Visitors INT); INSERT INTO Museums VALUES (1, 'Louvre', 'France'), (2, 'Met', 'USA'), (3, 'British Museum', 'UK'); INSERT INTO Attendance VALUES (1, 1, 2019, 1000000), (2, 1, 2020, 800000), (3, 1, 2021, 900000), (4, 2, 2019, 7000000), (5, 2, 2020, 4000000), (6, 2, 2021, 5000000), (7, 3, 2019, 6000000), (8, 3, 2020, 5000000), (9, 3, 2021, 6500000);", "sql": "SELECT M.Country, MAX(A.Visitors) AS MaxAttendance, MIN(A.Visitors) AS MinAttendance FROM Museums M INNER JOIN Attendance A ON M.MuseumID = A.MuseumID WHERE A.Year BETWEEN 2019 AND 2021 GROUP BY M.Country;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 205, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'stats_ext' (example 607).", "schema": null, "sql": "CREATE STATISTICS mcv_lists_bool_stats (mcv) ON a, b, c\n FROM mcv_lists_bool;", "explanation": "DDL from PostgreSQL core regression test for Stats Ext.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 78, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: what is the years for the displacement 4.0l (242cid)?", "schema": "CREATE TABLE table_name_35 (years VARCHAR, displacement VARCHAR)", "sql": "SELECT years FROM table_name_35 WHERE displacement = '4.0l (242cid)';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 69, "num_statements": 1} {"question": "How many natural disasters were recorded in the 'West Coast' region in 2020?", "schema": "CREATE TABLE regions (id INT, name VARCHAR(255)); CREATE TABLE natural_disasters (id INT, region_id INT, year INT); INSERT INTO regions (id, name) VALUES (1, 'West Coast'); INSERT INTO natural_disasters (id, region_id, year) VALUES (1, 1, 2020);", "sql": "SELECT COUNT(*) FROM natural_disasters WHERE region_id = (SELECT id FROM regions WHERE name = 'West Coast') AND year = 2020;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 124, "num_statements": 1} {"question": "What is the total number of games played by players in each continent, and what is the average age of players in each continent?", "schema": "CREATE TABLE Players (PlayerID INT, Age INT, Gender VARCHAR(10), GamesPlayed INT, Country VARCHAR(20));", "sql": "SELECT CONTINENT(Country) as Continent, COUNT(*) as TotalGames, AVG(Age) as AverageAge FROM Players GROUP BY Continent;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 119, "num_statements": 1} {"question": "Calculate the total tonnage of cargo transported by vessels with US flag in the second quarter of 2022?", "schema": "CREATE TABLE Flag (flag_id INT PRIMARY KEY, flag_country VARCHAR(255)); INSERT INTO Flag (flag_id, flag_country) VALUES (1, 'United States'); CREATE TABLE Vessel (vessel_id INT PRIMARY KEY, vessel_name VARCHAR(255), flag_id INT); CREATE TABLE Cargo (cargo_id INT PRIMARY KEY, vessel_id INT, cargo_weight INT, PRIMARY KEY (cargo_id, vessel_id)); CREATE TABLE Vessel_Movement (vessel_id INT, movement_date DATE, PRIMARY KEY (vessel_id, movement_date));", "sql": "SELECT SUM(C.cargo_weight) FROM Vessel V JOIN Cargo C ON V.vessel_id = C.vessel_id JOIN Vessel_Movement VM ON V.vessel_id = VM.vessel_id JOIN Flag F ON V.flag_id = F.flag_id WHERE VM.movement_date >= '2022-04-01' AND VM.movement_date < '2022-07-01' AND F.flag_country = 'United States';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 286, "num_statements": 1} {"question": "What is the total number of news articles by each author?", "schema": "CREATE TABLE authors (id INT, name VARCHAR(50)); INSERT INTO authors (id, name) VALUES (1, 'John Doe'), (2, 'Jane Smith'); CREATE TABLE articles (id INT, author_id INT, title VARCHAR(100), content TEXT); INSERT INTO articles (id, author_id, title, content) VALUES (1, 1, 'Article 1', 'Content 1'), (2, 1, 'Article 2', 'Content 2'), (3, 2, 'Article 3', 'Content 3');", "sql": "SELECT a.name, COUNT(*) as total_articles FROM articles a JOIN authors au ON a.author_id = au.id GROUP BY a.name;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 113, "num_statements": 1} {"question": "Write the SQL definition from PostgreSQL contrib extension 'contrib' (item 30).", "schema": null, "sql": "--\n-- OPERATORS\n--\n\nCREATE OPERATOR && (\n\tLEFTARG = _int4,\n\tRIGHTARG = _int4,\n\tPROCEDURE = _int_overlap,\n\tCOMMUTATOR = '&&',\n\tRESTRICT = _int_overlap_sel,\n\tJOIN = _int_overlap_joinsel\n);", "explanation": "SQL definition from the 'contrib' PostgreSQL contrib extension.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "ddl_advanced", "is_postgresql_specific": true, "sql_length": 186, "num_statements": 1} {"question": "What is the total coal production by site in the last quarter?", "schema": "CREATE TABLE site (site_id INT, site_name VARCHAR(50)); INSERT INTO site (site_id, site_name) VALUES (1, 'Site A'), (2, 'Site B'); CREATE TABLE production (production_id INT, site_id INT, product VARCHAR(10), production_date DATE, quantity INT); INSERT INTO production (production_id, site_id, product, production_date, quantity) VALUES (1, 1, 'coal', '2021-01-01', 500), (2, 1, 'coal', '2021-02-01', 600), (3, 1, 'coal', '2021-03-01', 700), (4, 2, 'coal', '2021-01-01', 800), (5, 2, 'coal', '2021-02-01', 900), (6, 2, 'coal', '2021-03-01', 1000);", "sql": "SELECT site_name, SUM(quantity) AS total_coal_production FROM production JOIN site ON production.site_id = site.site_id WHERE product = 'coal' AND production_date >= DATEADD(quarter, -1, GETDATE()) GROUP BY site_name;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 217, "num_statements": 1} {"question": "Find the names and locations of all the museums that have a website.", "schema": "CREATE TABLE museums (name VARCHAR(255), location VARCHAR(255), website VARCHAR(255));", "sql": "SELECT name, location FROM museums WHERE website IS NOT NULL;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 61, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'arrays' (example 108).", "schema": null, "sql": "insert into arrtest_f values(3,'cat1',1.18);", "explanation": "DML from PostgreSQL core regression test for Arrays.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 44, "num_statements": 1} {"question": "Insert records for new electric vehicle models launched in 2022.", "schema": "CREATE TABLE vehicle (id INT PRIMARY KEY, name VARCHAR(255), production_date DATE, model_year INT); INSERT INTO vehicle (id, name, production_date, model_year) VALUES (1, 'Tesla Model Y', '2022-03-14', 2022), (2, 'Volvo XC40 Recharge', '2022-08-16', 2022), (3, 'Ford Mustang Mach-E', '2022-10-18', 2022);", "sql": "INSERT INTO vehicle (name, production_date, model_year) VALUES ('Hyundai Ioniq 5', '2022-09-01', 2022), ('Kia EV6', '2022-11-01', 2022);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 136, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the total number of ladies ranked who had less than 3 silvers, less than 2 total medals, and more than 0 bronze medals?", "schema": "CREATE TABLE table_name_9 (rank VARCHAR, bronze VARCHAR, silver VARCHAR, total VARCHAR)", "sql": "SELECT COUNT(rank) FROM table_name_9 WHERE silver < 3 AND total < 2 AND bronze > 0;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 83, "num_statements": 1} {"question": "How many artifacts were excavated per site each year?", "schema": "CREATE TABLE excavation_sites (site_id INT, site_name VARCHAR(50), country VARCHAR(50)); INSERT INTO excavation_sites (site_id, site_name, country) VALUES (1, 'Site A', 'USA'); CREATE TABLE artifacts (artifact_id INT, site_id INT, excavation_year INT);", "sql": "SELECT e.site_name, EXTRACT(YEAR FROM a.excavation_date) as excavation_year, COUNT(*) as artifacts_count FROM excavation_sites e JOIN artifacts a ON e.site_id = a.site_id GROUP BY e.site_id, e.site_name, excavation_year ORDER BY site_id, excavation_year;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 254, "num_statements": 1} {"question": "What is the minimum donation amount given by donors from each state?", "schema": "CREATE TABLE donations (id INT, donor_state VARCHAR(255), donation_amount DECIMAL(10,2)); INSERT INTO donations (id, donor_state, donation_amount) VALUES (1, 'California', 1500.00), (2, 'California', 500.00), (3, 'Texas', 2000.00);", "sql": "SELECT donor_state, MIN(donation_amount) FROM donations GROUP BY donor_state;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 77, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which Viavoice 5/15/09 has a CSA 5/14/09 of 5%, and a TNS-Sofres 5/28/09 of 4.5%?", "schema": "CREATE TABLE table_name_96 (viavoice_5_15_09 VARCHAR, csa_5_14_09 VARCHAR, tns_sofres_5_28_09 VARCHAR)", "sql": "SELECT viavoice_5_15_09 FROM table_name_96 WHERE csa_5_14_09 = '5%' AND tns_sofres_5_28_09 = '4.5%';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 100, "num_statements": 1} {"question": "How many cases were opened and closed in each court during the current year?", "schema": "CREATE TABLE CourtCases (ID INT, Court VARCHAR(20), CaseType VARCHAR(20), OpenDate DATE, CloseDate DATE); INSERT INTO CourtCases (ID, Court, CaseType, OpenDate, CloseDate) VALUES (1, 'Court1', 'Civil', '2022-01-01', '2022-06-30'), (2, 'Court2', 'Criminal', '2022-02-01', '2022-12-31'), (3, 'Court1', 'Civil', '2022-04-01', '2022-11-30');", "sql": "SELECT Court, COUNT(CASE WHEN EXTRACT(MONTH FROM OpenDate) = EXTRACT(MONTH FROM CURRENT_DATE) AND EXTRACT(YEAR FROM OpenDate) = EXTRACT(YEAR FROM CURRENT_DATE) THEN 1 END) AS CasesOpened, COUNT(CASE WHEN EXTRACT(MONTH FROM CloseDate) = EXTRACT(MONTH FROM CURRENT_DATE) AND EXTRACT(YEAR FROM CloseDate) = EXTRACT(YEAR FROM CURRENT_DATE) THEN 1 END) AS CasesClosed FROM CourtCases GROUP BY Court;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 394, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the location and population of the institution called Bluefield College?", "schema": "CREATE TABLE table_262534_2 (location__population_ VARCHAR, institution VARCHAR)", "sql": "SELECT location__population_ FROM table_262534_2 WHERE institution = 'Bluefield College';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 89, "num_statements": 1} {"question": "Delete all records related to decentralized applications that have been banned in Japan.", "schema": "CREATE TABLE dapps (id INT, name VARCHAR(255), status VARCHAR(255), country VARCHAR(255)); INSERT INTO dapps (id, name, status, country) VALUES (1, 'App 1', 'Banned', 'Japan'), (2, 'App 2', 'Active', 'USA');", "sql": "DELETE FROM dapps WHERE status = 'Banned' AND country = 'Japan';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_delete", "is_postgresql_specific": false, "sql_length": 64, "num_statements": 1} {"question": "Which clients have experienced multiple legal issues in the last year?", "schema": "CREATE TABLE legal_issues (id INT PRIMARY KEY, client_name VARCHAR(255), issue VARCHAR(255), date DATE); INSERT INTO legal_issues (id, client_name, issue, date) VALUES (1, 'Alex Thompson', 'Divorce', '2021-01-01'); INSERT INTO legal_issues (id, client_name, issue, date) VALUES (2, 'Jamie Patel', 'Custody Battle', '2021-02-01');", "sql": "SELECT v1.client_name, v1.issue, v1.date, COUNT(v2.id) as issue_count FROM legal_issues v1 JOIN legal_issues v2 ON v1.client_name = v2.client_name AND v2.date >= DATEADD(year, -1, v1.date) AND v2.date < v1.date GROUP BY v1.client_name, v1.issue, v1.date HAVING COUNT(v2.id) > 1;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 278, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the Event of sudesh peiris?", "schema": "CREATE TABLE table_name_93 (event VARCHAR, name VARCHAR)", "sql": "SELECT event FROM table_name_93 WHERE name = 'sudesh peiris';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 61, "num_statements": 1} {"question": "Write the SQL definition from PostgreSQL contrib extension 'contrib' (item 14).", "schema": null, "sql": "CREATE FUNCTION _int_contained(_int4, _int4)\nRETURNS bool\nAS 'MODULE_PATHNAME'\nLANGUAGE C STRICT IMMUTABLE PARALLEL SAFE;", "explanation": "SQL definition from the 'contrib' PostgreSQL contrib extension.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 121, "num_statements": 1} {"question": "What is the maximum funding amount for Indian biotech startups?", "schema": "CREATE TABLE biotech_startups (id INT, name VARCHAR(100), location VARCHAR(100), funding FLOAT); INSERT INTO biotech_startups (id, name, location, funding) VALUES (1, 'Startup A', 'India', 12000000); INSERT INTO biotech_startups (id, name, location, funding) VALUES (2, 'Startup B', 'India', 18000000); INSERT INTO biotech_startups (id, name, location, funding) VALUES (3, 'Startup C', 'India', 20000000);", "sql": "SELECT MAX(funding) FROM biotech_startups WHERE location = 'India';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 67, "num_statements": 1} {"question": "What is the total value of agricultural crops produced by smallholder farmers in each state of Nigeria, and what percentage do they contribute to the total agricultural production?", "schema": "CREATE TABLE states (id INT, name VARCHAR(50), country VARCHAR(50)); INSERT INTO states (id, name, country) VALUES (1, 'Abia', 'Nigeria'); CREATE TABLE smallholder_farmers (id INT, crop_value FLOAT, state_id INT); INSERT INTO smallholder_farmers (id, crop_value, state_id) VALUES (1, 10000.0, 1); CREATE TABLE total_agricultural_production (state_id INT, total_production FLOAT); INSERT INTO total_agricultural_production (state_id, total_production) VALUES (1, 50000.0);", "sql": "SELECT s.name, SUM(sf.crop_value) as total_value, (SUM(sf.crop_value) / tap.total_production) * 100 as percentage FROM smallholder_farmers sf INNER JOIN states s ON sf.state_id = s.id INNER JOIN total_agricultural_production tap ON sf.state_id = tap.state_id GROUP BY s.name;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 275, "num_statements": 1} {"question": "Which locations have more than two successful explorations?", "schema": "CREATE TABLE exploration (exploration_id INT, exploration_name VARCHAR(255), location VARCHAR(255), cost FLOAT, result VARCHAR(255)); INSERT INTO exploration (exploration_id, exploration_name, location, cost, result) VALUES (3, 'Exploration C', 'Nigeria', 1200000.0, 'Success'); INSERT INTO exploration (exploration_id, exploration_name, location, cost, result) VALUES (4, 'Exploration D', 'Nigeria', 1500000.0, 'Success'); INSERT INTO exploration (exploration_id, exploration_name, location, cost, result) VALUES (5, 'Exploration E', 'Angola', 1800000.0, 'Success'); INSERT INTO exploration (exploration_id, exploration_name, location, cost, result) VALUES (6, 'Exploration F', 'Angola', 2000000.0, 'Failure');", "sql": "SELECT location, COUNT(*) FROM exploration WHERE result = 'Success' GROUP BY location HAVING COUNT(*) > 2;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 106, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'window' (example 144).", "schema": null, "sql": "insert into numerics values\n(0, '-infinity', '-infinity', '-infinity'),\n(1, -3, -3, -3),\n(2, -1, -1, -1),\n(3, 0, 0, 0),\n(4, 1.1, 1.1, 1.1),\n(5, 1.12, 1.12, 1.12),\n(6, 2, 2, 2),\n(7, 100, 100, 100),\n(8, 'infinity', 'infinity', 'infinity'),\n(9, 'NaN', 'NaN', 'NaN');", "explanation": "DML from PostgreSQL core regression test for Window.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 263, "num_statements": 1} {"question": "Calculate the average monthly production cost for the 'Eco-friendly Polymer' chemical", "schema": "CREATE TABLE monthly_cost (chemical VARCHAR(20), month INT, year INT, cost FLOAT); INSERT INTO monthly_cost (chemical, month, year, cost) VALUES ('Eco-friendly Polymer', 1, 2019, 450.25), ('Eco-friendly Polymer', 2, 2019, 470.33), ('Eco-friendly Polymer', 3, 2019, 495.10), ('Eco-friendly Polymer', 1, 2020, 460.00), ('Eco-friendly Polymer', 2, 2020, 480.00), ('Eco-friendly Polymer', 3, 2020, 500.00);", "sql": "SELECT AVG(cost) FROM monthly_cost WHERE chemical = 'Eco-friendly Polymer';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 75, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which model was made from 2001–2004, with a Torque of n·m (lb·ft) @ 3750, and an Engine code of n42b18 / n46b18?", "schema": "CREATE TABLE table_name_72 (model VARCHAR, engine_code VARCHAR, years VARCHAR, torque VARCHAR)", "sql": "SELECT model FROM table_name_72 WHERE years = '2001–2004' AND torque = 'n·m (lb·ft) @ 3750' AND engine_code = 'n42b18 / n46b18';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 128, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: for the name of jamar jackson what is the class?", "schema": "CREATE TABLE table_name_87 (class VARCHAR, name VARCHAR)", "sql": "SELECT class FROM table_name_87 WHERE name = 'jamar jackson';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 61, "num_statements": 1} {"question": "What is the total number of accidents that occurred at the mining sites in 2020 where the cause was equipment failure?", "schema": "CREATE TABLE Accidents (AccidentID INT, SiteID INT, Year INT, Cause VARCHAR(255)); INSERT INTO Accidents (AccidentID, SiteID, Year, Cause) VALUES (1, 1, 2020, 'Equipment Failure'), (2, 2, 2019, 'Human Error'), (3, 3, 2020, 'Weather');", "sql": "SELECT COUNT(*) FROM Accidents WHERE Year = 2020 AND Cause = 'Equipment Failure';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 81, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: List all players with highest assists from April 2.", "schema": "CREATE TABLE table_23248910_10 (high_assists VARCHAR, date VARCHAR)", "sql": "SELECT high_assists FROM table_23248910_10 WHERE date = 'April 2';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "Delete clinical trial data for 'DrugE' that was approved after 2018.", "schema": "CREATE TABLE clinical_trial_data (drug_name TEXT, trial_status TEXT, approval_date DATE); INSERT INTO clinical_trial_data (drug_name, trial_status, approval_date) VALUES ('DrugE', 'Approved', '2019-01-01');", "sql": "DELETE FROM clinical_trial_data WHERE drug_name = 'DrugE' AND approval_date > '2018-12-31';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_delete", "is_postgresql_specific": false, "sql_length": 91, "num_statements": 1} {"question": "What is the average energy storage capacity in regions with wind and solar renewable energy sources", "schema": "CREATE TABLE energy_storage (region VARCHAR(20), capacity INT);CREATE TABLE renewable_energy (region VARCHAR(20), source VARCHAR(20));", "sql": "SELECT e.region, AVG(e.capacity) FROM energy_storage e JOIN renewable_energy r ON e.region = r.region WHERE r.source IN ('wind', 'solar') GROUP BY e.region;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 156, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Find the states where have some college students in tryout.", "schema": "CREATE TABLE college (cName VARCHAR); CREATE TABLE tryout (cName VARCHAR)", "sql": "SELECT DISTINCT state FROM college AS T1 JOIN tryout AS T2 ON T1.cName = T2.cName;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 82, "num_statements": 1} {"question": "What is the average size of green roofs in Canada?", "schema": "CREATE TABLE Building (id INT, name VARCHAR(50), city VARCHAR(50), country VARCHAR(50), sqft INT, PRIMARY KEY (id)); INSERT INTO Building (id, name, city, country, sqft) VALUES (3, 'Calgary Tower', 'Calgary', 'Canada', 525600); INSERT INTO Building (id, name, city, country, sqft) VALUES (4, 'CN Tower', 'Toronto', 'Canada', 1090950); CREATE TABLE GreenRoof (id INT, building_id INT, planted_date DATE, size INT, PRIMARY KEY (id), FOREIGN KEY (building_id) REFERENCES Building (id)); INSERT INTO GreenRoof (id, building_id, planted_date, size) VALUES (3, 3, '2015-05-01', 35000); INSERT INTO GreenRoof (id, building_id, planted_date, size) VALUES (4, 4, '2018-07-01', 50000);", "sql": "SELECT AVG(g.size) FROM GreenRoof g JOIN Building b ON g.building_id = b.id WHERE b.country = 'Canada';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 103, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Can you tell me the Combined that has the Victories of 14?", "schema": "CREATE TABLE table_name_53 (combined VARCHAR, victories VARCHAR)", "sql": "SELECT combined FROM table_name_53 WHERE victories = 14;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 56, "num_statements": 1} {"question": "How many virtual tours were conducted in 'Asian' hotels in the past week?", "schema": "CREATE TABLE virtual_tours (tour_id INT, hotel_id INT, date DATE); INSERT INTO virtual_tours (tour_id, hotel_id, date) VALUES (6, 7, '2022-03-02'), (7, 7, '2022-03-05'), (8, 8, '2022-03-03'); CREATE TABLE hotels (hotel_id INT, region VARCHAR(50)); INSERT INTO hotels (hotel_id, region) VALUES (7, 'Asia'), (8, 'Europe'); CREATE TABLE dates (date DATE); INSERT INTO dates (date) VALUES ('2022-03-01'), ('2022-03-02'), ('2022-03-03'), ('2022-03-04'), ('2022-03-05'), ('2022-03-06'), ('2022-03-07');", "sql": "SELECT COUNT(*) FROM virtual_tours JOIN hotels ON virtual_tours.hotel_id = hotels.hotel_id JOIN dates ON virtual_tours.date = dates.date WHERE hotels.region = 'Asia' AND dates.date >= DATEADD(day, -7, GETDATE());", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 212, "num_statements": 1} {"question": "Update the description for ethic with id 1 to 'Minimize harm to subjects'", "schema": "CREATE TABLE ethics (id INT, description VARCHAR(100)); INSERT INTO ethics (id, description) VALUES (1, 'Avoid bias in reporting');", "sql": "UPDATE ethics SET description = 'Minimize harm to subjects' WHERE id = 1;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 73, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the GDP (in millions USD) of Hong Kong in 2009?", "schema": "CREATE TABLE table_1496582_1 (gdp_millions_of_usd__2009_ INTEGER, country___territory VARCHAR)", "sql": "SELECT MAX(gdp_millions_of_usd__2009_) FROM table_1496582_1 WHERE country___territory = 'Hong Kong';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 100, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the Club of mexico, Year of 2010, and a Competition of preseason, and a Result of w 0–1?", "schema": "CREATE TABLE table_name_8 (club VARCHAR, result VARCHAR, competition VARCHAR, nation VARCHAR, year VARCHAR)", "sql": "SELECT club FROM table_name_8 WHERE nation = 'mexico' AND year = 2010 AND competition = 'preseason' AND result = 'w 0–1';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 121, "num_statements": 1} {"question": "What is the retention rate of employees who have completed diversity and inclusion training?", "schema": "CREATE TABLE EmployeeTraining (EmployeeID INT, TrainingType VARCHAR(50), TrainingCompletionDate DATE, EmploymentEndDate DATE); INSERT INTO EmployeeTraining (EmployeeID, TrainingType, TrainingCompletionDate, EmploymentEndDate) VALUES (1, 'Diversity and Inclusion', '2022-01-01', '2023-01-01'), (2, NULL, NULL, '2022-01-01');", "sql": "SELECT (COUNT(*) * 100.0 / (SELECT COUNT(*) FROM EmployeeTraining WHERE TrainingType = 'Diversity and Inclusion' AND EmploymentEndDate IS NULL)) FROM EmployeeTraining WHERE TrainingType = 'Diversity and Inclusion' AND EmploymentEndDate IS NOT NULL;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 248, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the most common mill type, and how many are there?", "schema": "CREATE TABLE mill (TYPE VARCHAR)", "sql": "SELECT TYPE, COUNT(*) FROM mill GROUP BY TYPE ORDER BY COUNT(*) DESC LIMIT 1;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 77, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many players had the most assists against New Jersey?", "schema": "CREATE TABLE table_15873014_3 (high_assists VARCHAR, team VARCHAR)", "sql": "SELECT COUNT(high_assists) FROM table_15873014_3 WHERE team = 'New Jersey';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 75, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many customers do we have?", "schema": "CREATE TABLE Customers (Id VARCHAR)", "sql": "SELECT COUNT(*) FROM Customers;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 31, "num_statements": 1} {"question": "Create a view 'safety_protocols' that includes 'chemical_id', 'chemical_name', and 'safety_rating' columns from 'chemical_inventory' table and 'safety_ratings' table", "schema": "CREATE TABLE safety_ratings (chemical_id INT, safety_rating INT);", "sql": "CREATE VIEW safety_protocols AS SELECT chemical_inventory.chemical_id, chemical_inventory.chemical_name, safety_ratings.safety_rating FROM chemical_inventory INNER JOIN safety_ratings ON chemical_inventory.chemical_id = safety_ratings.chemical_id;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "ddl_view", "is_postgresql_specific": false, "sql_length": 247, "num_statements": 1} {"question": "What is the total quantity of chemical X produced in Q3 2020, for plants located in India?", "schema": "CREATE TABLE production (plant_id INT, chemical TEXT, quantity INT, quarter INT, year INT); INSERT INTO production (plant_id, chemical, quantity, quarter, year) VALUES (1, 'ChemX', 500, 3, 2020), (2, 'ChemY', 700, 3, 2020), (3, 'ChemX', 450, 3, 2020), (4, 'ChemY', 600, 3, 2020), (5, 'ChemX', 300, 3, 2020), (6, 'ChemX', 800, 4, 2020); CREATE TABLE plants (id INT, name TEXT, location TEXT, PRIMARY KEY (id)); INSERT INTO plants (id, name, location) VALUES (1, 'PlantA', 'IN'), (2, 'PlantB', 'CA'), (3, 'PlantC', 'CN'), (4, 'PlantD', 'AU'), (5, 'PlantE', 'IN'), (6, 'PlantF', 'BR');", "sql": "SELECT SUM(quantity) FROM production INNER JOIN plants ON production.plant_id = plants.id WHERE chemical = 'ChemX' AND location = 'IN' AND quarter = 3 AND year = 2020;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 167, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: what is the period if the player is Hashan Tillakaratne?", "schema": "CREATE TABLE table_26041144_16 (period VARCHAR, player VARCHAR)", "sql": "SELECT period FROM table_26041144_16 WHERE player = 'Hashan Tillakaratne';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 74, "num_statements": 1} {"question": "PostgreSQL regression test 'select_into': Write the SELECT query (example 1).", "schema": null, "sql": "SELECT *\n INTO TABLE sitmp1\n FROM onek\n WHERE onek.unique1 < 2;", "explanation": "Regression test for Select Into in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT *\n INTO TABLE sitmp1\n FROM onek\n WHERE onek.unique1 < 2) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 69, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the tournament that had byron black playing against magnus larsson?", "schema": "CREATE TABLE table_name_70 (tournament VARCHAR, opponent VARCHAR)", "sql": "SELECT tournament FROM table_name_70 WHERE opponent = 'magnus larsson';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 71, "num_statements": 1} {"question": "What's the total revenue generated per visitor who attended the 'Ancient Civilization' exhibition?", "schema": "CREATE TABLE Transactions (TransactionID INT, VisitorID INT, Amount DECIMAL(10,2));", "sql": "SELECT AVG(t.Amount) FROM Transactions t JOIN Visitors v ON t.VisitorID = v.VisitorID JOIN Artworks a ON v.VisitorID = a.VisitorID JOIN Exhibitions e ON a.ExhibitionID = e.ExhibitionID WHERE e.ExhibitionName = 'Ancient Civilization';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 233, "num_statements": 1} {"question": "Which countries have the most vessels in the vessels table?", "schema": "CREATE TABLE vessels ( id INT, name VARCHAR(255), country VARCHAR(255), capacity INT);", "sql": "SELECT country, COUNT(*) as vessel_count FROM vessels GROUP BY country ORDER BY vessel_count DESC;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 98, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Show each location and the number of cinemas there.", "schema": "CREATE TABLE cinema (LOCATION VARCHAR)", "sql": "SELECT LOCATION, COUNT(*) FROM cinema GROUP BY LOCATION;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 56, "num_statements": 1} {"question": "Write the SQL definition from PostgreSQL contrib extension 'contrib' (item 480).", "schema": null, "sql": "CREATE FUNCTION is_valid(ismn)\n\tRETURNS boolean\n\tAS 'MODULE_PATHNAME'\n\tLANGUAGE C\n\tIMMUTABLE STRICT\n\tPARALLEL SAFE;", "explanation": "SQL definition from the 'contrib' PostgreSQL contrib extension.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 115, "num_statements": 1} {"question": "List the wastewater treatment facilities in Texas and their capacities?", "schema": "CREATE TABLE treatment_facilities (name VARCHAR(50), state VARCHAR(20), capacity INT); INSERT INTO treatment_facilities (name, state, capacity) VALUES ('Facility1', 'Texas', 5000), ('Facility2', 'Texas', 7000);", "sql": "SELECT name, capacity FROM treatment_facilities WHERE state = 'Texas';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 70, "num_statements": 1} {"question": "Delete products that have not been restocked in the last 6 months", "schema": "CREATE TABLE products (product_id INT, product_name VARCHAR(50), restock_date DATE); INSERT INTO products (product_id, product_name, restock_date) VALUES (1, 'Product A', '2021-01-01'), (2, 'Product B', '2021-05-15'), (3, 'Product C', '2020-12-20');", "sql": "DELETE FROM products WHERE restock_date < DATE_SUB(CURDATE(), INTERVAL 6 MONTH);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_delete", "is_postgresql_specific": false, "sql_length": 80, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many years had a total earnings amount of 2254598?", "schema": "CREATE TABLE table_22834834_12 (year VARCHAR, earnings__$_ VARCHAR)", "sql": "SELECT COUNT(year) FROM table_22834834_12 WHERE earnings__$_ = 2254598;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 71, "num_statements": 1} {"question": "How many renewable energy projects were completed in 'Texas' in the year 2020?", "schema": "CREATE TABLE projects (id INT, state VARCHAR(50), year INT, type VARCHAR(50)); INSERT INTO projects (id, state, year, type) VALUES (1, 'Texas', 2020, 'Solar'), (2, 'California', 2021, 'Wind');", "sql": "SELECT COUNT(*) FROM projects WHERE state = 'Texas' AND year = 2020;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 68, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Tell me the opponent for attendance of 68,264", "schema": "CREATE TABLE table_name_74 (opponent VARCHAR, attendance VARCHAR)", "sql": "SELECT opponent FROM table_name_74 WHERE attendance = '68,264';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 63, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Where was the game held where Hawthorn was the away team?", "schema": "CREATE TABLE table_name_46 (venue VARCHAR, away_team VARCHAR)", "sql": "SELECT venue FROM table_name_46 WHERE away_team = 'hawthorn';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 61, "num_statements": 1} {"question": "What is the average maintenance cost for military equipment in the Pacific region?", "schema": "CREATE TABLE MilitaryEquipment (Id INT, EquipmentName VARCHAR(50), MaintenanceCost DECIMAL(10,2), Region VARCHAR(50)); INSERT INTO MilitaryEquipment (Id, EquipmentName, MaintenanceCost, Region) VALUES (1, 'Tank', 5000, 'Pacific'), (2, 'Helicopter', 8000, 'Europe');", "sql": "SELECT AVG(MaintenanceCost) FROM MilitaryEquipment WHERE Region = 'Pacific';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 76, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which Week 13 Nov 23 has a Week 6 Oct 5 of oklahoma state (5-0)?", "schema": "CREATE TABLE table_name_73 (week_13_nov_23 VARCHAR, week_6_oct_5 VARCHAR)", "sql": "SELECT week_13_nov_23 FROM table_name_73 WHERE week_6_oct_5 = 'oklahoma state (5-0)';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 85, "num_statements": 1} {"question": "Delete all records from the 'legal_aid' table where the 'case_value' is less than $5000", "schema": "CREATE TABLE legal_aid (case_id INT, case_value NUMERIC(10,2));", "sql": "WITH deleted_records AS (DELETE FROM legal_aid WHERE case_value < 5000 RETURNING *) SELECT * FROM deleted_records;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "dml_delete", "is_postgresql_specific": true, "sql_length": 114, "num_statements": 1} {"question": "Delete a specific artifact and its related excavation site", "schema": "CREATE TABLE ExcavationSites (SiteID int, Name varchar(50), Country varchar(50), StartDate date); INSERT INTO ExcavationSites (SiteID, Name, Country, StartDate) VALUES (7, 'Site H', 'India', '2012-02-02'); CREATE TABLE Artifacts (ArtifactID int, SiteID int, Name varchar(50), Description text, DateFound date); INSERT INTO Artifacts (ArtifactID, SiteID, Name, Description, DateFound) VALUES (6, 7, 'Artifact V', 'An Indian artifact', '2016-04-04');", "sql": "DELETE es, a FROM ExcavationSites es INNER JOIN Artifacts a ON es.SiteID = a.SiteID WHERE a.ArtifactID = 6;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "dml_delete", "is_postgresql_specific": false, "sql_length": 107, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the minimum vertical measurement if the aspect ratio is 16:9 and scanning is interlaced?", "schema": "CREATE TABLE table_272313_1 (vertical INTEGER, aspect_ratio VARCHAR, scanning VARCHAR)", "sql": "SELECT MIN(vertical) FROM table_272313_1 WHERE aspect_ratio = '16:9' AND scanning = 'interlaced';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 97, "num_statements": 1} {"question": "List all policies and claim amounts for policyholders in 'ME' or 'MD' that have a claim amount greater than $5000.", "schema": "CREATE TABLE Policyholders (PolicyID INT, PolicyholderName TEXT, State TEXT); INSERT INTO Policyholders (PolicyID, PolicyholderName, State) VALUES (1, 'Jose Hernandez', 'ME'), (2, 'Grace Lee', 'MD'); CREATE TABLE Claims (ClaimID INT, PolicyID INT, ClaimAmount INT); INSERT INTO Claims (ClaimID, PolicyID, ClaimAmount) VALUES (1, 1, 10000), (2, 1, 7000), (3, 2, 3000);", "sql": "SELECT Policyholders.PolicyID, Claims.ClaimAmount FROM Policyholders INNER JOIN Claims ON Policyholders.PolicyID = Claims.PolicyID WHERE Policyholders.State IN ('ME', 'MD') AND Claims.ClaimAmount > 5000;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 203, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What time contains the owner of maine chance farm?", "schema": "CREATE TABLE table_name_55 (time VARCHAR, owner VARCHAR)", "sql": "SELECT time FROM table_name_55 WHERE owner = 'maine chance farm';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 65, "num_statements": 1} {"question": "How many players are there in each region?", "schema": "CREATE TABLE player_info (player_id INT, region VARCHAR(255)); INSERT INTO player_info (player_id, region) VALUES (1, 'North America'), (2, 'Europe'), (3, 'Asia'), (4, 'South America');", "sql": "SELECT region, COUNT(*) as num_players FROM player_info GROUP BY region;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 72, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'select_into' (example 50).", "schema": null, "sql": "CREATE TABLE ctas_ine_tbl AS SELECT 1 / 0; -- error\nCREATE TABLE IF NOT EXISTS ctas_ine_tbl AS SELECT 1 / 0; -- ok\nCREATE TABLE ctas_ine_tbl AS SELECT 1 / 0 WITH NO DATA; -- error\nCREATE TABLE IF NOT EXISTS ctas_ine_tbl AS SELECT 1 / 0 WITH NO DATA; -- ok\nEXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF)\n CREATE TABLE ctas_ine_tbl AS SELECT 1 / 0; -- error\nEXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF)\n CREATE TABLE IF NOT EXISTS ctas_ine_tbl AS SELECT 1 / 0; -- ok\nEXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF)\n CREATE TABLE ctas_ine_tbl AS SELECT 1 / 0 WITH NO DATA; -- error\nEXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF)\n CREATE TABLE IF NOT EXISTS ctas_ine_tbl AS SELECT 1 / 0 WITH NO DATA; -- ok\nPREPARE ctas_ine_query AS SELECT 1 / 0;", "explanation": "DDL from PostgreSQL core regression test for Select Into.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "ddl_table", "is_postgresql_specific": true, "sql_length": 775, "num_statements": 9} {"question": "Generate PostgreSQL SQL for: Name the title that was written by r. scott gemmill", "schema": "CREATE TABLE table_17356042_1 (title VARCHAR, written_by VARCHAR)", "sql": "SELECT title FROM table_17356042_1 WHERE written_by = 'R. Scott Gemmill';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 73, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the diameter of the feature found in 1997?", "schema": "CREATE TABLE table_16799784_3 (diameter__km_ VARCHAR, year_named VARCHAR)", "sql": "SELECT diameter__km_ FROM table_16799784_3 WHERE year_named = 1997;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 67, "num_statements": 1} {"question": "Show number of labor disputes by union name", "schema": "CREATE TABLE labor_disputes (id INT, union_name VARCHAR(50), dispute_date DATE, dispute_reason VARCHAR(50)); INSERT INTO labor_disputes (id, union_name, dispute_date, dispute_reason) VALUES (1, 'United Steelworkers', '2019-12-01', 'Wages'), (2, 'Teamsters', '2020-06-15', 'Benefits'), (3, 'Service Employees International Union', '2018-03-03', 'Working conditions');", "sql": "SELECT union_name, COUNT(*) as total_disputes FROM labor_disputes GROUP BY union_name;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 86, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the average rating of songs for each language?", "schema": "CREATE TABLE song (languages VARCHAR, rating INTEGER)", "sql": "SELECT AVG(rating), languages FROM song GROUP BY languages;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 59, "num_statements": 1} {"question": "What is the total watch time for videos in the 'investigative journalism' category, grouped by region?", "schema": "CREATE TABLE videos (id text, category text, region text, watch_time integer); INSERT INTO videos (id, category, region, watch_time) VALUES ('VideoA', 'investigative journalism', 'North America', 3600); INSERT INTO videos (id, category, region, watch_time) VALUES ('VideoB', 'investigative journalism', 'Europe', 4200);", "sql": "SELECT region, SUM(watch_time) as total_watch_time FROM videos WHERE category = 'investigative journalism' GROUP BY region;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 123, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the position of the race with a point of test driver?", "schema": "CREATE TABLE table_name_70 (position VARCHAR, points VARCHAR)", "sql": "SELECT position FROM table_name_70 WHERE points = 'test driver';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 64, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the total amount of Value ($M), when the Rank was higher than 6, and the % change on year was -27?", "schema": "CREATE TABLE table_name_36 (value__ VARCHAR, rank VARCHAR, _percentage_change_on_year VARCHAR)", "sql": "SELECT COUNT(value__) AS $m_ FROM table_name_36 WHERE rank > 6 AND _percentage_change_on_year = '-27';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 102, "num_statements": 1} {"question": "Insert a new record into the 'fares' table for 'Zone 2' with an effective date of 2022-01-01 and a fare of $3.00", "schema": "CREATE TABLE fares (zone TEXT, fare DECIMAL(5,2), effective_date DATE);", "sql": "INSERT INTO fares (zone, fare, effective_date) VALUES ('Zone 2', 3.00, '2022-01-01');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 85, "num_statements": 1} {"question": "What is the daily revenue trend for the last quarter?", "schema": "CREATE TABLE daily_revenue(date DATE, revenue INT); INSERT INTO daily_revenue VALUES ('2022-01-01', 1000), ('2022-01-02', 1200), ('2022-01-03', 1100), ('2022-01-04', 1300), ('2022-01-05', 1500), ('2022-01-06', 1600), ('2022-01-07', 1700);", "sql": "SELECT date, revenue, ROW_NUMBER() OVER (ORDER BY revenue DESC) as ranking FROM daily_revenue WHERE date >= CURRENT_DATE - INTERVAL '3 months' ORDER BY date;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_window_function", "is_postgresql_specific": true, "sql_length": 157, "num_statements": 1} {"question": "Which countries have launched satellites using providers other than SpaceTech Inc. and CosmosLaunch?", "schema": "CREATE TABLE Satellites (country VARCHAR(255), provider VARCHAR(255)); INSERT INTO Satellites (country, provider) VALUES ('Country1', 'SpaceTech Inc.'); INSERT INTO Satellites (country, provider) VALUES ('Country2', 'CosmosLaunch'); INSERT INTO Satellites (country, provider) VALUES ('Country3', 'OtherLaunch');", "sql": "SELECT country FROM Satellites WHERE provider NOT IN ('SpaceTech Inc.', 'CosmosLaunch');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 88, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who were the opponents during game 30?", "schema": "CREATE TABLE table_name_82 (opponent VARCHAR, game VARCHAR)", "sql": "SELECT opponent FROM table_name_82 WHERE game = 30;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 51, "num_statements": 1} {"question": "What is the minimum number of accommodations provided, per country?", "schema": "CREATE TABLE Accommodations (ID INT PRIMARY KEY, Country VARCHAR(50), AccommodationType VARCHAR(50), Quantity INT); INSERT INTO Accommodations (ID, Country, AccommodationType, Quantity) VALUES (1, 'USA', 'Sign Language Interpretation', 300), (2, 'Canada', 'Wheelchair Ramp', 250), (3, 'Mexico', 'Assistive Listening Devices', 150);", "sql": "SELECT Country, MIN(Quantity) as Minimum FROM Accommodations GROUP BY Country;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 78, "num_statements": 1} {"question": "Determine the average daily production quantity for each well in the Asian region", "schema": "CREATE TABLE daily_production (well_id INT, date DATE, type VARCHAR(10), quantity INT, region VARCHAR(50)); INSERT INTO daily_production (well_id, date, type, quantity, region) VALUES (1, '2022-01-01', 'Oil', 100, 'Asian'), (1, '2022-01-02', 'Oil', 105, 'Asian'), (2, '2022-01-01', 'Gas', 200, 'Asian'), (2, '2022-01-02', 'Gas', 205, 'Asian');", "sql": "SELECT well_id, AVG(quantity) as avg_daily_production FROM daily_production WHERE region = 'Asian' GROUP BY well_id;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 116, "num_statements": 1} {"question": "What is the carbon price in Japanese Yen for each country that has a carbon pricing mechanism?", "schema": "CREATE TABLE Carbon_Pricing (Country VARCHAR(20), Currency VARCHAR(20), Price DECIMAL(10,2)); INSERT INTO Carbon_Pricing VALUES ('Japan', 'JPY', 3000), ('Canada', 'CAD', 20), ('Sweden', 'SEK', 40);", "sql": "SELECT Country, Price * (SELECT AVG(Exchange_Rate) FROM Exchange_Rates WHERE Currency_Code = Carbon_Pricing.Currency) AS Price_In_JPY FROM Carbon_Pricing;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 154, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the population density in Buffalo Lake?", "schema": "CREATE TABLE table_2500440_1 (population_density__per_km_2__ VARCHAR, name VARCHAR)", "sql": "SELECT population_density__per_km_2__ FROM table_2500440_1 WHERE name = 'Buffalo Lake';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 87, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who won the Modena circuit?", "schema": "CREATE TABLE table_1140116_5 (winning_driver VARCHAR, circuit VARCHAR)", "sql": "SELECT winning_driver FROM table_1140116_5 WHERE circuit = 'Modena';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 68, "num_statements": 1} {"question": "What is the total production volume of uranium in Canada for the year 2019?", "schema": "CREATE TABLE production (id INT, mine_id INT, year INT, product TEXT, production_volume INT); INSERT INTO production (id, mine_id, year, product, production_volume) VALUES (1, 1, 2019, 'Uranium', 5000);", "sql": "SELECT SUM(production_volume) FROM production WHERE year = 2019 AND product = 'Uranium' AND mine_id IN (SELECT id FROM mines WHERE location = 'Canada');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 152, "num_statements": 1} {"question": "pgTAP test for Resultset (assertion 458).", "schema": null, "sql": "SELECT * FROM check_test(\n is_empty( 'emptyset', 'whatever' ),\n true,\n 'is_empty(prepared, desc)',\n 'whatever',\n ''\n);", "explanation": "SQL assertion from pgTAP test suite for Resultset.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 133, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'btree_gist' (example 6).", "schema": null, "sql": "SELECT count(*) FROM intervaltmp WHERE a >= '199 days 21:21:23';", "explanation": "Example query from the 'btree_gist' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 64, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was Menato Boffa's grid?", "schema": "CREATE TABLE table_name_88 (grid VARCHAR, driver VARCHAR)", "sql": "SELECT grid FROM table_name_88 WHERE driver = 'menato boffa';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 61, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many games have a record of 3-4-0?", "schema": "CREATE TABLE table_21091162_1 (game INTEGER, record VARCHAR)", "sql": "SELECT MAX(game) FROM table_21091162_1 WHERE record = '3-4-0';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 62, "num_statements": 1} {"question": "How many plastic waste recycling facilities are there in China and Indonesia?", "schema": "CREATE TABLE RecyclingFacilities (facility_id INT, country VARCHAR(50), type VARCHAR(50));", "sql": "SELECT COUNT(*) FROM RecyclingFacilities WHERE country IN ('China', 'Indonesia') AND type = 'plastic';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 102, "num_statements": 1} {"question": "PostgreSQL regression test 'jsonb_jsonpath': Write the SELECT query (example 227).", "schema": null, "sql": "select jsonb_path_match('[{\"a\": 1}, {\"a\": 2}, 3]', 'lax exists($[*].a)', silent => true);", "explanation": "Regression test for Jsonb Jsonpath in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select jsonb_path_match('[{\"a\": 1}, {\"a\": 2}, 3]', 'lax exists($[*].a)', silent => true)) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 89, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the match report of the match on 20 August?", "schema": "CREATE TABLE table_name_12 (match_report VARCHAR, date VARCHAR)", "sql": "SELECT match_report FROM table_name_12 WHERE date = '20 august';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 64, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What name is associated with League 142?", "schema": "CREATE TABLE table_14962287_2 (name VARCHAR, league VARCHAR)", "sql": "SELECT name FROM table_14962287_2 WHERE league = 142;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 53, "num_statements": 1} {"question": "What are the total player scores for each game in a specific region?", "schema": "CREATE TABLE GameScores (player_id INT, game_id INT, player_score INT, region VARCHAR(255)); INSERT INTO GameScores (player_id, game_id, player_score, region) VALUES (1, 1, 1500, 'North America'), (2, 1, 1800, 'North America'), (3, 2, 2000, 'Asia'), (4, 2, 1900, 'Asia'), (5, 3, 1200, 'Europe'), (6, 3, 1600, 'Europe');", "sql": "SELECT G.game_name, PS.region, SUM(PS.player_score) as total_score FROM GameScores PS JOIN Games G ON PS.game_id = G.game_id GROUP BY G.game_name, PS.region;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 157, "num_statements": 1} {"question": "What is the monthly revenue trend for a specific hotel in 2022?", "schema": "CREATE TABLE hotel_revenue (hotel_name VARCHAR(20), revenue DECIMAL(10,2), revenue_date DATE); INSERT INTO hotel_revenue (hotel_name, revenue, revenue_date) VALUES ('Hotel A', 15000, '2022-01-01'), ('Hotel A', 16000, '2022-02-01'), ('Hotel A', 18000, '2022-03-01');", "sql": "SELECT revenue_date, SUM(revenue) as monthly_revenue FROM hotel_revenue WHERE hotel_name = 'Hotel A' AND revenue_date BETWEEN '2022-01-01' AND '2022-12-31' GROUP BY EXTRACT(MONTH FROM revenue_date);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 198, "num_statements": 1} {"question": "What is the difference in the number of visitors between the two exhibitions?", "schema": "CREATE TABLE Exhibition1 (visitor_id INT, primary key(visitor_id)); INSERT INTO Exhibition1 VALUES (1), (2), (3); CREATE TABLE Exhibition2 (visitor_id INT, primary key(visitor_id)); INSERT INTO Exhibition2 VALUES (4), (5), (6), (7);", "sql": "SELECT COUNT(Exhibition1.visitor_id) - COUNT(Exhibition2.visitor_id) AS difference FROM Exhibition1 LEFT JOIN Exhibition2 ON Exhibition1.visitor_id = Exhibition2.visitor_id;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 173, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'sepgsql' (example 10).", "schema": null, "sql": "INSERT INTO t3 VALUES (1, 'sss'), (2, 'ttt'), (3, 'uuu');", "explanation": "Example query from the 'sepgsql' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 57, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the earliest Year when the City is Peabody?", "schema": "CREATE TABLE table_name_2 (year INTEGER, city VARCHAR)", "sql": "SELECT MIN(year) FROM table_name_2 WHERE city = 'peabody';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 58, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which College has Player Mark Brown and a Pick # greater than 195?", "schema": "CREATE TABLE table_name_63 (college VARCHAR, pick__number VARCHAR, player VARCHAR)", "sql": "SELECT college FROM table_name_63 WHERE pick__number > 195 AND player = 'mark brown';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 85, "num_statements": 1} {"question": "What is the average age of artifacts in the 'Ancient_Ceramics' table?", "schema": "CREATE TABLE Ancient_Ceramics (id INT, artifact_name VARCHAR(50), age INT); INSERT INTO Ancient_Ceramics (id, artifact_name, age) VALUES (1, 'Pottery Shard A', 2000), (2, 'Pottery Shard B', 3000), (3, 'Pottery Shard C', 1500);", "sql": "SELECT AVG(age) FROM Ancient_Ceramics;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 38, "num_statements": 1} {"question": "What is the count of impact investments in South Asia with a value greater than 10000?", "schema": "CREATE TABLE impact_investments (id INT, value INT, location VARCHAR(50)); INSERT INTO impact_investments (id, value, location) VALUES (1, 12000, 'South Asia'), (2, 7000, 'East Asia'), (3, 15000, 'South Asia');", "sql": "SELECT COUNT(*) FROM impact_investments WHERE location = 'South Asia' AND value > 10000;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 88, "num_statements": 1} {"question": "What is the distribution of case outcomes (won, lost, settled) for attorneys in the 'attorneys_outcomes' table, grouped by attorney age?", "schema": "CREATE TABLE attorney_age (attorney_id INT, age INT); CREATE TABLE attorneys_outcomes (case_outcome VARCHAR(10), attorney_id INT);", "sql": "SELECT a.age, o.case_outcome, COUNT(*) AS count FROM attorney_age a JOIN attorneys_outcomes o ON a.attorney_id = o.attorney_id GROUP BY a.age, o.case_outcome;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 158, "num_statements": 1} {"question": "PostgreSQL regression test 'numeric': Write the SELECT query (example 518).", "schema": null, "sql": "SELECT t1.id1, t1.result, t2.expected\n FROM num_result t1, num_exp_ln t2\n WHERE t1.id1 = t2.id\n AND t1.result != t2.expected;", "explanation": "Regression test for Numeric in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT t1.id1, t1.result, t2.expected\n FROM num_result t1, num_exp_ln t2\n WHERE t1.id1 = t2.id\n AND t1.result != t2.expected) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 134, "num_statements": 1} {"question": "Update the salary of the council member representing district 3 to $90,000.00.", "schema": "CREATE TABLE City_Council (council_id INT, council_member VARCHAR(50), district_number INT, salary DECIMAL(10,2), PRIMARY KEY (council_id)); INSERT INTO City_Council (council_id, council_member, district_number, salary) VALUES (1, 'James Smith', 1, 85000.00), (2, 'Katherine Johnson', 2, 85000.00), (3, 'Mohammed Ahmed', 3, 80000.00);", "sql": "UPDATE City_Council SET salary = 90000.00 WHERE district_number = 3;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 68, "num_statements": 1} {"question": "Which states have more than 300 wastewater treatment plants?", "schema": "CREATE TABLE state_plants (state TEXT, num_plants INT); INSERT INTO state_plants (state, num_plants) VALUES ('California', 321), ('Texas', 456), ('New York', 123), ('Florida', 345);", "sql": "SELECT state FROM state_plants WHERE num_plants > 300;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 54, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the crowd size for the game where Footscray was the away team?", "schema": "CREATE TABLE table_name_21 (crowd VARCHAR, away_team VARCHAR)", "sql": "SELECT COUNT(crowd) FROM table_name_21 WHERE away_team = 'footscray';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 69, "num_statements": 1} {"question": "How many esports events were held in Japan in 2022?", "schema": "CREATE TABLE EsportsEvents (EventID INT, Country VARCHAR(20), Year INT); INSERT INTO EsportsEvents (EventID, Country, Year) VALUES (1, 'Japan', 2022);", "sql": "SELECT COUNT(*) FROM EsportsEvents WHERE Country = 'Japan' AND Year = 2022;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 75, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: If the total is 6, what is the maximum R?", "schema": "CREATE TABLE table (r INTEGER, total VARCHAR)", "sql": "SELECT MAX(r) FROM table WHERE total = 6;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 41, "num_statements": 1} {"question": "pgTAP test for Ownership (assertion 103).", "schema": null, "sql": "SELECT * FROM check_test(\n index_owner_is('someschema', 'anothertab', 'idx_name', current_user),\n\ttrue,\n 'index_owner_is(schema, table, index, user)',\n 'Index idx_name ON someschema.anothertab should be owned by ' || quote_ident(current_user),\n ''\n);", "explanation": "SQL assertion from pgTAP test suite for Ownership.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 262, "num_statements": 1} {"question": "What is the difference in revenue between the top and bottom album for each artist?", "schema": "CREATE TABLE AlbumRevenue (AlbumID INT, ArtistID INT, Revenue DECIMAL(10,2)); INSERT INTO AlbumRevenue (AlbumID, ArtistID, Revenue) VALUES (1, 1, 150000.00), (2, 1, 125000.00), (3, 2, 150000.00), (4, 2, 100000.00), (5, 3, 100000.00); CREATE TABLE Artist (ArtistID INT, ArtistName VARCHAR(50)); INSERT INTO Artist (ArtistID, ArtistName) VALUES (1, 'Taylor Swift'), (2, 'BTS'), (3, 'Adele');", "sql": "SELECT ArtistName, MAX(Revenue) - MIN(Revenue) AS RevenueDifference FROM (SELECT ArtistID, Revenue, ROW_NUMBER() OVER (PARTITION BY ArtistID ORDER BY Revenue DESC) AS Rank FROM AlbumRevenue) AS Subquery JOIN Artist ON Subquery.ArtistID = Artist.ArtistID GROUP BY ArtistName;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "query_window_function", "is_postgresql_specific": true, "sql_length": 274, "num_statements": 1} {"question": "pgTAP test for Inheritance (assertion 28).", "schema": null, "sql": "SELECT * FROM check_test(\n hasnt_inherited_tables( 'child2', 'Gimme inheritance' ),\n true,\n 'hasnt_inherited_tables(tab, desc)',\n 'Gimme inheritance',\n ''\n);", "explanation": "SQL assertion from pgTAP test suite for Inheritance.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 172, "num_statements": 1} {"question": "Delete records in the \"renewable_energy\" table where the \"source\" is 'hydro' and the \"capacity_mw\" is greater than 500", "schema": "CREATE TABLE renewable_energy ( id INT PRIMARY KEY, source VARCHAR(50), capacity_mw INT ); INSERT INTO renewable_energy (id, source, capacity_mw) VALUES (1, 'hydro', 600), (2, 'solar', 300), (3, 'wind', 400);", "sql": "DELETE FROM renewable_energy WHERE source = 'hydro' AND capacity_mw > 500;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_delete", "is_postgresql_specific": false, "sql_length": 74, "num_statements": 1} {"question": "What is the average account balance for socially responsible lending customers, partitioned by gender?", "schema": "CREATE TABLE socially_responsible_lending(customer_id INT, name VARCHAR(50), gender VARCHAR(10), account_balance DECIMAL(10, 2)); INSERT INTO socially_responsible_lending VALUES (1, 'John Doe', 'Male', 5000), (2, 'Jane Smith', 'Female', 7000), (3, 'Ali Hassan', 'Male', 6000), (4, 'Fatima Khan', 'Female', 8000);", "sql": "SELECT gender, AVG(account_balance) avg_balance FROM socially_responsible_lending WINDOW W AS (PARTITION BY gender);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "query_select", "is_postgresql_specific": true, "sql_length": 116, "num_statements": 1} {"question": "Insert a new professional development course with a unique course_id, course_name, and number of students who have completed the course, and update the corresponding teacher's record accordingly.", "schema": "CREATE TABLE courses (course_id INT, course_name TEXT, num_completions INT); CREATE TABLE professional_development (pd_id INT, teacher_id INT, course_id INT);", "sql": "INSERT INTO courses (course_id, course_name, num_completions) VALUES (12345, 'Python for Educators', 50); UPDATE professional_development pd SET pd.course_id = 12345 WHERE EXISTS (SELECT * FROM teachers t JOIN courses c ON t.teacher_id = pd.teacher_id WHERE c.course_id = 12345 AND pd.course_id != 12345);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 305, "num_statements": 2} {"question": "What is the maximum number of mental health parity cases by state?", "schema": "CREATE TABLE mental_health_parity (state VARCHAR(255), cases INT); INSERT INTO mental_health_parity (state, cases) VALUES ('California', 500), ('New York', 600), ('Texas', 450), ('Florida', 400);", "sql": "SELECT state, MAX(cases) FROM mental_health_parity GROUP BY state;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "Find the clients who have taken out the most socially responsible loans.", "schema": "CREATE TABLE socially_responsible_loans(client_id INT, country VARCHAR(25));INSERT INTO socially_responsible_loans(client_id, country) VALUES (1, 'Malaysia'), (2, 'UAE'), (3, 'Indonesia'), (4, 'Saudi Arabia'), (1, 'Malaysia'), (2, 'UAE'), (7, 'Indonesia'), (8, 'Saudi Arabia'), (1, 'Malaysia'), (2, 'UAE');", "sql": "SELECT client_id, COUNT(*) as num_loans FROM socially_responsible_loans GROUP BY client_id ORDER BY num_loans DESC;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 115, "num_statements": 1} {"question": "What is the average playtime for each player in the \"Shooter\" genre?", "schema": "CREATE TABLE PlayerPlaytime (PlayerID int, PlayerName varchar(50), Game varchar(50), Playtime decimal(10,2));", "sql": "SELECT PlayerName, AVG(Playtime) OVER(PARTITION BY PlayerID) as AvgPlaytime FROM PlayerPlaytime WHERE Game = 'Shooter';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "query_window_function", "is_postgresql_specific": true, "sql_length": 119, "num_statements": 1} {"question": "What is the average training time for creative AI applications?", "schema": "CREATE TABLE creative_ai_applications_training_times (id INT, application VARCHAR(25), training_time FLOAT); INSERT INTO creative_ai_applications_training_times (id, application, training_time) VALUES (1, 'Art Generation', 2.5), (2, 'Music Composition', 3.2), (3, 'Text Generation', 2.8), (4, 'Image Recognition', 1.5), (5, 'Speech Recognition', 4.2);", "sql": "SELECT AVG(training_time) as avg_training_time FROM creative_ai_applications_training_times;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 92, "num_statements": 1} {"question": "Who is the user with the highest number of login attempts in the past month?", "schema": "CREATE TABLE login_attempts (id INT, user TEXT, timestamp TIMESTAMP); INSERT INTO login_attempts (id, user, timestamp) VALUES (1, 'user1', '2021-01-01 12:00:00'), (2, 'user2', '2021-01-15 14:30:00'), (3, 'user1', '2021-02-01 10:15:00'), (4, 'user3', '2021-02-04 11:20:00'), (5, 'user2', '2021-02-04 18:30:00'), (6, 'user4', '2021-02-06 16:45:00');", "sql": "SELECT user, COUNT(*) AS login_attempts_count FROM login_attempts WHERE timestamp >= NOW() - INTERVAL '1 month' GROUP BY user ORDER BY login_attempts_count DESC LIMIT 1;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 169, "num_statements": 1} {"question": "What is the total number of comments on posts related to mental health, published by users in Japan, in the month of February 2022?", "schema": "CREATE TABLE posts (post_id INT, user_id INT, followers INT, post_date DATE, content TEXT); CREATE TABLE comments (comment_id INT, post_id INT, user_id INT, comment_date DATE, comment_text TEXT);", "sql": "SELECT SUM(c.comments) FROM posts p JOIN comments c ON p.post_id = c.post_id WHERE p.content LIKE '%mental health%' AND p.country = 'Japan' AND p.post_date >= '2022-02-01' AND p.post_date < '2022-03-01';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 203, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who is the partner at the Tampa tournament?", "schema": "CREATE TABLE table_name_93 (partner VARCHAR, tournament VARCHAR)", "sql": "SELECT partner FROM table_name_93 WHERE tournament = 'tampa';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 61, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Tell me the competition of 20 august 2008", "schema": "CREATE TABLE table_name_87 (competition VARCHAR, date VARCHAR)", "sql": "SELECT competition FROM table_name_87 WHERE date = '20 august 2008';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 68, "num_statements": 1} {"question": "Which members have the most workout duration in the last month?", "schema": "CREATE TABLE workout_history (id INT, member_id INT, duration INT, date DATE, type VARCHAR(50)); CREATE VIEW total_duration_by_member AS SELECT member_id, SUM(duration) total_duration FROM workout_history GROUP BY member_id; ALTER TABLE total_duration_by_member ADD COLUMN rank INT; UPDATE total_duration_by_member SET rank = ROW_NUMBER() OVER (ORDER BY total_duration DESC);", "sql": "SELECT w.name, t.total_duration, t.rank FROM total_duration_by_member t JOIN membership w ON t.member_id = w.id ORDER BY t.rank;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": true, "sql_length": 128, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What first class team does sanath jayasuriya play for?", "schema": "CREATE TABLE table_name_51 (first_class_team VARCHAR, player VARCHAR)", "sql": "SELECT first_class_team FROM table_name_51 WHERE player = 'sanath jayasuriya';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 78, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is every entry for assists if the player is Lynn Pride?", "schema": "CREATE TABLE table_25352324_5 (assists VARCHAR, player VARCHAR)", "sql": "SELECT assists FROM table_25352324_5 WHERE player = 'Lynn Pride';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 65, "num_statements": 1} {"question": "PostgreSQL regression test 'jsonb_jsonpath': Write the SELECT query (example 349).", "schema": null, "sql": "select jsonb_path_query('12345678901234567890', '$.bigint()');", "explanation": "Regression test for Jsonb Jsonpath in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select jsonb_path_query('12345678901234567890', '$.bigint()')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "advanced", "category": "query_select", "is_postgresql_specific": false, "sql_length": 62, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the score when essendon was the home team?", "schema": "CREATE TABLE table_name_75 (home_team VARCHAR)", "sql": "SELECT home_team AS score FROM table_name_75 WHERE home_team = 'essendon';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 74, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the status when the artist is Neil Sedaka?", "schema": "CREATE TABLE table_name_31 (status VARCHAR, artist VARCHAR)", "sql": "SELECT status FROM table_name_31 WHERE artist = 'neil sedaka';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 62, "num_statements": 1} {"question": "What is the total revenue for the month of July?", "schema": "CREATE TABLE revenue (id INT, month INT, amount DECIMAL(5,2)); INSERT INTO revenue (id, month, amount) VALUES (1, 6, 5000.00), (2, 7, 6000.00), (3, 8, 7000.00), (4, 9, 8000.00);", "sql": "SELECT SUM(amount) FROM revenue WHERE month = 7;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 48, "num_statements": 1} {"question": "Which city has the highest number of electric taxi rides in a month?", "schema": "CREATE TABLE electric_taxis (taxi_id INT, ride_id INT, start_time TIMESTAMP, end_time TIMESTAMP, city VARCHAR(255));", "sql": "SELECT city, COUNT(*) as num_rides FROM electric_taxis WHERE ride_id BETWEEN 1 AND 100000 GROUP BY city ORDER BY num_rides DESC LIMIT 1;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 136, "num_statements": 1} {"question": "PostgreSQL regression test 'jsonb': Write the SELECT query (example 790).", "schema": null, "sql": "select ('123'::jsonb)[0];", "explanation": "Regression test for Jsonb in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select ('123'::jsonb)[0]) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": true, "sql_length": 25, "num_statements": 1} {"question": "What is the average number of hours worked per week by gender, partitioned by mining operation?", "schema": "CREATE TABLE mining_operations (id INT, name TEXT, location TEXT, gender TEXT, hours_worked INT); INSERT INTO mining_operations (id, name, location, gender, hours_worked) VALUES (1, 'Operation X', 'Australia-NSW', 'Male', 45), (2, 'Operation Y', 'Australia-QLD', 'Female', 40), (3, 'Operation Z', 'Australia-NSW', 'Non-binary', 35), (4, 'Operation A', 'Australia-QLD', 'Male', 50);", "sql": "SELECT name, gender, AVG(hours_worked) FROM mining_operations GROUP BY name, gender;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 84, "num_statements": 1} {"question": "PostgreSQL Plpgsql: show example 68.", "schema": null, "sql": "CREATE FUNCTION stamp_user(id int, comment text) RETURNS void AS $$ #variable_conflict use_variable DECLARE curtime timestamp := now(); BEGIN UPDATE users SET last_modified = curtime, comment = comment WHERE users.id = id; END; $$ LANGUAGE plpgsql;", "explanation": "Example from PostgreSQL documentation on Plpgsql.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "plpgsql_function", "is_postgresql_specific": true, "sql_length": 248, "num_statements": 4} {"question": "Generate PostgreSQL SQL for: What is the least round number for Jon Olinger, who was picked before pick # 24?", "schema": "CREATE TABLE table_name_3 (round INTEGER, name VARCHAR, pick__number VARCHAR)", "sql": "SELECT MIN(round) FROM table_name_3 WHERE name = 'jon olinger' AND pick__number < 24;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 85, "num_statements": 1} {"question": "What is the maximum severity and number of occurrences for each threat type, considering only records reported in the last year?", "schema": "CREATE TABLE threat_intelligence (threat_id INT, threat_type VARCHAR(255), severity INT, last_reported_date DATE, reported_by VARCHAR(255));", "sql": "SELECT threat_type, MAX(severity), COUNT(*) FROM threat_intelligence WHERE last_reported_date BETWEEN DATE_SUB(CURDATE(), INTERVAL 1 YEAR) AND CURDATE() GROUP BY threat_type;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 174, "num_statements": 1} {"question": "Insert a new record into the 'Menu' table with id 4, name 'Fish Tacos', price 8.99, and vegetarian false.", "schema": "CREATE TABLE Menu (id INT, name VARCHAR(255), price DECIMAL(5,2), vegetarian BOOLEAN); INSERT INTO Menu (id, name, price, vegetarian) VALUES (1, 'Chicken Burger', 7.99, FALSE), (2, 'Veggie Wrap', 6.49, TRUE), (3, 'Chicken Caesar Salad', 9.99, FALSE);", "sql": "INSERT INTO Menu (id, name, price, vegetarian) VALUES (4, 'Fish Tacos', 8.99, FALSE);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 85, "num_statements": 1} {"question": "What is the most popular clothing item size in Canada?", "schema": "CREATE TABLE clothing_inventory (id INT, item_name VARCHAR(255), size VARCHAR(10), quantity INT, country VARCHAR(50));", "sql": "SELECT item_name, size, SUM(quantity) as total_quantity FROM clothing_inventory WHERE country = 'Canada' GROUP BY item_name, size ORDER BY total_quantity DESC LIMIT 1;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 167, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Role of narrator, and a Year larger than 2009, and a Release/Air Date of 7 october 2010 belongs to what author?", "schema": "CREATE TABLE table_name_62 (author VARCHAR, release_air_date VARCHAR, role VARCHAR, year VARCHAR)", "sql": "SELECT author FROM table_name_62 WHERE role = 'narrator' AND year > 2009 AND release_air_date = '7 october 2010';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 113, "num_statements": 1} {"question": "Show all agricultural projects in 'Rural Kenya'", "schema": "CREATE TABLE agricultural_projects (id INT PRIMARY KEY, name VARCHAR(100), location VARCHAR(50), funding_source VARCHAR(50), start_date DATE, end_date DATE); INSERT INTO agricultural_projects (id, name, location, funding_source, start_date, end_date) VALUES (1, 'Solar Powered Irrigation', 'Rural Kenya', 'World Bank', '2022-01-01', '2023-12-31'), (2, 'Crop Diversification', 'Rural Peru', 'USAID', '2022-06-15', '2024-06-14');", "sql": "SELECT * FROM agricultural_projects WHERE location = 'Rural Kenya';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 67, "num_statements": 1} {"question": "What is the most common type of crime committed in the South district?", "schema": "CREATE TABLE crimes (id INT, district VARCHAR(20), type VARCHAR(20), date DATE); INSERT INTO crimes (id, district, type, date) VALUES (1, 'Downtown', 'Theft', '2022-01-02'); INSERT INTO crimes (id, district, type, date) VALUES (2, 'Uptown', 'Vandalism', '2022-01-03'); INSERT INTO crimes (id, district, type, date) VALUES (3, 'Westside', 'Assault', '2022-01-04'); INSERT INTO crimes (id, district, type, date) VALUES (4, 'East End', 'Theft', '2022-01-05'); INSERT INTO crimes (id, district, type, date) VALUES (5, 'South', 'Theft', '2022-01-06'); INSERT INTO crimes (id, district, type, date) VALUES (6, 'South', 'Vandalism', '2022-01-07');", "sql": "SELECT type, COUNT(*) AS count FROM crimes WHERE district = 'South' GROUP BY type ORDER BY count DESC LIMIT 1;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 110, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the rank when then time was 7:52.53?", "schema": "CREATE TABLE table_name_10 (rank INTEGER, time VARCHAR)", "sql": "SELECT MIN(rank) FROM table_name_10 WHERE time = '7:52.53';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 59, "num_statements": 1} {"question": "What is the total aid amount provided by each aid agency?", "schema": "CREATE TABLE Aid_Agency (id INT PRIMARY KEY, name VARCHAR(50), type VARCHAR(50), donor_id INT, FOREIGN KEY (donor_id) REFERENCES Donor(id)); INSERT INTO Aid_Agency (id, name, type, donor_id) VALUES (1, 'UNHCR', 'Refugee Support', 1); INSERT INTO Aid_Agency (id, name, type, donor_id) VALUES (2, 'WFP', 'Food Security', 2); CREATE TABLE Aid_Distribution (id INT PRIMARY KEY, disaster_id INT, aid_amount FLOAT, aid_agency_id INT, FOREIGN KEY (disaster_id) REFERENCES Disaster(id), FOREIGN KEY (aid_agency_id) REFERENCES Aid_Agency(id)); INSERT INTO Aid_Distribution (id, disaster_id, aid_amount, aid_agency_id) VALUES (1, 1, 100000, 1); INSERT INTO Aid_Distribution (id, disaster_id, aid_amount, aid_agency_id) VALUES (2, 2, 150000, 2);", "sql": "SELECT a.name as agency_name, SUM(ad.aid_amount) as total_aid FROM Aid_Agency a JOIN Aid_Distribution ad ON a.id = ad.aid_agency_id GROUP BY a.name;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 148, "num_statements": 1} {"question": "List the unique species and their conservation status in the Arctic region.", "schema": "CREATE TABLE species (id INT, species_name VARCHAR, conservation_status VARCHAR); INSERT INTO species VALUES (1, 'Polar Bear', 'Vulnerable');", "sql": "SELECT DISTINCT species_name, conservation_status FROM species;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 63, "num_statements": 1} {"question": "What is the maximum number of points scored by a player in a single NBA game, and who was the player?", "schema": "CREATE TABLE games (game_id INT, date DATE, team1 TEXT, team2 TEXT, player TEXT, points INT);", "sql": "SELECT player, MAX(points) FROM games;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 38, "num_statements": 1} {"question": "What is the change in temperature for each country between 2019 and 2020 in the 'world_temperature' table?", "schema": "CREATE TABLE world_temperature (country VARCHAR(255), temperature DECIMAL(5,2), measurement_date DATE); INSERT INTO world_temperature (country, temperature, measurement_date) VALUES ('Canada', 10.5, '2019-01-01'), ('Mexico', 22.3, '2019-01-01'), ('Canada', 12.0, '2020-01-01'), ('Mexico', 25.1, '2020-01-01');", "sql": "SELECT a.country, (a.temperature - b.temperature) as temperature_change FROM world_temperature a INNER JOIN world_temperature b ON a.country = b.country WHERE YEAR(a.measurement_date) = 2020 AND YEAR(b.measurement_date) = 2019;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 227, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is Winner, when Team is \"R.J.MacArthur Onslow\", and when Circuit is \"Oran Park Raceway\"?", "schema": "CREATE TABLE table_name_29 (winner VARCHAR, team VARCHAR, circuit VARCHAR)", "sql": "SELECT winner FROM table_name_29 WHERE team = 'r.j.macarthur onslow' AND circuit = 'oran park raceway';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 103, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What company collaborated in pre-clinical autoimmune disease and inflammation?", "schema": "CREATE TABLE table_name_99 (collaboration VARCHAR, status VARCHAR, indication VARCHAR)", "sql": "SELECT collaboration FROM table_name_99 WHERE status = 'pre-clinical' AND indication = 'autoimmune disease and inflammation';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 125, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What are the countries of perpetrators? Show each country and the corresponding number of perpetrators there.", "schema": "CREATE TABLE perpetrator (Country VARCHAR)", "sql": "SELECT Country, COUNT(*) FROM perpetrator GROUP BY Country;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 59, "num_statements": 1} {"question": "What is the minimum property price in accessible neighborhoods in New York?", "schema": "CREATE TABLE NYC_Neighborhoods (Neighborhood_Name TEXT, Accessibility BOOLEAN); INSERT INTO NYC_Neighborhoods (Neighborhood_Name, Accessibility) VALUES ('Manhattan', true), ('Brooklyn', false), ('Queens', true), ('Bronx', true), ('Staten Island', false); CREATE TABLE NYC_Properties (Neighborhood_Name TEXT, Property_Price INTEGER); INSERT INTO NYC_Properties (Neighborhood_Name, Property_Price) VALUES ('Manhattan', 1000000), ('Brooklyn', 800000), ('Queens', 900000), ('Bronx', 700000), ('Staten Island', 600000);", "sql": "SELECT MIN(NYC_Properties.Property_Price) FROM NYC_Properties INNER JOIN NYC_Neighborhoods ON NYC_Properties.Neighborhood_Name = NYC_Neighborhoods.Neighborhood_Name WHERE NYC_Neighborhoods.Accessibility = true;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 210, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'create_index' (example 449).", "schema": null, "sql": "INSERT INTO concur_reindex_tab VALUES (1, 'a');", "explanation": "DML from PostgreSQL core regression test for Create Index.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 48, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many total wins with the latest win at the 1999 Italian Grand Prix at a rank of 15?", "schema": "CREATE TABLE table_name_12 (wins INTEGER, latest_win VARCHAR, rank VARCHAR)", "sql": "SELECT SUM(wins) FROM table_name_12 WHERE latest_win = '1999 italian grand prix' AND rank > 15;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 95, "num_statements": 1} {"question": "Find the total size of fish farms in 'oceans' schema where the size is greater than 50.", "schema": "CREATE SCHEMA oceans; CREATE TABLE fish_farms (id INT, size FLOAT, location VARCHAR(20)); INSERT INTO fish_farms (id, size, location) VALUES (1, 55.2, 'ocean'), (2, 62.5, 'ocean'), (3, 70.3, 'ocean');", "sql": "SELECT SUM(size) FROM oceans.fish_farms WHERE size > 50;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 56, "num_statements": 1} {"question": "What is the current landfill capacity in Mumbai, India and the projected capacity for the year 2030?'", "schema": "CREATE TABLE landfill_capacity (city VARCHAR(20), current_capacity INT, projected_capacity INT); INSERT INTO landfill_capacity (city, current_capacity, projected_capacity) VALUES ('Mumbai', 5000, 7000);", "sql": "SELECT current_capacity as current_landfill_capacity, projected_capacity as projected_landfill_capacity FROM landfill_capacity WHERE city = 'Mumbai';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 149, "num_statements": 1} {"question": "What is the ratio of concert ticket prices between Pop and Hip Hop genres?", "schema": "CREATE TABLE artists (name VARCHAR(50), genre VARCHAR(50)); INSERT INTO artists (name, genre) VALUES ('Beyoncé', 'Pop'), ('Drake', 'Hip Hop'), ('Taylor Swift', 'Country Pop'), ('Kendrick Lamar', 'Hip Hop'); CREATE TABLE concerts (artist_name VARCHAR(50), venue VARCHAR(50), ticket_price DECIMAL(5,2)); INSERT INTO concerts (artist_name, venue, ticket_price) VALUES ('Beyoncé', 'Madison Square Garden', 200.00), ('Beyoncé', 'Staples Center', 180.00), ('Drake', 'Barclays Center', 150.00), ('Taylor Swift', 'MetLife Stadium', 250.00);", "sql": "SELECT 100.0 * AVG(CASE WHEN genre = 'Pop' THEN ticket_price END) / AVG(CASE WHEN genre = 'Hip Hop' THEN ticket_price END) AS ticket_price_ratio FROM artists JOIN concerts ON artists.name = concerts.artist_name;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 211, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the translation of chetvert?", "schema": "CREATE TABLE table_name_81 (translation VARCHAR, unit VARCHAR)", "sql": "SELECT translation FROM table_name_81 WHERE unit = 'chetvert';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 62, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What province is Hamilton part of?", "schema": "CREATE TABLE table_name_66 (province VARCHAR, city VARCHAR)", "sql": "SELECT province FROM table_name_66 WHERE city = 'hamilton';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 59, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Show the names of companies in the banking or retailing industry?", "schema": "CREATE TABLE company (Name VARCHAR, Industry VARCHAR)", "sql": "SELECT Name FROM company WHERE Industry = 'Banking' OR Industry = 'Retailing';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 78, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Tell me lived for name of 1994/2003", "schema": "CREATE TABLE table_name_99 (lived_when___mya__ VARCHAR, discovery___publication_of_name VARCHAR)", "sql": "SELECT lived_when___mya__ FROM table_name_99 WHERE discovery___publication_of_name = '1994/2003';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 97, "num_statements": 1} {"question": "Update animal count in 'conservation_program' for Panda", "schema": "CREATE TABLE conservation_program (id INT PRIMARY KEY, animal_name VARCHAR, num_animals INT); INSERT INTO conservation_program (id, animal_name, num_animals) VALUES (1, 'Tiger', 300), (2, 'Panda', 150), (3, 'Rhino', 70), (4, 'Elephant', 450);", "sql": "UPDATE conservation_program SET num_animals = 200 WHERE animal_name = 'Panda';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 78, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the score in Kathmandu?", "schema": "CREATE TABLE table_name_96 (score VARCHAR, venue VARCHAR)", "sql": "SELECT score FROM table_name_96 WHERE venue = 'kathmandu';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 58, "num_statements": 1} {"question": "What is the local economic impact of tourism in London in 2020?", "schema": "CREATE TABLE tourism_impact (city VARCHAR(100), year INT, local_economic_impact INT); INSERT INTO tourism_impact (city, year, local_economic_impact) VALUES ('Paris', 2020, 12000000); INSERT INTO tourism_impact (city, year, local_economic_impact) VALUES ('London', 2020, 10000000);", "sql": "SELECT local_economic_impact FROM tourism_impact WHERE city = 'London' AND year = 2020;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 87, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the sum of Bronze when the rank is 7, and the total is more than 22?", "schema": "CREATE TABLE table_name_19 (bronze INTEGER, rank VARCHAR, total VARCHAR)", "sql": "SELECT SUM(bronze) FROM table_name_19 WHERE rank = 7 AND total > 22;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 68, "num_statements": 1} {"question": "What is the combined area of all the habitats where the 'Giant Panda' is found in the 'habitat_animals' table?", "schema": "CREATE TABLE habitat_animals (id INT, animal_name VARCHAR(50), habitat_name VARCHAR(50), area FLOAT); INSERT INTO habitat_animals (id, animal_name, habitat_name, area) VALUES (1, 'Giant Panda', 'Panda Reserve', 50.0), (2, 'Giant Panda', 'Forest', 100.0), (3, 'Tiger', 'Jungle', 200.0);", "sql": "SELECT SUM(area) FROM habitat_animals WHERE animal_name = 'Giant Panda';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 72, "num_statements": 1} {"question": "What is the average home game attendance for the Lakers?", "schema": "CREATE TABLE teams (team_id INT, team_name VARCHAR(255)); INSERT INTO teams (team_id, team_name) VALUES (1, 'Lakers'); CREATE TABLE venues (venue_id INT, venue_name VARCHAR(255)); INSERT INTO venues (venue_id, venue_name) VALUES (1, 'Staples Center'); CREATE TABLE games (game_id INT, team_id INT, venue_id INT, attendance INT); INSERT INTO games (game_id, team_id, venue_id, attendance) VALUES (1, 1, 1, 10000);", "sql": "SELECT AVG(games.attendance) FROM games INNER JOIN teams ON games.team_id = teams.team_id INNER JOIN venues ON games.venue_id = venues.venue_id WHERE teams.team_name = 'Lakers' AND venues.venue_name = 'Staples Center';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 218, "num_statements": 1} {"question": "What is the maximum virtual tour engagement duration in Tokyo, Japan?", "schema": "CREATE TABLE virtual_tours (tour_id INT, hotel_name VARCHAR(255), city VARCHAR(255), country VARCHAR(255), duration INT); INSERT INTO virtual_tours (tour_id, hotel_name, city, country, duration) VALUES (1, 'Hotel Park Hyatt', 'Tokyo', 'Japan', 240), (2, 'Hotel Imperial', 'Tokyo', 'Japan', 300);", "sql": "SELECT MAX(duration) FROM virtual_tours WHERE city = 'Tokyo' AND country = 'Japan';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 83, "num_statements": 1} {"question": "What is the total number of hotels in Brazil that are certified as sustainable?", "schema": "CREATE TABLE brazil_hotels (name VARCHAR(255), location VARCHAR(255), certification DATE); INSERT INTO brazil_hotels (name, location, certification) VALUES ('Ecohotel Quinta do Buchenau', 'São Paulo, Brazil', '2018-02-14');", "sql": "SELECT COUNT(*) FROM brazil_hotels WHERE certification IS NOT NULL;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 67, "num_statements": 1} {"question": "List all employees who have not completed diversity training, along with their hire dates.", "schema": "CREATE TABLE Employees (EmployeeID INT, HireDate DATETIME, CompletedDiversityTraining BOOLEAN); INSERT INTO Employees (EmployeeID, HireDate, CompletedDiversityTraining) VALUES (1, '2020-01-01', true); INSERT INTO Employees (EmployeeID, HireDate, CompletedDiversityTraining) VALUES (2, '2019-05-15', false);", "sql": "SELECT EmployeeID, HireDate FROM Employees WHERE CompletedDiversityTraining = false;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 84, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the result on the wild card?", "schema": "CREATE TABLE table_name_74 (result VARCHAR, week VARCHAR)", "sql": "SELECT result FROM table_name_74 WHERE week = 'wild card';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 58, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which years have a Decile of 4, an Authority of state, and a Name of mapiu school?", "schema": "CREATE TABLE table_name_43 (years VARCHAR, name VARCHAR, decile VARCHAR, authority VARCHAR)", "sql": "SELECT years FROM table_name_43 WHERE decile = 4 AND authority = 'state' AND name = 'mapiu school';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 99, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which Location has a Method of decision (unanimous), and Res of win x?", "schema": "CREATE TABLE table_name_99 (location VARCHAR, method VARCHAR, res VARCHAR)", "sql": "SELECT location FROM table_name_99 WHERE method = 'decision (unanimous)' AND res = 'win x';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 91, "num_statements": 1} {"question": "What is the maximum number of concerts performed by any artist in a single year?", "schema": "CREATE TABLE Concerts (ConcertID INT, Title VARCHAR(50), ArtistID INT, Venue VARCHAR(50), Year INT, Revenue INT); INSERT INTO Concerts VALUES (1, 'Jazz Concert', 1, 'New York', 2022, 5000); INSERT INTO Concerts VALUES (2, 'Rock Concert', 2, 'Toronto', 2022, 8000); INSERT INTO Concerts VALUES (3, 'Jazz Concert', 1, 'Chicago', 2022, 6000); INSERT INTO Concerts VALUES (4, 'Pop Concert', 2, 'Montreal', 2021, 7000);", "sql": "SELECT MAX(C.Performances) FROM (SELECT ArtistID, COUNT(DISTINCT Year) AS Performances FROM Concerts GROUP BY ArtistID) C;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "plpgsql", "is_postgresql_specific": false, "sql_length": 122, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Find all the payment dates for the payments with an amount larger than 10 and the payments handled by a staff person with the first name Elsa.", "schema": "CREATE TABLE payment (payment_date VARCHAR, staff_id VARCHAR); CREATE TABLE staff (staff_id VARCHAR, first_name VARCHAR); CREATE TABLE payment (payment_date VARCHAR, amount INTEGER)", "sql": "SELECT payment_date FROM payment WHERE amount > 10 UNION SELECT T1.payment_date FROM payment AS T1 JOIN staff AS T2 ON T1.staff_id = T2.staff_id WHERE T2.first_name = 'Elsa';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 174, "num_statements": 1} {"question": "What is the moving average of customer orders in the past 7 days?", "schema": "CREATE TABLE orders(id INT, date DATE, quantity INT); INSERT INTO orders(id, date, quantity) VALUES (1, '2022-05-01', 3), (2, '2022-05-03', 2), (3, '2022-05-05', 5);", "sql": "SELECT date, AVG(quantity) OVER (ORDER BY date ROWS BETWEEN 6 PRECEDING AND CURRENT ROW) as moving_average FROM orders WHERE date >= CURRENT_DATE - INTERVAL '7 days';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_window_function", "is_postgresql_specific": true, "sql_length": 166, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the total number of rank for lane 7", "schema": "CREATE TABLE table_name_69 (rank VARCHAR, lane VARCHAR)", "sql": "SELECT COUNT(rank) FROM table_name_69 WHERE lane = 7;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 53, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the nickname of the team in the East Geelong club?", "schema": "CREATE TABLE table_name_93 (nickname VARCHAR, club VARCHAR)", "sql": "SELECT nickname FROM table_name_93 WHERE club = 'east geelong';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 63, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Show distinct types of artworks that are nominated in festivals in 2007.", "schema": "CREATE TABLE nomination (Artwork_ID VARCHAR, Festival_ID VARCHAR); CREATE TABLE festival_detail (Festival_ID VARCHAR, Year VARCHAR); CREATE TABLE artwork (Type VARCHAR, Artwork_ID VARCHAR)", "sql": "SELECT DISTINCT T2.Type FROM nomination AS T1 JOIN artwork AS T2 ON T1.Artwork_ID = T2.Artwork_ID JOIN festival_detail AS T3 ON T1.Festival_ID = T3.Festival_ID WHERE T3.Year = 2007;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 181, "num_statements": 1} {"question": "What is the average age of construction workers in Texas?", "schema": "CREATE TABLE construction_workers (id INT, name VARCHAR(50), age INT, state VARCHAR(2)); INSERT INTO construction_workers (id, name, age, state) VALUES (1, 'John Doe', 35, 'Texas'); INSERT INTO construction_workers (id, name, age, state) VALUES (2, 'Jane Smith', 40, 'Texas');", "sql": "SELECT AVG(age) FROM construction_workers WHERE state = 'Texas';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 64, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the high lap total for mika salo with a grid greater than 17?", "schema": "CREATE TABLE table_name_66 (laps INTEGER, driver VARCHAR, grid VARCHAR)", "sql": "SELECT MAX(laps) FROM table_name_66 WHERE driver = 'mika salo' AND grid > 17;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 77, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the winning score on Aug 28, 1983?", "schema": "CREATE TABLE table_name_89 (winning_score VARCHAR, date VARCHAR)", "sql": "SELECT winning_score FROM table_name_89 WHERE date = 'aug 28, 1983';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 68, "num_statements": 1} {"question": "pgTAP test for Resultset (assertion 332).", "schema": null, "sql": "SELECT * FROM check_test(\n bag_has( 'anames', 'subset' ),\n true,\n 'bag_has( prepared, subprepared )',\n '',\n ''\n);", "explanation": "SQL assertion from pgTAP test suite for Resultset.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 128, "num_statements": 1} {"question": "What is the number of mental health parity violations for each community health worker in the Los Angeles region?", "schema": "CREATE TABLE community_health_workers (worker_id INT, name TEXT, region TEXT); INSERT INTO community_health_workers (worker_id, name, region) VALUES (1, 'John Doe', 'Los Angeles'), (2, 'Jane Smith', 'New York'); CREATE TABLE mental_health_parity_violations (id INT, worker_id INT, violation_count INT); INSERT INTO mental_health_parity_violations (id, worker_id, violation_count) VALUES (1, 1, 5), (2, 1, 3), (3, 2, 1);", "sql": "SELECT c.name, m.violation_count FROM community_health_workers c JOIN mental_health_parity_violations m ON c.worker_id = m.worker_id WHERE c.region = 'Los Angeles';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 164, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which episode number in season 5 was viewed by 3.00 million U.S. viziers?", "schema": "CREATE TABLE table_11820086_1 (no_in_season INTEGER, us_viewers__millions_ VARCHAR)", "sql": "SELECT MIN(no_in_season) FROM table_11820086_1 WHERE us_viewers__millions_ = '3.00';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 84, "num_statements": 1} {"question": "pgTAP test for Throwtap (assertion 17).", "schema": null, "sql": "SELECT * FROM check_test(\n lives_ok( 'EXECUTE livetest'),\n true,\n 'lives_ok(execute)'\n '',\n ''\n);", "explanation": "SQL assertion from pgTAP test suite for Throwtap.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 112, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which 2007 has a 2010 of A?", "schema": "CREATE TABLE table_name_92 (Id VARCHAR)", "sql": "SELECT 2007 FROM table_name_92 WHERE 2010 = 'a';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 48, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Show the most common apartment type code among apartments with more than 1 bathroom.", "schema": "CREATE TABLE Apartments (apt_type_code VARCHAR, bathroom_count INTEGER)", "sql": "SELECT apt_type_code FROM Apartments WHERE bathroom_count > 1 GROUP BY apt_type_code ORDER BY COUNT(*) DESC LIMIT 1;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 116, "num_statements": 1} {"question": "What is the average donation by age group?", "schema": "CREATE TABLE Donations (ID INT, DonorAge INT, DonationAmount DECIMAL(10,2)); INSERT INTO Donations (ID, DonorAge, DonationAmount) VALUES (1, 25, 50.00), (2, 35, 100.00), (3, 45, 25.00);", "sql": "SELECT FLOOR(DonorAge/10)*10 AS AgeGroup, AVG(DonationAmount) as AvgDonation FROM Donations GROUP BY AgeGroup;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 110, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'btree_gin' (example 8).", "schema": null, "sql": "SELECT * FROM test_money WHERE i>='1'::money ORDER BY i;", "explanation": "Example query from the 'btree_gin' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 56, "num_statements": 1} {"question": "Update the conservation status of the 'Hawaiian monk seal' to 'Endangered' in the 'marine_species' table.", "schema": "CREATE TABLE marine_species (id INT, name VARCHAR(255), conservation_status VARCHAR(255)); INSERT INTO marine_species (id, name, conservation_status) VALUES (1, 'Pacific salmon', 'Vulnerable'), (2, 'Hawaiian monk seal', 'Threatened'), (3, 'Blue whale', 'Endangered');", "sql": "UPDATE marine_species SET conservation_status = 'Endangered' WHERE name = 'Hawaisn monk seal';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 94, "num_statements": 1} {"question": "Update environmental impact score for site 4", "schema": "CREATE TABLE impact (id INT PRIMARY KEY, site_id INT, impact_score INT);", "sql": "UPDATE impact SET impact_score = 85 WHERE site_id = 4;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 54, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who was the home team for the match at Stadion pod Vrmcem?", "schema": "CREATE TABLE table_name_77 (home VARCHAR, venue VARCHAR)", "sql": "SELECT home FROM table_name_77 WHERE venue = 'stadion pod vrmcem';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "List the names of attorneys who have never lost a case.", "schema": "CREATE TABLE cases (case_id INT, case_outcome VARCHAR(10), attorney_id INT); INSERT INTO cases (case_id, case_outcome, attorney_id) VALUES (1, 'Won', 101), (2, 'Lost', 102), (3, 'Won', 101); CREATE TABLE attorneys (attorney_id INT, attorney_name VARCHAR(20)); INSERT INTO attorneys (attorney_id, attorney_name) VALUES (101, 'Smith'), (102, 'Johnson'), (103, 'Williams');", "sql": "SELECT a.attorney_name FROM attorneys a LEFT JOIN cases c ON a.attorney_id = c.attorney_id WHERE c.case_outcome IS NULL;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 120, "num_statements": 1} {"question": "Who are the developers of the smart contract 'Uniswap'?", "schema": "CREATE TABLE smart_contract_developers (developer_id INT PRIMARY KEY, developer_name TEXT, contract_name TEXT); INSERT INTO smart_contract_developers (developer_id, developer_name, contract_name) VALUES (1, 'Vitalik Buterin', 'Uniswap'), (2, 'Hayden Adams', 'Uniswap');", "sql": "SELECT developer_name FROM smart_contract_developers WHERE contract_name = 'Uniswap';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 85, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many bronze numbers had a total of more than 4 when the rank is less than four, germany is involved, and there's less than 5 silver?", "schema": "CREATE TABLE table_name_67 (bronze VARCHAR, silver VARCHAR, nation VARCHAR, total VARCHAR, rank VARCHAR)", "sql": "SELECT COUNT(bronze) FROM table_name_67 WHERE total > 4 AND rank < 4 AND nation = 'germany' AND silver < 5;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 107, "num_statements": 1} {"question": "Delete the record with id 1 in the table 'esg_investments'.", "schema": "CREATE TABLE esg_investments (id INT, country VARCHAR(255), amount FLOAT); INSERT INTO esg_investments (id, country, amount) VALUES (1, 'Canada', 6000000), (2, 'Mexico', 7000000);", "sql": "DELETE FROM esg_investments WHERE id = 1;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_delete", "is_postgresql_specific": false, "sql_length": 41, "num_statements": 1} {"question": "What is the total revenue for each product in the 'seafood_sales' table?", "schema": "CREATE TABLE seafood_sales (region VARCHAR(255), product VARCHAR(255), revenue DECIMAL(8,2), quantity INT); INSERT INTO seafood_sales (region, product, revenue, quantity) VALUES ('North', 'Tilapia', 1250.00, 500), ('South', 'Salmon', 3500.00, 800), ('North', 'Catfish', 2000.00, 600), ('East', 'Tilapia', 1750.00, 450), ('East', 'Salmon', 3000.00, 700), ('West', 'Tilapia', 2500.00, 550), ('West', 'Catfish', 2200.00, 400), ('South', 'Tilapia', 2750.00, 700);", "sql": "SELECT product, SUM(revenue) as total_revenue FROM seafood_sales GROUP BY product;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 82, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What type was issued in 1964?", "schema": "CREATE TABLE table_name_1 (type VARCHAR, issued VARCHAR)", "sql": "SELECT type FROM table_name_1 WHERE issued = 1964;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 50, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the Player who has a CFL Team of toronto?", "schema": "CREATE TABLE table_name_24 (player VARCHAR, cfl_team VARCHAR)", "sql": "SELECT player FROM table_name_24 WHERE cfl_team = 'toronto';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 60, "num_statements": 1} {"question": "pgTAP test for Ownership (assertion 110).", "schema": null, "sql": "SELECT * FROM check_test(\n index_owner_is('sometab', 'idx_hey', current_user),\n\ttrue,\n 'index_owner_is(table, index, user)',\n 'Index idx_hey ON sometab should be owned by ' || quote_ident(current_user),\n ''\n);", "explanation": "SQL assertion from pgTAP test suite for Ownership.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 221, "num_statements": 1} {"question": "What is the distribution of AI safety incidents by month for the AI subfield of algorithmic fairness in 2021?", "schema": "CREATE TABLE ai_safety_incidents (incident_id INT, incident_date DATE, ai_subfield TEXT, incident_description TEXT); INSERT INTO ai_safety_incidents (incident_id, incident_date, ai_subfield, incident_description) VALUES (1, '2021-01-05', 'Algorithmic Fairness', 'AI system showed bias against certain groups'); INSERT INTO ai_safety_incidents (incident_id, incident_date, ai_subfield, incident_description) VALUES (2, '2021-03-15', 'Algorithmic Fairness', 'AI system showed disparate impact'); INSERT INTO ai_safety_incidents (incident_id, incident_date, ai_subfield, incident_description) VALUES (3, '2021-04-01', 'Algorithmic Fairness', 'AI system showed bias against certain groups');", "sql": "SELECT DATE_PART('month', incident_date) as month, COUNT(*) as incidents FROM ai_safety_incidents WHERE incident_date BETWEEN '2021-01-01' AND '2021-12-31' AND ai_subfield = 'Algorithmic Fairness' GROUP BY month;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 212, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who is week 1 if week 3 is Natasha Budhi?", "schema": "CREATE TABLE table_name_3 (week_1 VARCHAR, week_3 VARCHAR)", "sql": "SELECT week_1 FROM table_name_3 WHERE week_3 = 'natasha budhi';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 63, "num_statements": 1} {"question": "Delete all community policing events in district 3 before '2020-01-01'", "schema": "CREATE TABLE districts (id INT, name VARCHAR(255)); INSERT INTO districts (id, name) VALUES (3, 'Greenhills'); CREATE TABLE community_policing_events (id INT, district_id INT, event_date DATE); INSERT INTO community_policing_events (id, district_id, event_date) VALUES (3001, 3, '2020-01-02'), (3002, 3, '2020-01-03'), (3003, 3, '2019-12-31');", "sql": "DELETE FROM community_policing_events WHERE event_date < '2020-01-01' AND district_id = 3;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_delete", "is_postgresql_specific": false, "sql_length": 90, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is Place, when Year is greater than 2006, and when Champion is \"Asvel\"?", "schema": "CREATE TABLE table_name_37 (place VARCHAR, year VARCHAR, champion VARCHAR)", "sql": "SELECT place FROM table_name_37 WHERE year > 2006 AND champion = 'asvel';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 73, "num_statements": 1} {"question": "List all fans who have not attended any games in the last year", "schema": "CREATE TABLE fans (fan_id INT, gender VARCHAR(10), last_attended_game DATE); INSERT INTO fans (fan_id, gender, last_attended_game) VALUES (1, 'Male', '2022-02-15'), (2, 'Female', '2022-03-01'), (3, 'Male', '2021-12-31'), (4, 'Female', '2022-01-01'), (5, 'Male', '2022-03-15'), (6, 'Female', NULL);", "sql": "SELECT * FROM fans WHERE last_attended_game IS NULL;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 52, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: When did Yugoslavia play a friendly game in Belgrade?", "schema": "CREATE TABLE table_name_88 (date VARCHAR, city VARCHAR, type_of_game VARCHAR)", "sql": "SELECT date FROM table_name_88 WHERE city = 'belgrade' AND type_of_game = 'friendly';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 85, "num_statements": 1} {"question": "List the programs that have a higher total budget compared to the total budget for all programs combined, and their respective budgets.", "schema": "CREATE TABLE programs(id INT, name TEXT, budget FLOAT);", "sql": "SELECT programs.name, programs.budget FROM programs WHERE programs.budget > (SELECT SUM(budget) FROM programs) - programs.budget;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 129, "num_statements": 1} {"question": "How many publications have been made in the Journal of Applied Mathematics by graduate students in the Mathematics department?", "schema": "CREATE TABLE Students (StudentID INT, FirstName VARCHAR(50), LastName VARCHAR(50), Department VARCHAR(50), Program VARCHAR(50)); CREATE TABLE Publications (PublicationID INT, Title VARCHAR(100), Author VARCHAR(50), Year INT, Journal VARCHAR(50), StudentID INT);", "sql": "SELECT COUNT(PublicationID) as 'Number of Publications' FROM Publications p JOIN Students s ON p.StudentID = s.StudentID WHERE s.Department = 'Mathematics' AND Journal = 'Journal of Applied Mathematics';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 203, "num_statements": 1} {"question": "What is the total data usage for postpaid and prepaid customers, sorted by data usage in descending order?", "schema": "CREATE TABLE mobile_customers (customer_id INT, plan_type VARCHAR(10), data_usage FLOAT, region VARCHAR(20)); INSERT INTO mobile_customers (customer_id, plan_type, data_usage, region) VALUES (1, 'postpaid', 3.5, 'Chicago'), (2, 'prepaid', 2.0, 'Chicago'), (3, 'postpaid', 5.0, 'New York'); CREATE TABLE plan_types (plan_type VARCHAR(10)); INSERT INTO plan_types (plan_type) VALUES ('postpaid'), ('prepaid');", "sql": "SELECT pt.plan_type, SUM(mc.data_usage) AS total_data_usage FROM mobile_customers mc JOIN plan_types pt ON mc.plan_type = pt.plan_type GROUP BY pt.plan_type ORDER BY total_data_usage DESC;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 188, "num_statements": 1} {"question": "What is the number of marine species with data on ocean acidification?", "schema": "CREATE TABLE marine_species (id INT, species_name TEXT, ocean_acidification_data BOOLEAN); INSERT INTO marine_species (id, species_name, ocean_acidification_data) VALUES (1, 'Coral', true); INSERT INTO marine_species (id, species_name, ocean_acidification_data) VALUES (2, 'Humpback Whale', false);", "sql": "SELECT COUNT(*) FROM marine_species WHERE ocean_acidification_data = true;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 74, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who is the player for south carolina college?", "schema": "CREATE TABLE table_20871703_1 (player_name VARCHAR, college VARCHAR)", "sql": "SELECT player_name FROM table_20871703_1 WHERE college = 'South Carolina';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 74, "num_statements": 1} {"question": "pgTAP test for Ownership (assertion 37).", "schema": null, "sql": "SELECT * FROM check_test(\n relation_owner_is('__not__apart', current_user, 'mumble'),\n\tfalse,\n 'relation_owner_is(non-part, user)',\n 'mumble',\n ' Relation __not__apart does not exist'\n);", "explanation": "SQL assertion from pgTAP test suite for Ownership.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 201, "num_statements": 1} {"question": "What is the total weight of items in the Singapore warehouse?", "schema": "CREATE TABLE Warehouse (id INT, location VARCHAR(50), quantity INT, weight FLOAT); INSERT INTO Warehouse (id, location, quantity, weight) VALUES (1, 'USA', 300, 12.5), (2, 'Canada', 250, 11.0), (3, 'France', 500, 13.2), (4, 'Germany', 400, 14.7), (5, 'UK', 300, 15.3), (6, 'Japan', 450, 16.1), (7, 'Brazil', 200, 17.5), (8, 'Russia', 600, 18.0), (9, 'Singapore', 150, 20.0);", "sql": "SELECT SUM(weight) FROM Warehouse WHERE location = 'Singapore';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 63, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the name of the national final main host when the semi final/heat host shows internal selection in 2011?", "schema": "CREATE TABLE table_name_83 (national_final_main_host VARCHAR, semi_final_heat_host VARCHAR, year_s_ VARCHAR)", "sql": "SELECT national_final_main_host FROM table_name_83 WHERE semi_final_heat_host = 'internal selection' AND year_s_ = '2011';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 122, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the name of the car that was made in the years 1956-1958?", "schema": "CREATE TABLE table_name_18 (automobile_name VARCHAR, year VARCHAR)", "sql": "SELECT automobile_name FROM table_name_18 WHERE year = '1956-1958';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 67, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is Geelong's home team score?", "schema": "CREATE TABLE table_name_89 (home_team VARCHAR)", "sql": "SELECT home_team AS score FROM table_name_89 WHERE home_team = 'geelong';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 73, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the earliest year a game was played in Athens?", "schema": "CREATE TABLE table_name_95 (year INTEGER, location VARCHAR)", "sql": "SELECT MIN(year) FROM table_name_95 WHERE location = 'athens';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 62, "num_statements": 1} {"question": "How many public schools in the Education department have a budget over $1,000,000?", "schema": "CREATE TABLE Education_Dept (ID INT, School_Type VARCHAR(255), Budget FLOAT); INSERT INTO Education_Dept (ID, School_Type, Budget) VALUES (1, 'Public', 1200000), (2, 'Public', 800000), (3, 'Private', 900000);", "sql": "SELECT COUNT(*) FROM Education_Dept WHERE School_Type = 'Public' AND Budget > 1000000;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 86, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'postgres_fdw' (example 1293).", "schema": null, "sql": "INSERT INTO result_tbl SELECT * FROM async_pt WHERE b % 100 = 0;", "explanation": "Example query from the 'postgres_fdw' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 64, "num_statements": 1} {"question": "Which natural skincare brands in the USA have the highest revenue growth?", "schema": "CREATE TABLE skincare_revenue (id INT, brand VARCHAR(50), revenue DECIMAL(10,2), year INT, country VARCHAR(50)); INSERT INTO skincare_revenue (id, brand, revenue, year, country) VALUES (1, 'Brand C', 1000.00, 2021, 'USA');", "sql": "SELECT brand, (revenue - LAG(revenue, 1) OVER (PARTITION BY brand ORDER BY year)) / ABS(LAG(revenue, 1) OVER (PARTITION BY brand ORDER BY year)) * 100.0 AS growth_percentage FROM skincare_revenue WHERE country = 'USA' AND revenue IS NOT NULL GROUP BY brand, revenue, year HAVING growth_percentage IS NOT NULL ORDER BY growth_percentage DESC;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "query_window_function", "is_postgresql_specific": true, "sql_length": 341, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'stats_ext' (example 84).", "schema": null, "sql": "CREATE TABLE stxdinh2() INHERITS(stxdinh);", "explanation": "DDL from PostgreSQL core regression test for Stats Ext.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_table", "is_postgresql_specific": true, "sql_length": 42, "num_statements": 1} {"question": "Which traditional art types have the most pieces in the database?", "schema": "CREATE TABLE ArtTypes (ArtTypeID INT, ArtType VARCHAR(50)); CREATE TABLE ArtPieces (ArtPieceID INT, ArtTypeID INT, ArtistID INT, Year INT); INSERT INTO ArtTypes VALUES (1, 'Painting'), (2, 'Sculpture'), (3, 'Textile'), (4, 'Pottery'); INSERT INTO ArtPieces VALUES (1, 1, 1, 2010), (2, 1, 2, 2015), (3, 2, 3, 2005), (4, 2, 4, 2020), (5, 3, 5, 2018), (6, 3, 6, 2021), (7, 4, 7, 2019);", "sql": "SELECT ArtTypes.ArtType, COUNT(ArtPieces.ArtPieceID) AS ArtPieceCount FROM ArtTypes INNER JOIN ArtPieces ON ArtTypes.ArtTypeID = ArtPieces.ArtTypeID GROUP BY ArtTypes.ArtType ORDER BY ArtPieceCount DESC;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 203, "num_statements": 1} {"question": "Update the waste generation for plastic in the city of Chicago to 2000 grams in the second quarter of 2021.", "schema": "CREATE TABLE waste_generation (city VARCHAR(255), quarter INT, material_type VARCHAR(255), generation_grams INT); INSERT INTO waste_generation (city, quarter, material_type, generation_grams) VALUES ('Chicago', 2, 'Plastic', 1800);", "sql": "UPDATE waste_generation SET generation_grams = 2000 WHERE city = 'Chicago' AND quarter = 2 AND material_type = 'Plastic';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 121, "num_statements": 1} {"question": "How many solar power projects were implemented in Australia, South Africa, and Egypt between 2016 and 2021?", "schema": "CREATE TABLE solar_projects_2 (project_id INT, country VARCHAR(50), start_year INT, end_year INT); INSERT INTO solar_projects_2 (project_id, country, start_year, end_year) VALUES (1, 'Australia', 2017, 2021), (2, 'South Africa', 2018, 2020), (3, 'Egypt', 2016, 2019), (4, 'Australia', 2019, 2022), (5, 'South Africa', 2017, 2021), (6, 'Egypt', 2018, 2021), (7, 'Australia', 2016, 2018);", "sql": "SELECT COUNT(*) FROM solar_projects_2 WHERE country IN ('Australia', 'South Africa', 'Egypt') AND start_year BETWEEN 2016 AND 2021 AND end_year BETWEEN 2016 AND 2021;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 166, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: what are all the hometown where the average age is 16", "schema": "CREATE TABLE table_26267849_2 (hometown VARCHAR, age_s_ VARCHAR)", "sql": "SELECT hometown FROM table_26267849_2 WHERE age_s_ = '16';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 58, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How may population figures are given for Settimo Torinese", "schema": "CREATE TABLE table_1449176_1 (population VARCHAR, common_of VARCHAR)", "sql": "SELECT COUNT(population) FROM table_1449176_1 WHERE common_of = 'Settimo Torinese';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 83, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the score of the game with a loss of Maholm (2–4)?", "schema": "CREATE TABLE table_name_81 (score VARCHAR, loss VARCHAR)", "sql": "SELECT score FROM table_name_81 WHERE loss = 'maholm (2–4)';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 60, "num_statements": 1} {"question": "What is the total number of security incidents in the education sector?", "schema": "CREATE TABLE security_incidents (id INT, sector VARCHAR(20), incident VARCHAR(50)); INSERT INTO security_incidents (id, sector, incident) VALUES (1, 'Education', 'Phishing Attack');", "sql": "SELECT COUNT(*) FROM security_incidents WHERE sector = 'Education';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 67, "num_statements": 1} {"question": "PostgreSQL regression test 'box': Write the SELECT query (example 80).", "schema": null, "sql": "SELECT count(*) FROM quad_box_tbl WHERE b <<| box '((100,200),(300,500))';", "explanation": "Regression test for Box in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT count(*) FROM quad_box_tbl WHERE b <<| box '((100,200),(300,500))') AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 74, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What country was the golfer with a score of 72-72-72=216 representing?", "schema": "CREATE TABLE table_name_78 (country VARCHAR, score VARCHAR)", "sql": "SELECT country FROM table_name_78 WHERE score = 72 - 72 - 72 = 216;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 67, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who was the Home captain for the Test match of Australia in England at the Edgbaston Venue?", "schema": "CREATE TABLE table_name_99 (home_captain VARCHAR, venue VARCHAR)", "sql": "SELECT home_captain FROM table_name_99 WHERE venue = 'edgbaston';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 65, "num_statements": 1} {"question": "How many cases were handled by attorneys in the 'New York' office?", "schema": "CREATE TABLE offices (office_id INT, office_name VARCHAR(20), city VARCHAR(20), state VARCHAR(20)); INSERT INTO offices (office_id, office_name, city, state) VALUES (1, 'Boston', 'Boston', 'MA'), (2, 'New York', 'New York', 'NY'); CREATE TABLE attorneys (attorney_id INT, office_id INT); INSERT INTO attorneys (attorney_id, office_id) VALUES (1, 1), (2, 2); CREATE TABLE cases (case_id INT, attorney_id INT); INSERT INTO cases (case_id, attorney_id) VALUES (1, 1), (2, 2);", "sql": "SELECT COUNT(*) FROM cases JOIN attorneys ON cases.attorney_id = attorneys.attorney_id JOIN offices ON attorneys.office_id = offices.office_id WHERE offices.city = 'New York';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 175, "num_statements": 1} {"question": "How many Mars missions were launched by NASA?", "schema": "CREATE TABLE mars_missions (id INT, name VARCHAR(50), agency VARCHAR(50), year INT); INSERT INTO mars_missions (id, name, agency, year) VALUES (1, 'Mariner 4', 'NASA', 1964); INSERT INTO mars_missions (id, name, agency, year) VALUES (2, 'Viking 1', 'NASA', 1975); INSERT INTO mars_missions (id, name, agency, year) VALUES (3, 'Phoenix', 'NASA', 2007);", "sql": "SELECT COUNT(*) FROM mars_missions WHERE agency = 'NASA';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 57, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many points does catcat have with more than 2 draws?", "schema": "CREATE TABLE table_name_21 (points INTEGER, artist VARCHAR, draw VARCHAR)", "sql": "SELECT AVG(points) FROM table_name_21 WHERE artist = 'catcat' AND draw > 2;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 75, "num_statements": 1} {"question": "Get the number of items made of recycled materials", "schema": "CREATE TABLE items (id INT, name VARCHAR(255), material VARCHAR(255)); INSERT INTO items (id, name, material) VALUES (1, 'Jacket', 'recycled polyester'), (2, 'Hat', 'recycled paper'), (3, 'Shirt', 'cotton');", "sql": "SELECT COUNT(*) FROM items WHERE material LIKE '%recycled%';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 60, "num_statements": 1} {"question": "What is the total number of electric vehicles by manufacturer, grouped by country, with a count greater than 500?", "schema": "CREATE TABLE Manufacturers (ManufacturerID INT, ManufacturerName VARCHAR(100), Country VARCHAR(50)); INSERT INTO Manufacturers (ManufacturerID, ManufacturerName, Country) VALUES (1, 'Tesla', 'USA'), (2, 'Nissan', 'Japan'), (3, 'BMW', 'Germany'); CREATE TABLE ElectricVehicles (EVID INT, ManufacturerID INT, Model VARCHAR(50), Year INT); INSERT INTO ElectricVehicles (EVID, ManufacturerID, Model, Year) VALUES (1, 1, 'Model S', 2012), (2, 1, 'Model 3', 2017), (3, 2, 'Leaf', 2010), (4, 3, 'i3', 2013);", "sql": "SELECT Country, ManufacturerName, COUNT(*) as Total FROM ElectricVehicles EV JOIN Manufacturers M ON EV.ManufacturerID = M.ManufacturerID GROUP BY Country, ManufacturerName HAVING COUNT(*) > 500;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 195, "num_statements": 1} {"question": "PostgreSQL regression test 'jsonb': Write the SELECT query (example 215).", "schema": null, "sql": "SELECT '{\"f2\":[\"f3\",1],\"f4\":{\"f5\":99,\"f6\":\"stringy\"}}'::jsonb#>array['f2','1'];", "explanation": "Regression test for Jsonb in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT '{\"f2\":[\"f3\",1],\"f4\":{\"f5\":99,\"f6\":\"stringy\"}}'::jsonb#>array['f2','1']) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": true, "sql_length": 79, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which League from has a Player of louis domingue?", "schema": "CREATE TABLE table_name_48 (league_from VARCHAR, player VARCHAR)", "sql": "SELECT league_from FROM table_name_48 WHERE player = 'louis domingue';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 70, "num_statements": 1} {"question": "List the total number of workouts and unique members who participated in CrossFit classes in the last month.", "schema": "CREATE TABLE workouts (id INT, member_id INT, workout_type VARCHAR(20), workout_date DATE);", "sql": "SELECT COUNT(DISTINCT id) as total_members, COUNT(*) as total_workouts FROM workouts WHERE workout_type = 'CrossFit' AND workout_date >= DATE_SUB(CURRENT_DATE, INTERVAL 1 MONTH);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 178, "num_statements": 1} {"question": "Which retailers are located in countries that import garments from suppliers located in Africa?", "schema": "CREATE TABLE supplier_garments (supplier_id INT, garment_id INT); INSERT INTO supplier_garments (supplier_id, garment_id) VALUES (1, 1); INSERT INTO supplier_garments (supplier_id, garment_id) VALUES (2, 2);", "sql": "SELECT retailers.name FROM retailers JOIN countries ON retailers.country = countries.name JOIN imports ON countries.code = imports.destination_country WHERE imports.source_continent = 'Africa';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 193, "num_statements": 1} {"question": "What is the number of animals in the 'community_education' table that belong to endangered species?", "schema": "CREATE TABLE community_education (id INT, animal_name VARCHAR(50)); CREATE TABLE endangered_species (id INT, animal_name VARCHAR(50), endangered_status VARCHAR(50));", "sql": "SELECT COUNT(*) FROM community_education ce INNER JOIN endangered_species es ON ce.animal_name = es.animal_name WHERE endangered_status = 'Endangered';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 151, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Tell me the sum of yards for jason campbell and long less than 23", "schema": "CREATE TABLE table_name_29 (yards INTEGER, player VARCHAR, long VARCHAR)", "sql": "SELECT SUM(yards) FROM table_name_29 WHERE player = 'jason campbell' AND long < 23;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 83, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who was the opponent on September 8?", "schema": "CREATE TABLE table_name_24 (opponent VARCHAR, date VARCHAR)", "sql": "SELECT opponent FROM table_name_24 WHERE date = 'september 8';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 62, "num_statements": 1} {"question": "Insert a new record into the design_standards table with the name 'Bridge Building Standard' and description 'Standard for building bridges' for the region 'North America'", "schema": "CREATE TABLE design_standards (id INT PRIMARY KEY, standard_name VARCHAR(255), description TEXT, region VARCHAR(255)); INSERT INTO design_standards (id, standard_name, description, region) VALUES (1, 'Highway Design Standard', 'Standard for designing highways', 'North America'); INSERT INTO design_standards (id, standard_name, description, region) VALUES (2, 'Railway Design Standard', 'Standard for designing railways', 'Europe');", "sql": "INSERT INTO design_standards (standard_name, description, region) VALUES ('Bridge Building Standard', 'Standard for building bridges', 'North America');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 152, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the Week of the game against Green Bay Packers?", "schema": "CREATE TABLE table_name_71 (week INTEGER, opponent VARCHAR)", "sql": "SELECT AVG(week) FROM table_name_71 WHERE opponent = 'green bay packers';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 73, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the current status of the person named Nicholls?", "schema": "CREATE TABLE table_name_65 (status VARCHAR, name VARCHAR)", "sql": "SELECT status FROM table_name_65 WHERE name = 'nicholls';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 57, "num_statements": 1} {"question": "Find the title and genre of the bottom 2 movies with the lowest ratings from studios based in France, ordered by ratings in ascending order.", "schema": "CREATE TABLE movies (title VARCHAR(255), genre VARCHAR(255), studio VARCHAR(255), rating FLOAT); INSERT INTO movies (title, genre, studio, rating) VALUES ('Movie7', 'Action', 'France Studio1', 6.5), ('Movie8', 'Drama', 'France Studio2', 5.0);", "sql": "SELECT title, genre FROM (SELECT title, genre, studio, rating, ROW_NUMBER() OVER (PARTITION BY studio ORDER BY rating ASC) as rank FROM movies WHERE studio LIKE '%France%') subquery WHERE rank <= 2 ORDER BY rating ASC;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "query_window_function", "is_postgresql_specific": true, "sql_length": 218, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: For engines of Maserati Straight-6 and entrants of H H Gould, what is the latest year?", "schema": "CREATE TABLE table_name_14 (year INTEGER, engine VARCHAR, entrant VARCHAR)", "sql": "SELECT MAX(year) FROM table_name_14 WHERE engine = 'maserati straight-6' AND entrant = 'h h gould';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 99, "num_statements": 1} {"question": "What is the total number of blocks by the Hawks' Trae Young in the 2016-2017 season?", "schema": "CREATE TABLE teams (team_name VARCHAR(255), season_start_year INT, season_end_year INT); INSERT INTO teams (team_name, season_start_year, season_end_year) VALUES ('Hawks', 2016, 2017); CREATE TABLE players (player_name VARCHAR(255), team_name VARCHAR(255), blocks INT);", "sql": "SELECT SUM(blocks) FROM players WHERE player_name = 'Trae Young' AND team_name = 'Hawks' AND season_start_year = 2016 AND season_end_year = 2017;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 145, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: When was Montreal a visitor?", "schema": "CREATE TABLE table_name_34 (date VARCHAR, visitor VARCHAR)", "sql": "SELECT date FROM table_name_34 WHERE visitor = 'montreal';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 58, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Is the right halfback player a starter?", "schema": "CREATE TABLE table_25517718_3 (starter VARCHAR, position VARCHAR)", "sql": "SELECT starter FROM table_25517718_3 WHERE position = 'Right halfback';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 71, "num_statements": 1} {"question": "List all projects with a start date on or after 2022-07-01", "schema": "CREATE TABLE project_timeline (project_id INT, start_date DATE, end_date DATE);", "sql": "SELECT * FROM project_timeline WHERE start_date >= '2022-07-01';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 64, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the result when Dundee were the Opponent at Venue A on 6 September 1986?", "schema": "CREATE TABLE table_name_9 (result VARCHAR, date VARCHAR, opponent VARCHAR, venue VARCHAR)", "sql": "SELECT result FROM table_name_9 WHERE opponent = 'dundee' AND venue = 'a' AND date = '6 september 1986';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 104, "num_statements": 1} {"question": "List the number of research grants awarded to each department in the past year.", "schema": "CREATE TABLE Departments(DepartmentID INT, Department VARCHAR(255)); INSERT INTO Departments VALUES (1, 'Computer Science'); CREATE TABLE ResearchGrants(GranteeID INT, DepartmentID INT, GrantAmount DECIMAL(10, 2), GrantDate DATE); INSERT INTO ResearchGrants VALUES (1, 1, 50000.00, '2021-01-01');", "sql": "SELECT Departments.Department, COUNT(ResearchGrants.GranteeID) FROM Departments INNER JOIN ResearchGrants ON Departments.DepartmentID = ResearchGrants.DepartmentID WHERE ResearchGrants.GrantDate >= DATEADD(year, -1, GETDATE()) GROUP BY Departments.Department;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 259, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many districts does william b. oliver represent?", "schema": "CREATE TABLE table_1342359_2 (district VARCHAR, incumbent VARCHAR)", "sql": "SELECT COUNT(district) FROM table_1342359_2 WHERE incumbent = 'William B. Oliver';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 82, "num_statements": 1} {"question": "List the names of all regulatory frameworks in the database.", "schema": "CREATE TABLE regulatory_frameworks (framework_id serial, framework_name varchar(20)); INSERT INTO regulatory_frameworks (framework_id, framework_name) VALUES (1, 'GDPR'), (2, 'HIPAA'), (3, 'PCI-DSS');", "sql": "SELECT framework_name FROM regulatory_frameworks;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": true, "sql_length": 49, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the operator of the ensemble from Yorkshire?", "schema": "CREATE TABLE table_name_89 (operator VARCHAR, region VARCHAR)", "sql": "SELECT operator FROM table_name_89 WHERE region = 'yorkshire';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 62, "num_statements": 1} {"question": "What is the maximum price of natural makeup products sold in Germany?", "schema": "CREATE TABLE MakeupProducts(productId INT, productName VARCHAR(100), isNatural BOOLEAN, country VARCHAR(50), price DECIMAL(5,2)); INSERT INTO MakeupProducts(productId, productName, isNatural, country, price) VALUES (1, 'Mineral Foundation', true, 'Germany', 34.99), (2, 'Shea Butter Mascara', true, 'Germany', 19.99);", "sql": "SELECT MAX(price) FROM MakeupProducts WHERE isNatural = true AND country = 'Germany';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 85, "num_statements": 1} {"question": "pgTAP test for Cmpok (assertion 5).", "schema": null, "sql": "SELECT * FROM check_test(\n cmp_ok( ARRAY[1, 2], '=', ARRAY[1, 2]),\n true,\n 'cmp_ok( int[], =, int[] )',\n '',\n ''\n);", "explanation": "SQL assertion from pgTAP test suite for Cmpok.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": true, "sql_length": 130, "num_statements": 1} {"question": "Find the maximum ocean acidification level measured in each monitoring station and the corresponding date.'", "schema": "CREATE TABLE monitoring_stations (station_id TEXT, station_name TEXT); CREATE TABLE measurements (measurement_id TEXT, station_id TEXT, acidification_level FLOAT, measurement_date DATE);", "sql": "SELECT monitoring_stations.station_name, MAX(measurements.acidification_level), measurements.measurement_date FROM monitoring_stations JOIN measurements ON monitoring_stations.station_id = measurements.station_id GROUP BY monitoring_stations.station_name;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 261, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many total goals did the squad with 2 playoff apps, 2 FA Cup Apps, and 0 League Cup goals get?", "schema": "CREATE TABLE table_name_88 (total_goals INTEGER, league_cup_goals VARCHAR, playoff_apps VARCHAR, fa_cup_apps VARCHAR)", "sql": "SELECT SUM(total_goals) FROM table_name_88 WHERE playoff_apps = '2' AND fa_cup_apps = '2' AND league_cup_goals < 0;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 115, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'indexing' (example 492).", "schema": null, "sql": "create table parted_index_col_drop2 partition of parted_index_col_drop\n for values in (2) partition by list (a);", "explanation": "DDL from PostgreSQL core regression test for Indexing.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "ddl_table", "is_postgresql_specific": true, "sql_length": 113, "num_statements": 1} {"question": "pgTAP test for Policy (assertion 71).", "schema": null, "sql": "SELECT * FROM check_test(\n policy_cmd_is( 'public', 'passwd', 'root_all', 'all', 'whatever' ),\n true,\n 'policy_cmd_is(schema, table, policy, command, desc) for ALL',\n 'whatever',\n ''\n);", "explanation": "SQL assertion from pgTAP test suite for Policy.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 200, "num_statements": 1} {"question": "What is the total cost of smart city technology adoptions in the city of Toronto?", "schema": "CREATE TABLE smart_city_tech (tech_id INT, tech_name VARCHAR(30), city VARCHAR(20), cost DECIMAL(10,2)); INSERT INTO smart_city_tech (tech_id, tech_name, city, cost) VALUES (1, 'Smart Grids', 'Toronto', 5000000.00), (2, 'Smart Lighting', 'Montreal', 3000000.00), (3, 'Smart Traffic Management', 'Vancouver', 4000000.00);", "sql": "SELECT SUM(cost) FROM smart_city_tech WHERE city = 'Toronto';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 61, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'numeric_big' (example 477).", "schema": null, "sql": "INSERT INTO num_data VALUES (5, '-652755630.43456071828048833552593835051449845484289562110789582081210403487973096161149072377955192388469356112505543620695003436531392789029513380101663750625024853263344909355177280161504414335005574882649025508632900995595004153086358670541462762210415346958050909878501048483523600711486406055424807840429541335391538322886495085448421556770991545781035298449067051916630343957356635391594362639819978677032855590055900561501350354631803808000307050416047072513406855040715556454205065332997338225626635780147287003130754254277103928406089109802521803537038957372612837169223905290912251006321930223154562110264217937');", "explanation": "DML from PostgreSQL core regression test for Numeric Big.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 658, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many seats in 2001 with a quantity greater than 4?", "schema": "CREATE TABLE table_name_17 (number_of_seats VARCHAR, year VARCHAR, quantity VARCHAR)", "sql": "SELECT COUNT(number_of_seats) FROM table_name_17 WHERE year = '2001' AND quantity > 4;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 86, "num_statements": 1} {"question": "Write the PL/pgSQL object from PostgreSQL regression test 'test_setup' (example 67).", "schema": null, "sql": "--\n-- These functions are used in tests that used to use md5(), which we now\n-- mostly avoid so that the tests will pass in FIPS mode.\n--\n\ncreate function fipshash(bytea)\n returns text\n strict immutable parallel safe leakproof\n return substr(encode(sha256($1), 'hex'), 1, 32);", "explanation": "PL/pgSQL object from PostgreSQL core test for Test Setup.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 285, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the Week when anke huber chanda rubin shows for Semi finalists, and the Runner-up is meredith mcgrath larisa savchenko?", "schema": "CREATE TABLE table_name_2 (week_of VARCHAR, semi_finalists VARCHAR, runner_up VARCHAR)", "sql": "SELECT week_of FROM table_name_2 WHERE semi_finalists = 'anke huber chanda rubin' AND runner_up = 'meredith mcgrath larisa savchenko';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 134, "num_statements": 1} {"question": "Insert a new sustainable sourcing record for restaurant 2 with a rating of 95. Use the sustainable_sourcing table.", "schema": "CREATE TABLE sustainable_sourcing (restaurant_id INT, rating INT); INSERT INTO sustainable_sourcing (restaurant_id, rating) VALUES (2, 95);", "sql": "INSERT INTO sustainable_sourcing (restaurant_id, rating) VALUES (2, 95);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 72, "num_statements": 1} {"question": "Find the total production of Praseodymium and Neodymium for each year in the given dataset?", "schema": "CREATE TABLE RareEarthElements_Production (year INT, element TEXT, production INT); INSERT INTO RareEarthElements_Production (year, element, production) VALUES (2019, 'Praseodymium', 500); INSERT INTO RareEarthElements_Production (year, element, production) VALUES (2019, 'Neodymium', 1000); INSERT INTO RareEarthElements_Production (year, element, production) VALUES (2020, 'Praseodymium', 600); INSERT INTO RareEarthElements_Production (year, element, production) VALUES (2020, 'Neodymium', 1200); INSERT INTO RareEarthElements_Production (year, element, production) VALUES (2021, 'Praseodymium', 700); INSERT INTO RareEarthElements_Production (year, element, production) VALUES (2021, 'Neodymium', 1400);", "sql": "SELECT year, SUM(production) as total_production FROM RareEarthElements_Production WHERE element IN ('Praseodymium', 'Neodymium') GROUP BY year;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 144, "num_statements": 1} {"question": "Calculate attendance by age group for the 'Art of the Americas'.", "schema": "CREATE TABLE attendance (id INT, age INT, event VARCHAR(50), visitors INT); INSERT INTO attendance (id, age, event, visitors) VALUES (1, 18, 'Art of the Americas', 500), (2, 25, 'Art of the Americas', 700), (3, 35, 'Art of the Americas', 800);", "sql": "SELECT event, AVG(age) as avg_age, COUNT(*) as total FROM attendance WHERE event = 'Art of the Americas' GROUP BY event;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 126, "num_statements": 1} {"question": "List the number of mental health parity violations in each state for the last 3 years, ordered by the date of violation in descending order.", "schema": "CREATE TABLE MentalHealthParityViolations (ViolationID INT, State VARCHAR(255), ViolationDate DATE); INSERT INTO MentalHealthParityViolations (ViolationID, State, ViolationDate) VALUES (1, 'California', '2019-04-01'); INSERT INTO MentalHealthParityViolations (ViolationID, State, ViolationDate) VALUES (2, 'Texas', '2020-01-15'); INSERT INTO MentalHealthParityViolations (ViolationID, State, ViolationDate) VALUES (3, 'California', '2021-03-05');", "sql": "SELECT State, COUNT(*) as NumberOfViolations, ViolationDate FROM MentalHealthParityViolations WHERE ViolationDate >= DATE_SUB(CURRENT_DATE, INTERVAL 3 YEAR) GROUP BY State, ViolationDate ORDER BY ViolationDate DESC;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 215, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'pageinspect' (example 10).", "schema": null, "sql": "SELECT hash_page_type(get_raw_page('test_hash_a_idx', 4));", "explanation": "Example query from the 'pageinspect' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 58, "num_statements": 1} {"question": "List the names and capacities of hotels with sustainability certifications in New York.", "schema": "CREATE TABLE hotels (hotel_id INT, name VARCHAR(255), city VARCHAR(255), capacity INT, certified BOOLEAN); INSERT INTO hotels (hotel_id, name, city, capacity, certified) VALUES (1, 'EcoHotel NY', 'New York', 150, TRUE), (2, 'GreenHotel NY', 'New York', 200, FALSE);", "sql": "SELECT name, capacity FROM hotels WHERE city = 'New York' AND certified = TRUE;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 79, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What opponent had an attendance of 63,659?", "schema": "CREATE TABLE table_name_78 (opponent_number VARCHAR, attendance VARCHAR)", "sql": "SELECT opponent_number FROM table_name_78 WHERE attendance = '63,659';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 70, "num_statements": 1} {"question": "List all fish species from the \"fish_species\" table that belong to the \"Salmonidae\" family", "schema": "create table fish_species (id integer, name text, family text, region text); insert into fish_species (id, name, family, region) values (1, 'Salmon', 'Salmonidae', 'North Atlantic'); insert into fish_species (id, name, family, region) values (2, 'Trout', 'Salmonidae', 'North Pacific');", "sql": "select * from fish_species where family = 'Salmonidae';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 55, "num_statements": 1} {"question": "What is the maximum carbon dioxide level (in ppm) recorded for fish farming in Japan?", "schema": "CREATE TABLE co2_measurements (id INT, farm_id INT, timestamp TIMESTAMP, co2_level FLOAT); CREATE VIEW japan_co2_measurements AS SELECT * FROM co2_measurements WHERE country = 'Japan';", "sql": "SELECT MAX(co2_level) FROM japan_co2_measurements;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 50, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the total number of cars for panther racing and grid of 9", "schema": "CREATE TABLE table_17304308_1 (car_no VARCHAR, team VARCHAR, grid VARCHAR)", "sql": "SELECT COUNT(car_no) FROM table_17304308_1 WHERE team = 'Panther Racing' AND grid = 9;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 86, "num_statements": 1} {"question": "How many visual artists are represented in the database, and what is the distribution by their gender?", "schema": "CREATE TABLE artists (id INT, name VARCHAR(255), birth_date DATE, gender VARCHAR(50));", "sql": "SELECT COUNT(*) as total_artists, gender FROM artists GROUP BY gender;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 70, "num_statements": 1} {"question": "Update the region to 'Asia Pacific' for all records with a revenue above 8000 in the SkincareSales table.", "schema": "CREATE TABLE SkincareSales (productID INT, productName VARCHAR(50), region VARCHAR(50), revenue DECIMAL(10,2)); INSERT INTO SkincareSales (productID, productName, region, revenue) VALUES (1, 'Nourishing Cream', 'Europe', 5000.00), (2, 'Soothing Lotion', 'Europe', 7000.00), (3, 'Regenerating Serum', 'Europe', 8000.00), (4, 'Revitalizing Moisturizer', 'North America', 6000.00), (5, 'Purifying Cleanser', 'North America', 9000.00);", "sql": "UPDATE SkincareSales SET region = 'Asia Pacific' WHERE revenue > 8000;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 70, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the 1991 finish for the 1993 Grand Slams?", "schema": "CREATE TABLE table_name_60 (Id VARCHAR)", "sql": "SELECT 1991 FROM table_name_60 WHERE 1993 = 'grand slams';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 58, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which Pos has a Car # of 33?", "schema": "CREATE TABLE table_name_72 (pos INTEGER, car__number VARCHAR)", "sql": "SELECT SUM(pos) FROM table_name_72 WHERE car__number = 33;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 58, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the result in round qf?", "schema": "CREATE TABLE table_name_56 (result VARCHAR, round VARCHAR)", "sql": "SELECT result FROM table_name_56 WHERE round = 'qf';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 52, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Find the product type whose average price is higher than the average price of all products.", "schema": "CREATE TABLE products (product_type_code VARCHAR, product_price INTEGER)", "sql": "SELECT product_type_code FROM products GROUP BY product_type_code HAVING AVG(product_price) > (SELECT AVG(product_price) FROM products);", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 136, "num_statements": 1} {"question": "What is the minimum CO2 emission reduction (in metric tons) achieved by carbon offset programs in 'NorthAmerica' in '2020'?", "schema": "CREATE TABLE carbon_offsets (id INT, program_name VARCHAR(50), location VARCHAR(50), year INT, co2_reduction INT); INSERT INTO carbon_offsets (id, program_name, location, year, co2_reduction) VALUES (1, 'ProgramA', 'NorthAmerica', 2020, 800), (2, 'ProgramB', 'NorthAmerica', 2020, 1000);", "sql": "SELECT MIN(co2_reduction) FROM carbon_offsets WHERE location = 'NorthAmerica' AND year = 2020;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 94, "num_statements": 1} {"question": "pgTAP test for Resultset (assertion 91).", "schema": null, "sql": "INSERT INTO names (name) VALUES ('Angelina');", "explanation": "SQL assertion from pgTAP test suite for Resultset.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 45, "num_statements": 1} {"question": "Write the SQL definition from PostgreSQL contrib extension 'contrib' (item 99).", "schema": null, "sql": "CREATE FUNCTION gbt_float8_compress(internal)\nRETURNS internal\nAS 'MODULE_PATHNAME'\nLANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;", "explanation": "SQL definition from the 'contrib' PostgreSQL contrib extension.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 126, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the number of males 977 948?", "schema": "CREATE TABLE table_name_20 (number VARCHAR, males VARCHAR)", "sql": "SELECT number FROM table_name_20 WHERE males = '977 948';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 57, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many points did Carlton score?", "schema": "CREATE TABLE table_name_41 (home_team VARCHAR, away_team VARCHAR)", "sql": "SELECT home_team AS score FROM table_name_41 WHERE away_team = 'carlton';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 73, "num_statements": 1} {"question": "pgTAP test for Pgtap--0.91.0--0.92.0 (assertion 62).", "schema": null, "sql": "-- triggers_are( schema, table, triggers[], description )\nCREATE OR REPLACE FUNCTION triggers_are( NAME, NAME, NAME[], TEXT )\nRETURNS TEXT AS $$\n SELECT _are(\n 'triggers',\n ARRAY(\n SELECT t.tgname\n FROM pg_catalog.pg_trigger t\n JOIN pg_catalog.pg_class c ON c.oid = t.tgrelid\n JOIN pg_catalog.pg_namespace n ON n.oid = c.relnamespace\n WHERE n.nspname = $1\n AND c.relname = $2\n AND NOT t.tgisinternal\n EXCEPT\n SELECT $3[i]\n FROM generate_series(1, array_upper($3, 1)) s(i)\n ),\n ARRAY(\n SELECT $3[i]\n FROM generate_series(1, array_upper($3, 1)) s(i)\n EXCEPT\n SELECT t.tgname\n FROM pg_catalog.pg_trigger t\n JOIN pg_catalog.pg_class c ON c.oid = t.tgrelid\n JOIN pg_catalog.pg_namespace n ON n.oid = c.relnamespace\n WHERE n.nspname = $1\n AND c.relname = $2\n AND NOT t.tgisinternal\n ),\n $4\n );\n$$ LANGUAGE SQL;", "explanation": "SQL assertion from pgTAP test suite for Pgtap--0.91.0--0.92.0.", "validation_query": null, "source": "pgtap_tests", "difficulty": "advanced", "category": "plpgsql_trigger", "is_postgresql_specific": true, "sql_length": 1110, "num_statements": 2} {"question": "Identify policyholders who have made more than two claims in Wisconsin", "schema": "CREATE TABLE claims (policyholder_id INT, claim_number INT, state VARCHAR(2)); INSERT INTO claims (policyholder_id, claim_number, state) VALUES (1, 1, 'WI'), (1, 2, 'WI'), (1, 3, 'WI'), (2, 1, 'WI');", "sql": "SELECT policyholder_id FROM claims WHERE state = 'WI' GROUP BY policyholder_id HAVING COUNT(*) > 2;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 99, "num_statements": 1} {"question": "PostgreSQL regression test 'int2': Write the SELECT query (example 62).", "schema": null, "sql": "SELECT int2 '0x8000';", "explanation": "Regression test for Int2 in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT int2 '0x8000') AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 21, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the IATA when the ICAO is wipp?", "schema": "CREATE TABLE table_name_52 (iata VARCHAR, icao VARCHAR)", "sql": "SELECT iata FROM table_name_52 WHERE icao = 'wipp';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 51, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: In what country is the El Hamada area of operation?", "schema": "CREATE TABLE table_13150274_1 (country VARCHAR, area_of_operation VARCHAR)", "sql": "SELECT country FROM table_13150274_1 WHERE area_of_operation = 'El Hamada';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 75, "num_statements": 1} {"question": "Find the customer with the highest balance in the socially_responsible_lending accounts, and display their account number, name, and balance.", "schema": "CREATE TABLE socially_responsible_lending (acct_number INT, name VARCHAR(50), balance DECIMAL(10,2)); INSERT INTO socially_responsible_lending (acct_number, name, balance) VALUES (3001, 'Jamal', 12000.00), (3002, 'Nadia', 16000.00), (3003, 'Zaki', 10000.00), (3004, 'Sara', 20000.00);", "sql": "SELECT acct_number, name, balance FROM (SELECT acct_number, name, balance, ROW_NUMBER() OVER (ORDER BY balance DESC) as rn FROM socially_responsible_lending) t WHERE rn = 1;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_window_function", "is_postgresql_specific": true, "sql_length": 173, "num_statements": 1} {"question": "Which programs did volunteer 'Zoe' engage with in 2022?", "schema": "CREATE TABLE Programs (ProgramID int, ProgramName varchar(255)); INSERT INTO Programs (ProgramID, ProgramName) VALUES (1, 'Education'), (2, 'Health'), (3, 'Environment'); CREATE TABLE Volunteers (VolunteerID int, VolunteerName varchar(255), ProgramID int, VolunteerDate date); INSERT INTO Volunteers (VolunteerID, VolunteerName, ProgramID, VolunteerDate) VALUES (10, 'Zoe', 1, '2022-02-20'), (11, 'Zoe', 2, '2022-09-10'), (12, 'Zoe', 3, '2022-11-20');", "sql": "SELECT DISTINCT ProgramName FROM Volunteers V JOIN Programs P ON V.ProgramID = P.ProgramID WHERE VolunteerName = 'Zoe';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 119, "num_statements": 1} {"question": "What is the average donation amount by age group in Canada?", "schema": "CREATE TABLE donations_age_canada (donation_id INT, donor_age INT, amount DECIMAL(10,2), country VARCHAR(255)); INSERT INTO donations_age_canada (donation_id, donor_age, amount, country) VALUES (5, 31, 450.00, 'Canada'), (6, 47, 300.00, 'Canada'), (7, 53, 500.00, 'Canada');", "sql": "SELECT AVG(amount) as avg_donation, FLOOR(donor_age / 10) * 10 as age_group FROM donations_age_canada WHERE country = 'Canada' GROUP BY age_group;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 146, "num_statements": 1} {"question": "List the number of safe AI algorithms designed for autonomous vehicles grouped by their evaluation category.", "schema": "CREATE TABLE safe_ai_algorithms_av (id INT, algorithm VARCHAR(25), evaluation VARCHAR(25), score FLOAT); INSERT INTO safe_ai_algorithms_av (id, algorithm, evaluation, score) VALUES (1, 'AlgorithmJ', 'Robustness', 0.93), (2, 'AlgorithmK', 'Security', 0.96), (3, 'AlgorithmL', 'Reliability', 0.92), (4, 'AlgorithmM', 'Robustness', 0.97);", "sql": "SELECT evaluation, COUNT(*) as num_algorithms FROM safe_ai_algorithms_av WHERE evaluation IN ('Robustness', 'Security', 'Reliability') GROUP BY evaluation;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 155, "num_statements": 1} {"question": "What is the minimum mental health score of students in 'Winter 2022' by school district?", "schema": "CREATE TABLE student_mental_health (student_id INT, mental_health_score INT, school_district VARCHAR(255), date DATE); INSERT INTO student_mental_health (student_id, mental_health_score, school_district, date) VALUES (1, 75, 'ABC School District', '2022-02-01'); CREATE VIEW winter_2022_smh AS SELECT * FROM student_mental_health WHERE date BETWEEN '2022-01-01' AND '2022-03-31';", "sql": "SELECT MIN(mental_health_score) as min_mental_health, school_district FROM winter_2022_smh GROUP BY school_district;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 116, "num_statements": 1} {"question": "Insert a new indigenous community record for the Inuit community in Greenland.", "schema": "CREATE TABLE arctic_communities (community_id INT, community_name VARCHAR(50), region_id INT);", "sql": "INSERT INTO arctic_communities (community_id, community_name, region_id) VALUES (1, 'Inuit', (SELECT region_id FROM arctic_regions WHERE region_name = 'Greenland'));", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 165, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the lowest To par, when Total is greater than 148, and when Year(s) Won is \"1959 , 1968 , 1974\"?", "schema": "CREATE TABLE table_name_97 (to_par INTEGER, total VARCHAR, year_s__won VARCHAR)", "sql": "SELECT MIN(to_par) FROM table_name_97 WHERE total < 148 AND year_s__won = '1959 , 1968 , 1974';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 95, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: WHere is the friends' school?", "schema": "CREATE TABLE table_name_65 (location VARCHAR, school VARCHAR)", "sql": "SELECT location FROM table_name_65 WHERE school = 'the friends' school';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 72, "num_statements": 1} {"question": "List all the vessels that entered the port of Oakland and their corresponding cargo types, sorted by the cargo type.", "schema": "CREATE TABLE port (port_id INT, port_name VARCHAR(50)); INSERT INTO port (port_id, port_name) VALUES (1, 'Oakland'), (2, 'Seattle'); CREATE TABLE vessels (vessel_id INT, port_id INT, quantity_containers INT); INSERT INTO vessels (vessel_id, port_id, quantity_containers) VALUES (101, 1, 500), (102, 1, 700), (103, 2, 800); CREATE TABLE cargo (cargo_id INT, cargo_type VARCHAR(50), vessel_id INT); INSERT INTO cargo (cargo_id, cargo_type, vessel_id) VALUES (201, 'Containers', 101), (202, 'Vehicles', 102), (203, 'Bulk', 103);", "sql": "SELECT vessels.vessel_id, cargo.cargo_type FROM vessels JOIN port ON vessels.port_id = port.port_id JOIN cargo ON vessels.vessel_id = cargo.vessel_id WHERE port.port_name = 'Oakland' ORDER BY cargo.cargo_type;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 209, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the points when the lost was 11?", "schema": "CREATE TABLE table_name_29 (points_for VARCHAR, lost VARCHAR)", "sql": "SELECT points_for FROM table_name_29 WHERE lost = '11';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 55, "num_statements": 1} {"question": "Calculate the number of properties co-owned by people from underrepresented communities in each neighborhood, including Indigenous, Latinx, and African American.", "schema": "CREATE TABLE Properties (PropertyID INT, CoOwnedBy VARCHAR(50), Neighborhood VARCHAR(20)); INSERT INTO Properties (PropertyID, CoOwnedBy, Neighborhood) VALUES (1, 'Female, Indigenous', 'UrbanCore'), (2, 'Male, African American', 'UrbanCore'), (3, 'Female, Latinx', 'Suburban');", "sql": "SELECT Neighborhood, COUNT(*) FROM Properties WHERE CoOwnedBy LIKE '%Female, Indigenous%' OR CoOwnedBy LIKE '%Male, African American%' OR CoOwnedBy LIKE '%Female, Latinx%' GROUP BY Neighborhood;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 194, "num_statements": 1} {"question": "What is the total area of all wildlife habitats in the forestry database?", "schema": "CREATE TABLE habitat (id INT, name VARCHAR(255), area FLOAT); INSERT INTO habitat (id, name, area) VALUES (1, 'Habitat1', 123.45); INSERT INTO habitat (id, name, area) VALUES (2, 'Habitat2', 234.56); CREATE TABLE region (id INT, name VARCHAR(255), habitat_id INT); INSERT INTO region (id, name, habitat_id) VALUES (1, 'Region1', 1); INSERT INTO region (id, name, habitat_id) VALUES (2, 'Region2', 2);", "sql": "SELECT SUM(h.area) FROM habitat h JOIN region r ON h.id = r.habitat_id;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 71, "num_statements": 1} {"question": "How many marine species are found in the Pacific Ocean?", "schema": "CREATE TABLE marine_species_by_ocean (name VARCHAR(255), ocean VARCHAR(255)); INSERT INTO marine_species_by_ocean (name, ocean) VALUES ('Clownfish', 'Pacific Ocean'), ('Sea Otter', 'Pacific Ocean');", "sql": "SELECT COUNT(*) FROM marine_species_by_ocean WHERE ocean = 'Pacific Ocean';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 75, "num_statements": 1} {"question": "Find investment strategies with an ESG score below 70 and risk level above 4.", "schema": "CREATE TABLE investment_strategies (strategy_id INT, ESG_score FLOAT, risk_level INT); INSERT INTO investment_strategies (strategy_id, ESG_score, risk_level) VALUES (101, 86.2, 3), (102, 78.9, 5), (103, 88.7, 2), (104, 65.1, 6);", "sql": "SELECT strategy_id FROM investment_strategies WHERE ESG_score < 70 AND risk_level > 4;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 86, "num_statements": 1} {"question": "What is the total number of artworks by medium in Oceania?", "schema": "CREATE TABLE Artworks (ArtworkID INT, ArtworkName TEXT, Medium TEXT); INSERT INTO Artworks (ArtworkID, ArtworkName, Medium) VALUES (1, 'The Starry Night', 'Oil on Canvas'); INSERT INTO Artworks (ArtworkID, ArtworkName, Medium) VALUES (2, 'Guernica', 'Oil on Canvas');", "sql": "SELECT Medium, COUNT(*) as NumArtworks FROM Artworks WHERE Region = 'Oceania' GROUP BY Medium;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 94, "num_statements": 1} {"question": "What is the average time to resolve each type of case in the justice system?", "schema": "CREATE TABLE Justice_System_Case_Resolution (ID INT, Case_Type VARCHAR(30), Avg_Time_To_Resolve INT); INSERT INTO Justice_System_Case_Resolution (ID, Case_Type, Avg_Time_To_Resolve) VALUES (1, 'Criminal', 60), (2, 'Civil', 90), (3, 'Family', 45);", "sql": "SELECT Case_Type, AVG(Avg_Time_To_Resolve) FROM Justice_System_Case_Resolution GROUP BY Case_Type;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 98, "num_statements": 1} {"question": "Add new records of cybersecurity incidents in a specific year to the \"cybersecurity_incidents\" table", "schema": "CREATE TABLE cybersecurity_incidents (id INT, year INT, type VARCHAR(255), country VARCHAR(255), description TEXT);", "sql": "INSERT INTO cybersecurity_incidents (id, year, type, country, description) VALUES (1, 2018, 'Data Breach', 'US', 'Unauthorized access to sensitive personal information'), (2, 2018, 'Phishing', 'Canada', 'Fraudulent emails sent to steal personal information');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 259, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the day 1 when the day 3 is math?", "schema": "CREATE TABLE table_name_16 (day_1 VARCHAR, day_3 VARCHAR)", "sql": "SELECT day_1 FROM table_name_16 WHERE day_3 = 'math';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 53, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many millions of people in the US saw the episode with season number 1?", "schema": "CREATE TABLE table_19401346_1 (us_viewers__millions_ VARCHAR, no_in_season VARCHAR)", "sql": "SELECT us_viewers__millions_ FROM table_19401346_1 WHERE no_in_season = 1;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 74, "num_statements": 1} {"question": "Delete records with a Population less than 1000 from the 'AnimalPopulation' table.", "schema": "CREATE TABLE AnimalPopulation (AnimalID int, AnimalName varchar(50), Population int); INSERT INTO AnimalPopulation (AnimalID, AnimalName, Population) VALUES (1, 'Tiger', 2000), (2, 'Elephant', 500), (3, 'Giraffe', 1200);", "sql": "DELETE FROM AnimalPopulation WHERE Population < 1000;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_delete", "is_postgresql_specific": false, "sql_length": 53, "num_statements": 1} {"question": "Write the PL/pgSQL object from PostgreSQL regression test 'create_procedure' (example 2).", "schema": null, "sql": "$$;\n\n\\df ptest1\nSELECT pg_get_functiondef('ptest1'::regproc);\n\n-- show only normal functions\n\\dfn public.*test*1\n\n-- show only procedures\n\\dfp public.*test*1\n\nSELECT ptest1('x'); -- error\nCALL ptest1('a'); -- ok\nCALL ptest1('xy' || 'zzy'); -- ok, constant-folded arg\nCALL ptest1(substring(random()::numeric(20,15)::text, 1, 1)); -- ok, volatile arg\n\nSELECT * FROM cp_test ORDER BY b COLLATE \"C\";\n\n\n-- SQL-standard body\nCREATE PROCEDURE ptest1s(x text)\nLANGUAGE SQL\nBEGIN ATOMIC\n INSERT INTO cp_test VALUES (1, x);\nEND;\n\n\\df ptest1s\nSELECT pg_get_functiondef('ptest1s'::regproc);\n\nCALL ptest1s('b');\n\nSELECT * FROM cp_test ORDER BY b COLLATE \"C\";\n\n-- utility functions currently not supported here\nCREATE PROCEDURE ptestx()\nLANGUAGE SQL\nBEGIN ATOMIC\n CREATE TABLE x (a int);\nEND;\n\n\nCREATE PROCEDURE ptest2()\nLANGUAGE SQL\nAS $$\nSELECT 5;", "explanation": "PL/pgSQL object from PostgreSQL core test for Create Procedure.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "plpgsql_function", "is_postgresql_specific": false, "sql_length": 841, "num_statements": 15} {"question": "pgTAP test for Pgtap--Unpackaged--0.91.0 (assertion 24).", "schema": null, "sql": "ALTER EXTENSION pgtap ADD FUNCTION diag( VARIADIC anyarray );", "explanation": "SQL assertion from pgTAP test suite for Pgtap--Unpackaged--0.91.0.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 61, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What year did she compete in tampere, finland?", "schema": "CREATE TABLE table_name_95 (year INTEGER, venue VARCHAR)", "sql": "SELECT AVG(year) FROM table_name_95 WHERE venue = 'tampere, finland';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 69, "num_statements": 1} {"question": "What is the annual production volume trend for the mine with the ID 'mine001'?", "schema": "CREATE TABLE production_data (id INT PRIMARY KEY, mine_id INT, year INT, monthly_production INT);", "sql": "SELECT year, AVG(monthly_production) as annual_production FROM production_data WHERE mine_id = 'mine001' GROUP BY year;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 119, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who was the batsmen at the Brit Oval location?", "schema": "CREATE TABLE table_name_76 (batsmen VARCHAR, location VARCHAR)", "sql": "SELECT batsmen FROM table_name_76 WHERE location = 'the brit oval';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 67, "num_statements": 1} {"question": "Delete records in the 'employee' table where the 'position' is 'field worker'", "schema": "CREATE TABLE employee (employee_id INT, name VARCHAR(50), position VARCHAR(20));", "sql": "DELETE FROM employee WHERE position = 'field worker';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_delete", "is_postgresql_specific": false, "sql_length": 53, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many finals had more than 0 goals and 8 assists?", "schema": "CREATE TABLE table_name_14 (finals VARCHAR, goals VARCHAR, assists VARCHAR)", "sql": "SELECT finals FROM table_name_14 WHERE goals > 0 AND assists = 8;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 65, "num_statements": 1} {"question": "What is the average age of players who have played a game using VR technology in the last 6 months, grouped by their preferred gaming genre?", "schema": "CREATE TABLE Players (PlayerID INT, Age INT, PreferredGenre VARCHAR(20), LastVRGame DATETIME); CREATE TABLE Games (GameID INT, Genre VARCHAR(20)); CREATE VIEW Game_Library AS SELECT PlayerID, GameID FROM Player_Games; CREATE VIEW Game_Details AS SELECT GameID, Genre FROM Games; CREATE VIEW VR_Games AS SELECT PlayerID FROM Game_Library WHERE GameID IN (SELECT GameID FROM Games WHERE VRCompatible = 1);", "sql": "SELECT PreferredGenre, AVG(Age) FROM Players JOIN VR_Games ON Players.PlayerID = VR_Games.PlayerID JOIN Game_Details ON Game_Library.GameID = Games.GameID WHERE LastVRGame >= DATE_SUB(CURRENT_DATE, INTERVAL 6 MONTH) GROUP BY PreferredGenre;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 240, "num_statements": 1} {"question": "What are the vehicle types and test results for safety tests conducted in the first quarter of 2022?", "schema": "CREATE TABLE VehicleSafetyTests (TestID INT PRIMARY KEY, VehicleID INT, TestDate DATE, TestResults VARCHAR(50)); CREATE TABLE Vehicles (VehicleID INT PRIMARY KEY, VehicleType VARCHAR(50), Manufacturer VARCHAR(50));", "sql": "SELECT Vehicles.VehicleType, VehicleSafetyTests.TestResults FROM Vehicles INNER JOIN VehicleSafetyTests ON Vehicles.VehicleID = VehicleSafetyTests.VehicleID WHERE VehicleSafetyTests.TestDate BETWEEN '2022-01-01' AND '2022-03-31';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 229, "num_statements": 1} {"question": "What is the total assets of clients who have invested in at least two different types of investments?", "schema": "CREATE TABLE clients (client_id INT, name TEXT, age INT, gender TEXT, total_assets DECIMAL(10,2)); INSERT INTO clients VALUES (1, 'John Doe', 35, 'Male', 250000.00), (2, 'Jane Smith', 45, 'Female', 500000.00), (3, 'Bob Johnson', 50, 'Male', 800000.00); CREATE TABLE investments (client_id INT, investment_type TEXT); INSERT INTO investments VALUES (1, 'Stocks'), (1, 'Bonds'), (2, 'Stocks'), (2, 'Mutual Funds'), (3, 'Mutual Funds'), (3, 'Real Estate');", "sql": "SELECT c.total_assets FROM clients c INNER JOIN investments i1 ON c.client_id = i1.client_id INNER JOIN investments i2 ON c.client_id = i2.client_id AND i1.investment_type <> i2.investment_type GROUP BY c.client_id;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 215, "num_statements": 1} {"question": "What is the average price of fair trade materials in the materials table?", "schema": "CREATE TABLE materials (material_id INT, material_name TEXT, is_fair_trade BOOLEAN, price DECIMAL); INSERT INTO materials VALUES (1, 'Handwoven Cotton', TRUE, 5.5); INSERT INTO materials VALUES (2, 'Recycled Polyester', FALSE, 3.25);", "sql": "SELECT AVG(price) FROM materials WHERE is_fair_trade = TRUE;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 60, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'numeric_big' (example 357).", "schema": null, "sql": "INSERT INTO num_exp_div VALUES (8,3,'-.000000000000000000000000000140907135225782279761112255989433531718277338909398600029580768021365259747075253760824424092983497958717844671162530550507041138147836569244869107757945370200122955794509365120853536859837243314494576053441804831018954867623755033888264275704547752628348151132333655667171970175829826792355986148522268067032057293494927558322394395160508723637192234110428953945018965078022622950949911124494740703606109543716688008516750321047603009424529696862953094999450658951089435460411028678817795100630449046993274191915359520936265372754315076684798942557329584282177053819106884196674660057281227248874819417305259132106690385871316407455034281900110779740008476645291647094776093567400422266906817555937149628005629880142615126571231411138926043531449659320501743591992888328328980526602');", "explanation": "DML from PostgreSQL core regression test for Numeric Big.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 852, "num_statements": 1} {"question": "What is the average age of community health workers who speak a language other than English?", "schema": "CREATE TABLE Languages (language_id INT, language_name TEXT);CREATE TABLE CommunityHealthWorkers (worker_id INT, worker_language INT, worker_age INT);", "sql": "SELECT AVG(worker_age) FROM CommunityHealthWorkers cwh WHERE worker_language IN (SELECT language_id FROM Languages WHERE language_name != 'English');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 149, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the last year that they had less than 16 points in class 500cc?", "schema": "CREATE TABLE table_name_11 (year INTEGER, points VARCHAR, class VARCHAR)", "sql": "SELECT MAX(year) FROM table_name_11 WHERE points < 16 AND class = '500cc';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 74, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the highest position for less than 42 played?", "schema": "CREATE TABLE table_name_48 (position INTEGER, played INTEGER)", "sql": "SELECT MAX(position) FROM table_name_48 WHERE played < 42;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 58, "num_statements": 1} {"question": "What is the maximum supply of each digital asset issued by companies based in the UK?", "schema": "CREATE TABLE Digital_Assets (Asset_ID INT, Asset_Name VARCHAR(100), Max_Supply BIGINT); INSERT INTO Digital_Assets (Asset_ID, Asset_Name, Max_Supply) VALUES (1, 'Asset1', 1000000), (2, 'Asset2', 500000), (3, 'Asset3', 200000);", "sql": "SELECT Asset_ID, Asset_Name, MAX(Max_Supply) FROM Digital_Assets WHERE Issuer_Country = 'UK' GROUP BY Asset_ID, Asset_Name;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 123, "num_statements": 1} {"question": "How many unions focus on 'Transportation' and have more than 100,000 members?", "schema": "CREATE TABLE unions (id INT, name TEXT, domain TEXT, members INT); INSERT INTO unions (id, name, domain, members) VALUES (1, 'International Brotherhood of Teamsters', 'Transportation', 1200000); INSERT INTO unions (id, name, domain, members) VALUES (2, 'United Steelworkers', 'Metals, Mining, Energy, Construction', 850000);", "sql": "SELECT COUNT(*) FROM unions WHERE domain = 'Transportation' AND members > 100000;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 81, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the segment A on episode 237?", "schema": "CREATE TABLE table_15187735_19 (segment_a VARCHAR, episode VARCHAR)", "sql": "SELECT segment_a FROM table_15187735_19 WHERE episode = 237;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 60, "num_statements": 1} {"question": "What is the maximum cargo weight (in metric tons) for each port?", "schema": "CREATE TABLE ports (port_id INT, port_name VARCHAR(50), country VARCHAR(50)); INSERT INTO ports VALUES (1, 'Tanjung Priok', 'Indonesia'); INSERT INTO ports VALUES (2, 'Belawan', 'Indonesia'); CREATE TABLE cargo (cargo_id INT, port_id INT, weight_ton FLOAT); INSERT INTO cargo VALUES (1, 1, 5000); INSERT INTO cargo VALUES (2, 1, 7000); INSERT INTO cargo VALUES (3, 2, 3000); INSERT INTO cargo VALUES (4, 2, 4000);", "sql": "SELECT ports.port_name, MAX(cargo.weight_ton) FROM cargo JOIN ports ON cargo.port_id = ports.port_id GROUP BY ports.port_name;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 126, "num_statements": 1} {"question": "pgTAP test for Cmpok (assertion 7).", "schema": null, "sql": "SELECT * FROM check_test(\n cmp_ok( 1, '=', 2, '1 should = 2' ),\n false,\n 'cmp_ok() fail',\n '1 should = 2',\n ' ''1''\n =\n ''2'''\n);", "explanation": "SQL assertion from pgTAP test suite for Cmpok.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 157, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'btree_gin' (example 24).", "schema": null, "sql": "SELECT * FROM test_timestamp WHERE i<'-infinity'::date ORDER BY i;", "explanation": "Example query from the 'btree_gin' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "What is the maximum donation amount from donors living in 'India'?", "schema": "CREATE TABLE donors (donor_id INT, donor_name TEXT, donation_amount DECIMAL(10,2), country TEXT); INSERT INTO donors (donor_id, donor_name, donation_amount, country) VALUES (1, 'John Doe', 500.00, 'United States'); INSERT INTO donors (donor_id, donor_name, donation_amount, country) VALUES (2, 'Jane Smith', 300.00, 'Canada'); INSERT INTO donors (donor_id, donor_name, donation_amount, country) VALUES (3, 'Jose Garcia', 250.00, 'Brazil'); INSERT INTO donors (donor_id, donor_name, donation_amount, country) VALUES (4, 'Raj Patel', 700.00, 'India');", "sql": "SELECT MAX(donation_amount) FROM donors WHERE country = 'India';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 64, "num_statements": 1} {"question": "What is the percentage of sustainable products for each brand?", "schema": "CREATE TABLE products (product_id INT, product_name TEXT, brand_id INT, sustainable_product BOOLEAN); INSERT INTO products (product_id, product_name, brand_id, sustainable_product) VALUES (1, 'Organic Cotton Shirt', 1, TRUE), (2, 'Polyester Jacket', 1, FALSE), (3, 'Hemp T-Shirt', 2, TRUE), (4, 'Viscose Dress', 2, FALSE);", "sql": "SELECT brands.brand_name, ROUND(COUNT(CASE WHEN products.sustainable_product = TRUE THEN 1 END) * 100.0 / COUNT(*), 2) as percentage FROM products JOIN brands ON products.brand_id = brands.brand_id GROUP BY brands.brand_name;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 225, "num_statements": 1} {"question": "Show the total number of medals won by each country in the Olympics, including the number of gold, silver, and bronze medals.", "schema": "CREATE TABLE olympic_medals (country VARCHAR(255), medal VARCHAR(255), count INT); INSERT INTO olympic_medals VALUES ('USA', 'Gold', 50); INSERT INTO olympic_medals VALUES ('China', 'Silver', 30); INSERT INTO olympic_medals VALUES ('Russia', 'Bronze', 20);", "sql": "SELECT country, SUM(CASE WHEN medal = 'Gold' THEN count ELSE 0 END) as gold_medals, SUM(CASE WHEN medal = 'Silver' THEN count ELSE 0 END) as silver_medals, SUM(CASE WHEN medal = 'Bronze' THEN count ELSE 0 END) as bronze_medals, SUM(count) as total_medals FROM olympic_medals GROUP BY country;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 292, "num_statements": 1} {"question": "What is the average sustainability rating for garments manufactured in Africa?", "schema": "CREATE TABLE garment_info (garment_id INT, sustainability_rating DECIMAL(3, 2)); INSERT INTO garment_info (garment_id, sustainability_rating) VALUES (1001, 4.2), (1002, 3.5), (1003, 4.8), (1004, 2.9), (1005, 4.5), (1006, 3.7); CREATE TABLE garment_manufacturing (manufacturing_id INT, garment_id INT, country VARCHAR(255)); INSERT INTO garment_manufacturing (manufacturing_id, garment_id, country) VALUES (1, 1001, 'Nigeria'), (2, 1002, 'Egypt'), (3, 1003, 'UK'), (4, 1004, 'China'), (5, 1005, 'Bangladesh'), (6, 1006, 'Indonesia');", "sql": "SELECT AVG(g.sustainability_rating) AS avg_sustainability_rating FROM garment_info g INNER JOIN garment_manufacturing m ON g.garment_id = m.garment_id WHERE m.country IN ('Nigeria', 'Egypt');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 191, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the reanking of Hamid Veisi?", "schema": "CREATE TABLE table_name_44 (rank VARCHAR, athlete VARCHAR)", "sql": "SELECT rank FROM table_name_44 WHERE athlete = 'hamid veisi';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 61, "num_statements": 1} {"question": "What is the maximum ticket price for concerts in the 'Hip Hop' genre?", "schema": "CREATE TABLE ConcertGenre (ConcertID INT, GenreID INT); INSERT INTO ConcertGenre VALUES (7, 3), (8, 1), (9, 2), (13, 5);", "sql": "SELECT MAX(TicketPrice) FROM Concerts JOIN ConcertGenre ON Concerts.ConcertID = ConcertGenre.ConcertID JOIN Genre ON ConcertGenre.GenreID = Genre.GenreID WHERE Genre.Genre = 'Hip Hop';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 184, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'merge' (example 506).", "schema": null, "sql": "INSERT INTO measurement VALUES (1, '2007-01-17', 10, 10);", "explanation": "DML from PostgreSQL core regression test for Merge.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 57, "num_statements": 1} {"question": "Show the location and treatment type for each wastewater treatment plant", "schema": "CREATE TABLE wastewater_treatment (id INT, location VARCHAR(255), treatment_type VARCHAR(255), capacity INT);", "sql": "SELECT location, treatment_type FROM wastewater_treatment;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 58, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What color is the super-soft compound?", "schema": "CREATE TABLE table_name_31 (colour VARCHAR, compound_name VARCHAR)", "sql": "SELECT colour FROM table_name_31 WHERE compound_name = 'super-soft';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 68, "num_statements": 1} {"question": "What is the maximum duration of a virtual tour?", "schema": "CREATE TABLE virtual_tours(id INT, name TEXT, country TEXT, duration INT); INSERT INTO virtual_tours (id, name, country, duration) VALUES (1, 'Tokyo Sky Tree Virtual Tour', 'Japan', 60), (2, 'Paris Virtual Tour', 'France', 90);", "sql": "SELECT MAX(duration) FROM virtual_tours;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 40, "num_statements": 1} {"question": "Delete all records from the 'sustainable_tourism_practices' table.", "schema": "CREATE TABLE sustainable_tourism_practices (id INT, title TEXT, description TEXT, country TEXT);", "sql": "DELETE FROM sustainable_tourism_practices;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_delete", "is_postgresql_specific": false, "sql_length": 42, "num_statements": 1} {"question": "What was the production of Samarium in 2018 and 2021?", "schema": "CREATE TABLE production_data (year INT, element VARCHAR(10), quantity INT); INSERT INTO production_data (year, element, quantity) VALUES (2018, 'Samarium', 80), (2019, 'Samarium', 90), (2020, 'Samarium', 100), (2021, 'Samarium', 110);", "sql": "SELECT quantity FROM production_data WHERE element = 'Samarium' AND year IN (2018, 2021);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 89, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the Opponent of the game in Week 3?", "schema": "CREATE TABLE table_name_21 (opponent VARCHAR, week VARCHAR)", "sql": "SELECT opponent FROM table_name_21 WHERE week = 3;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 50, "num_statements": 1} {"question": "Find the minimum and maximum depth of mines in South Africa.", "schema": "CREATE TABLE mines (id INT, name TEXT, location TEXT, depth INT); INSERT INTO mines (id, name, location, depth) VALUES (1, 'Diamond Mine', 'South Africa', 1500); INSERT INTO mines (id, name, location, depth) VALUES (2, 'Gold Mine', 'South Africa', 2000);", "sql": "SELECT MIN(depth), MAX(depth) FROM mines WHERE location = 'South Africa';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 73, "num_statements": 1} {"question": "What is the average number of hours volunteered per week by volunteers from Asia?", "schema": "CREATE TABLE volunteer_hours (id INT, volunteer_id INT, hours DECIMAL, week INT); INSERT INTO volunteer_hours (id, volunteer_id, hours, week) VALUES (1, 1, 5.0, 1), (2, 2, 10.0, 1), (3, 3, 7.5, 1), (4, 1, 4.0, 2), (5, 3, 8.0, 2); CREATE TABLE volunteers (id INT, name TEXT, region TEXT); INSERT INTO volunteers (id, name, region) VALUES (1, 'Alice', 'Asia'), (2, 'Bob', 'Europe'), (3, 'Charlie', 'Africa');", "sql": "SELECT AVG(hours) FROM volunteer_hours INNER JOIN volunteers ON volunteer_hours.volunteer_id = volunteers.id WHERE volunteers.region = 'Asia';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 142, "num_statements": 1} {"question": "Identify the most common vulnerabilities in the Caribbean region in Q4 of 2021.", "schema": "CREATE TABLE vulnerabilities_caribbean (id INT, vulnerability VARCHAR(255), region VARCHAR(255), detection_date DATE); INSERT INTO vulnerabilities_caribbean (id, vulnerability, region, detection_date) VALUES (1, 'SQL Injection', 'Caribbean', '2021-10-01'), (2, 'Cross-Site Scripting', 'Caribbean', '2021-11-01'), (3, 'Broken Authentication', 'Caribbean', '2021-12-01');", "sql": "SELECT vulnerability, COUNT(*) AS detection_count FROM vulnerabilities_caribbean WHERE region = 'Caribbean' AND detection_date >= DATE_SUB('2021-12-31', INTERVAL 3 MONTH) GROUP BY vulnerability ORDER BY detection_count DESC;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 224, "num_statements": 1} {"question": "Show the total cost of completed projects in the 'transportation_infrastructure' table.", "schema": "CREATE TABLE transportation_infrastructure (id INT, project_name VARCHAR(50), location VARCHAR(50), cost FLOAT, status VARCHAR(20)); INSERT INTO transportation_infrastructure (id, project_name, location, cost, status) VALUES (1, 'Bridge Construction', 'City U', 12000000.00, 'completed'), (2, 'Traffic Signal Upgrade', 'Region V', 500000.00, 'in_progress');", "sql": "SELECT SUM(cost) FROM transportation_infrastructure WHERE status = 'completed';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 79, "num_statements": 1} {"question": "PostgreSQL regression test 'updatable_views': Write the SELECT query (example 91).", "schema": null, "sql": "SELECT * FROM rw_view2 ORDER BY aaa;", "explanation": "Regression test for Updatable Views in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT * FROM rw_view2 ORDER BY aaa) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 36, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What place has a draw smaller than 2?", "schema": "CREATE TABLE table_name_90 (place INTEGER, draw INTEGER)", "sql": "SELECT AVG(place) FROM table_name_90 WHERE draw < 2;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 52, "num_statements": 1} {"question": "What is the average rating of movies directed by 'Director2'?", "schema": "CREATE TABLE movies (id INT, title VARCHAR(255), rating FLOAT, director VARCHAR(255)); INSERT INTO movies (id, title, rating, director) VALUES (1, 'Movie1', 4.5, 'Director1'), (2, 'Movie2', 3.2, 'Director2'), (3, 'Movie3', 4.7, 'Director2'), (4, 'Movie4', 2.9, 'Director3');", "sql": "SELECT AVG(rating) FROM movies WHERE director = 'Director2';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 60, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many lanes have a Nationality of iceland?", "schema": "CREATE TABLE table_name_58 (lane INTEGER, nationality VARCHAR)", "sql": "SELECT SUM(lane) FROM table_name_58 WHERE nationality = 'iceland';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "Insert a new record for 'Bamboo Viscose' with a water consumption reduction of '50%' into the 'sustainability_metrics' table", "schema": "CREATE TABLE sustainability_metrics (id INT PRIMARY KEY, fabric VARCHAR(50), water_reduction DECIMAL(3,2));", "sql": "INSERT INTO sustainability_metrics (id, fabric, water_reduction) VALUES (2, 'Bamboo Viscose', 0.50);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 100, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: In how many game was the attendance at Staples Center 18,997?", "schema": "CREATE TABLE table_17058151_5 (game VARCHAR, location_attendance VARCHAR)", "sql": "SELECT COUNT(game) FROM table_17058151_5 WHERE location_attendance = 'Staples Center 18,997';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 93, "num_statements": 1} {"question": "How many components were manufactured using renewable energy sources in Q2 of 2021?", "schema": "CREATE TABLE components (id INT, manufacturer TEXT, energy_source TEXT, quantity INT); INSERT INTO components (id, manufacturer, energy_source, quantity) VALUES (1, 'XYZ', 'solar', 500), (2, 'ABC', 'wind', 700); CREATE TABLE dates (id INT, component_id INT, date DATE); INSERT INTO dates (id, component_id, date) VALUES (1, 1, '2021-04-01'), (2, 2, '2021-05-15');", "sql": "SELECT SUM(quantity) FROM components c JOIN dates d ON c.id = d.component_id WHERE energy_source IN ('solar', 'wind') AND date BETWEEN '2021-04-01' AND '2021-06-30';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 165, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many values are in the ends won cell corresponding to a PA of 39?", "schema": "CREATE TABLE table_17012578_37 (Ends VARCHAR, pa VARCHAR)", "sql": "SELECT COUNT(Ends) AS won FROM table_17012578_37 WHERE pa = 39;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 63, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the first election year listed?", "schema": "CREATE TABLE table_1341472_20 (first_elected INTEGER)", "sql": "SELECT MIN(first_elected) FROM table_1341472_20;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 48, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the sum of Rank with a build year earlier than 2007 for the howard johnson hotel bucharest?", "schema": "CREATE TABLE table_name_39 (rank INTEGER, built VARCHAR, building VARCHAR)", "sql": "SELECT SUM(rank) FROM table_name_39 WHERE built < 2007 AND building = 'howard johnson hotel bucharest';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 103, "num_statements": 1} {"question": "Add a new research grant to the grants table.", "schema": "CREATE TABLE grants (id INT, name VARCHAR(50), amount DECIMAL(10,2), principal_investigator VARCHAR(50), department VARCHAR(50));", "sql": "INSERT INTO grants (id, name, amount, principal_investigator, department) VALUES (2, 'DOE Grant', 75000, 'Benjamin Thomas', 'Engineering');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 139, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Find the names of all English songs.", "schema": "CREATE TABLE song (song_name VARCHAR, languages VARCHAR)", "sql": "SELECT song_name FROM song WHERE languages = 'english';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 55, "num_statements": 1} {"question": "What is the total revenue for each restaurant that offers vegan options in Q3 2022?", "schema": "CREATE TABLE restaurant_menu (restaurant_id INT, has_vegan_options BOOLEAN); INSERT INTO restaurant_menu (restaurant_id, has_vegan_options) VALUES (1, true), (2, false); CREATE TABLE restaurant_revenue (restaurant_id INT, revenue DECIMAL(10,2), transaction_date DATE); INSERT INTO restaurant_revenue (restaurant_id, revenue, transaction_date) VALUES (1, 10000, '2022-07-01'), (2, 8000, '2022-07-02');", "sql": "SELECT r.restaurant_id, SUM(revenue) as total_revenue FROM restaurant_revenue r JOIN restaurant_menu m ON r.restaurant_id = m.restaurant_id WHERE m.has_vegan_options = true GROUP BY r.restaurant_id;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 198, "num_statements": 1} {"question": "Find the artist with the highest total revenue from art sales.", "schema": "CREATE TABLE ArtSales (artist VARCHAR(255), sale_price DECIMAL(10,2)); INSERT INTO ArtSales (artist, sale_price) VALUES ('Artist A', 5000), ('Artist A', 7000), ('Artist B', 6000), ('Artist B', 8000), ('Artist C', 9000), ('Artist C', 10000);", "sql": "SELECT artist, SUM(sale_price) as total_revenue FROM ArtSales GROUP BY artist ORDER BY total_revenue DESC LIMIT 1;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 114, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What rank is Germany?", "schema": "CREATE TABLE table_name_74 (rank VARCHAR, country VARCHAR)", "sql": "SELECT rank FROM table_name_74 WHERE country = 'germany';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 57, "num_statements": 1} {"question": "What is the total number of alternative sentencing programs implemented in Illinois since 2017?", "schema": "CREATE TABLE alternative_sentencing_programs (program_id INT, year INT, state VARCHAR(20)); INSERT INTO alternative_sentencing_programs (program_id, year, state) VALUES (1, 2022, 'Illinois'), (2, 2021, 'Illinois'), (3, 2020, 'Illinois'), (4, 2019, 'Illinois'), (5, 2018, 'Illinois'), (6, 2017, 'Illinois');", "sql": "SELECT COUNT(*) FROM alternative_sentencing_programs WHERE year >= 2017 AND state = 'Illinois';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 95, "num_statements": 1} {"question": "What is the total amount of research grants awarded by country?", "schema": "CREATE TABLE research_grants (id INT, student_id INT, year INT, amount DECIMAL(10, 2), country VARCHAR(50)); INSERT INTO research_grants VALUES (1, 1, 2021, 10000, 'USA'); INSERT INTO research_grants VALUES (2, 2, 2020, 12000, 'Canada'); INSERT INTO research_grants VALUES (3, 3, 2021, 15000, 'Mexico');", "sql": "SELECT r.country, SUM(r.amount) FROM research_grants r GROUP BY r.country;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 74, "num_statements": 1} {"question": "PostgreSQL regression test 'timestamptz': Write the SELECT query (example 337).", "schema": null, "sql": "SELECT '2011-03-26 23:00:00 UTC'::timestamptz AT TIME ZONE 'Europe/Moscow';", "explanation": "Regression test for Timestamptz in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT '2011-03-26 23:00:00 UTC'::timestamptz AT TIME ZONE 'Europe/Moscow') AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 75, "num_statements": 1} {"question": "What is the average annual visitors to cultural heritage sites in Italy?", "schema": "CREATE TABLE cultural_heritage_sites_italy (site_id INT, site_name TEXT, country TEXT, annual_visitors INT); INSERT INTO cultural_heritage_sites_italy (site_id, site_name, country, annual_visitors) VALUES (1, 'Colosseum', 'Italy', 2000000), (2, 'Leaning Tower of Pisa', 'Italy', 1500000);", "sql": "SELECT AVG(annual_visitors) FROM cultural_heritage_sites_italy;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 63, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What did the home team score when playing Fitzroy as the away team?", "schema": "CREATE TABLE table_name_8 (home_team VARCHAR, away_team VARCHAR)", "sql": "SELECT home_team AS score FROM table_name_8 WHERE away_team = 'fitzroy';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 72, "num_statements": 1} {"question": "What is the total transaction amount for 'South America' customers?", "schema": "CREATE TABLE transactions (id INT, customer_region VARCHAR(20), transaction_amount DECIMAL(10,2)); INSERT INTO transactions (id, customer_region, transaction_amount) VALUES (1, 'North America', 500.00), (2, 'North America', 750.00), (3, 'South America', 800.00), (4, 'Europe', 900.00);", "sql": "SELECT SUM(transaction_amount) FROM transactions WHERE customer_region = 'South America';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 89, "num_statements": 1} {"question": "What is the most common word used in posts made by users from Germany?", "schema": "CREATE TABLE posts (id INT, user_id INT, content TEXT); INSERT INTO posts (id, user_id, content) VALUES (1, 1, 'Hello World'), (2, 1, 'I love data'), (3, 2, 'Guten Tag'), (4, 2, 'Ich liebe Deutschland'); CREATE TABLE users (id INT, name VARCHAR(100), country VARCHAR(50)); INSERT INTO users (id, name, country) VALUES (1, 'John Doe', 'USA'), (2, 'Hans Schmidt', 'Germany');", "sql": "SELECT SUBSTRING_INDEX(SUBSTRING_INDEX(content, ' ', n.n), ' ', -1) word, COUNT(*) count FROM posts JOIN users ON posts.user_id = users.id CROSS JOIN (SELECT 1 n UNION SELECT 2 UNION SELECT 3 UNION SELECT 4 UNION SELECT 5) n WHERE users.country = 'Germany' GROUP BY word ORDER BY count DESC LIMIT 1;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 299, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many episodes aired in Sydney in Week 3?", "schema": "CREATE TABLE table_24291077_4 (sydney VARCHAR, week VARCHAR)", "sql": "SELECT COUNT(sydney) FROM table_24291077_4 WHERE week = 3;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 58, "num_statements": 1} {"question": "How many research grants were awarded to the Physics department in 2020?", "schema": "CREATE TABLE grants (id INT, department VARCHAR(255), year INT, amount DECIMAL(10,2)); INSERT INTO grants (id, department, year, amount) VALUES (1, 'Physics', 2020, 50000), (2, 'Physics', 2019, 75000), (3, 'Chemistry', 2020, 60000);", "sql": "SELECT COUNT(*) FROM grants WHERE department = 'Physics' AND year = 2020;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 73, "num_statements": 1} {"question": "Which artist has created the most artwork entries in Africa in each decade since 1900?", "schema": "CREATE TABLE Artwork (ArtworkID INT, ArtistID INT, CreationDate DATE); INSERT INTO Artwork (ArtworkID, ArtistID, CreationDate) VALUES (1, 2, '1905-01-01'), (2, 2, '1910-05-15');", "sql": "SELECT ArtistID, EXTRACT(YEAR FROM CreationDate) AS Decade, COUNT(*) as ArtworkCount FROM Artwork WHERE Continent = 'Africa' GROUP BY ArtistID, Decade ORDER BY Decade, ArtworkCount DESC;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 186, "num_statements": 1} {"question": "What is the total number of sustainable building projects in the 'sustainable_buildings' table with a timeline of more than 365 days?", "schema": "CREATE TABLE sustainable_buildings (project_id INT, project_name TEXT, timeline_days INT);", "sql": "SELECT COUNT(*) FROM sustainable_buildings WHERE timeline_days > 365;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 69, "num_statements": 1} {"question": "PostgreSQL regression test 'join': Write the SELECT query (example 43).", "schema": null, "sql": "SELECT ROW(x.*) FROM J1_TBL JOIN J2_TBL USING (i) AS x WHERE J1_TBL.t = 'one';", "explanation": "Regression test for Join in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT ROW(x.*) FROM J1_TBL JOIN J2_TBL USING (i) AS x WHERE J1_TBL.t = 'one') AS _sub;", "source": "postgresql_regression_tests", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 78, "num_statements": 1} {"question": "determine the average fare for all train lines in 'Rail System'", "schema": "CREATE TABLE TrainLines (line VARCHAR(10), fare DECIMAL(5,2)); INSERT INTO TrainLines (line, fare) VALUES ('North', 2.50), ('South', 3.25), ('East', 2.75);", "sql": "SELECT AVG(fare) FROM TrainLines;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 33, "num_statements": 1} {"question": "Update the player's name in the players table", "schema": "CREATE TABLE players (player_id INT, name VARCHAR(100), game VARCHAR(50));", "sql": "UPDATE players SET name = 'NewPlayerName' WHERE player_id = 1;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 62, "num_statements": 1} {"question": "Insert a new record into the market_trends table for 2022: price_per_kg = 70.00, total_kg = 22000", "schema": "CREATE TABLE market_trends ( id INT PRIMARY KEY, year INT, price_per_kg DECIMAL(10,2), total_kg INT );", "sql": "INSERT INTO market_trends (id, year, price_per_kg, total_kg) VALUES (5, 2022, 70.00, 22000);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 92, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is Location, when Result is 28-21?", "schema": "CREATE TABLE table_name_93 (location VARCHAR, result VARCHAR)", "sql": "SELECT location FROM table_name_93 WHERE result = '28-21';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 58, "num_statements": 1} {"question": "Which public transportation systems are available in Canada and the United States?", "schema": "CREATE TABLE public_transportation (id INT, city VARCHAR(50), country VARCHAR(50), system VARCHAR(50)); INSERT INTO public_transportation (id, city, country, system) VALUES (1, 'Toronto', 'Canada', 'Subway'), (2, 'Montreal', 'Canada', 'Bus'), (3, 'New York', 'USA', 'Subway'), (4, 'Los Angeles', 'USA', 'Bus');", "sql": "SELECT DISTINCT system FROM public_transportation WHERE country IN ('Canada', 'USA');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 85, "num_statements": 1} {"question": "What are the total REE production amounts for each country in 2020, excluding China?", "schema": "CREATE TABLE production (country VARCHAR(255), year INT, ree_production INT); INSERT INTO production (country, year, ree_production) VALUES ('China', 2020, 140000), ('USA', 2020, 15000), ('Australia', 2020, 22000), ('India', 2020, 5200), ('Brazil', 2020, 3000), ('Russia', 2020, 4000);", "sql": "SELECT country, SUM(ree_production) FROM production WHERE country NOT IN ('China') AND year = 2020 GROUP BY country;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 116, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who is the shirt sponsor for the manufacturer Nike?", "schema": "CREATE TABLE table_name_54 (shirt_sponsor VARCHAR, kit_manufacturer VARCHAR)", "sql": "SELECT shirt_sponsor FROM table_name_54 WHERE kit_manufacturer = 'nike';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 72, "num_statements": 1} {"question": "Who is the coach of the team with the best win-loss record in the current season?", "schema": "CREATE TABLE coaches (team_id INT, coach_name VARCHAR(255)); INSERT INTO coaches (team_id, coach_name) VALUES (1, 'CoachA'), (2, 'CoachB'), (3, 'CoachC'); CREATE TABLE games (team_id INT, result VARCHAR(5)); INSERT INTO games (team_id, result) VALUES (1, 'Win'), (1, 'Win'), (1, 'Win'), (1, 'Loss'), (2, 'Win'), (2, 'Win'), (3, 'Win'), (3, 'Win'), (3, 'Win');", "sql": "SELECT coach_name FROM coaches INNER JOIN (SELECT team_id, SUM(CASE WHEN result = 'Win' THEN 1 ELSE 0 END) AS wins, SUM(CASE WHEN result = 'Loss' THEN 1 ELSE 0 END) AS losses FROM games GROUP BY team_id) AS subquery ON coaches.team_id = subquery.team_id WHERE subquery.wins >= (SELECT MAX(subquery2.wins) FROM subquery AS subquery2 WHERE subquery2.losses = subquery.losses) AND subquery.losses = (SELECT MIN(subquery3.losses) FROM subquery AS subquery3 WHERE subquery3.wins = subquery.wins);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 491, "num_statements": 1} {"question": "pgTAP test for Ownership (assertion 20).", "schema": null, "sql": "/****************************************************************************/\n-- Test schema_owner_is().\nSELECT * FROM check_test(\n schema_owner_is(current_schema(), _get_schema_owner(current_schema()), 'mumble'),\n\ttrue,\n 'schema_owner_is(schema, user, desc)',\n 'mumble',\n ''\n);", "explanation": "SQL assertion from pgTAP test suite for Ownership.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 291, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who was the Constructor at the Argentine Grand Prix?", "schema": "CREATE TABLE table_name_21 (constructor VARCHAR, race VARCHAR)", "sql": "SELECT constructor FROM table_name_21 WHERE race = 'argentine grand prix';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 74, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the total number of casualties when there are 12 total deaths and more than 1 military death?", "schema": "CREATE TABLE table_name_47 (total_casualties VARCHAR, total_deaths VARCHAR, military_deaths VARCHAR)", "sql": "SELECT total_casualties FROM table_name_47 WHERE total_deaths = 12 AND military_deaths > 1;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 91, "num_statements": 1} {"question": "Which ocean has the minimum temperature?", "schema": "CREATE TABLE temperature_readings (location TEXT, temperature FLOAT); INSERT INTO temperature_readings (location, temperature) VALUES ('Arctic Ocean', -2.34), ('North Atlantic', 12.56), ('North Pacific', 15.43);", "sql": "SELECT location FROM temperature_readings WHERE temperature = (SELECT MIN(temperature) FROM temperature_readings);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 114, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the sabaean for sabʕ-", "schema": "CREATE TABLE table_26919_6 (sabaean VARCHAR, arabic VARCHAR)", "sql": "SELECT sabaean FROM table_26919_6 WHERE arabic = 'sabʕ-';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 57, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What regular season had a record of 16-12?", "schema": "CREATE TABLE table_name_7 (reg_season VARCHAR, record VARCHAR)", "sql": "SELECT reg_season FROM table_name_7 WHERE record = '16-12';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 59, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was 2009, when 2012 was A, when 2003 was A, and then the Tournament was the Madrid Masters?", "schema": "CREATE TABLE table_name_18 (tournament VARCHAR)", "sql": "SELECT 2009 FROM table_name_18 WHERE 2012 = 'a' AND 2003 = 'a' AND tournament = 'madrid masters';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 97, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What incumbent won the district of texas 22?", "schema": "CREATE TABLE table_1341598_44 (incumbent VARCHAR, district VARCHAR)", "sql": "SELECT incumbent FROM table_1341598_44 WHERE district = 'Texas 22';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 67, "num_statements": 1} {"question": "Show veteran employment statistics for the state of 'California'", "schema": "CREATE TABLE veteran_employment (state VARCHAR(255), employed INT, unemployed INT, total_veterans INT); INSERT INTO veteran_employment (state, employed, unemployed, total_veterans) VALUES ('California', 50000, 3000, 55000), ('New York', 45000, 4000, 50000);", "sql": "SELECT employed, unemployed, total_veterans FROM veteran_employment WHERE state = 'California';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 95, "num_statements": 1} {"question": "Find the total sales of beauty products that are not vegan and not cruelty-free", "schema": "CREATE TABLE sales (product_type VARCHAR(20), region VARCHAR(10), sales NUMERIC(10,2)); INSERT INTO sales (product_type, region, sales) VALUES ('lipstick', 'North', 500), ('mascara', 'East', 600), ('eyeshadow', 'West', 400), ('blush', 'South', 700), ('foundation', 'North', 800), ('lipstick', 'West', 900); CREATE TABLE products (product_type VARCHAR(20), vegan BOOLEAN, cruelty_free BOOLEAN); INSERT INTO products (product_type, vegan, cruelty_free) VALUES ('lipstick', FALSE, FALSE), ('mascara', TRUE, FALSE), ('eyeshadow', FALSE, TRUE), ('blush', FALSE, TRUE), ('foundation', FALSE, FALSE);", "sql": "SELECT SUM(sales) FROM sales INNER JOIN products ON sales.product_type = products.product_type WHERE products.vegan = FALSE AND products.cruelty_free = FALSE;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 158, "num_statements": 1} {"question": "What are the labor productivity metrics for mining in Australia and Canada in 2019?", "schema": "CREATE TABLE LaborProductivity (Country VARCHAR(255), Year INT, Sector VARCHAR(255), Productivity DECIMAL(5,2)); INSERT INTO LaborProductivity (Country, Year, Sector, Productivity) VALUES ('Australia', 2019, 'Mining', 45.67), ('Australia', 2019, 'Mining', 50.11), ('Canada', 2019, 'Mining', 60.00), ('Canada', 2019, 'Mining', 65.55);", "sql": "SELECT Context.Country, Context.Productivity FROM LaborProductivity as Context WHERE Context.Year = 2019 AND Context.Sector = 'Mining' AND Context.Country IN ('Australia', 'Canada');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 182, "num_statements": 1} {"question": "Calculate the total volume of wastewater treated in 'Rio de Janeiro' for the first half of the year 2021", "schema": "CREATE TABLE wastewater_treatment (region VARCHAR(50), date DATE, volume FLOAT); INSERT INTO wastewater_treatment (region, date, volume) VALUES ('Rio de Janeiro', '2021-01-01', 500), ('Rio de Janeiro', '2021-02-01', 550), ('Rio de Janeiro', '2021-03-01', 600);", "sql": "SELECT SUM(volume) FROM wastewater_treatment WHERE region = 'Rio de Janeiro' AND date BETWEEN '2021-01-01' AND '2021-06-30';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 124, "num_statements": 1} {"question": "What is the minimum price of a product in the education_products table?", "schema": "CREATE TABLE education_products (product_id INT, product_name TEXT, price DECIMAL); INSERT INTO education_products (product_id, product_name, price) VALUES (1, 'Sustainability Guidebook', 20), (2, 'Eco-Friendly Craft Kit', 30), (3, 'Educational Poster', 10);", "sql": "SELECT MIN(price) FROM education_products;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 42, "num_statements": 1} {"question": "What was the total revenue for all artworks sold by the 'Impressionist' movement in the year 2010?", "schema": "CREATE TABLE Artworks (artwork_id INT, movement VARCHAR(255), sale_year INT, revenue DECIMAL(10, 2));", "sql": "SELECT SUM(revenue) FROM Artworks WHERE movement = 'Impressionist' AND sale_year = 2010;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 88, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What season saw 6 cup apps and 5 cup goals?", "schema": "CREATE TABLE table_name_87 (season VARCHAR, cup_apps VARCHAR, cup_goals VARCHAR)", "sql": "SELECT season FROM table_name_87 WHERE cup_apps = 6 AND cup_goals = 5;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 70, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the Date of the 1999 fifa confederations cup?", "schema": "CREATE TABLE table_name_45 (date VARCHAR, competition VARCHAR)", "sql": "SELECT date FROM table_name_45 WHERE competition = '1999 fifa confederations cup';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 82, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the time/retired for the driver emerson fittipaldi?", "schema": "CREATE TABLE table_name_55 (time_retired VARCHAR, driver VARCHAR)", "sql": "SELECT time_retired FROM table_name_55 WHERE driver = 'emerson fittipaldi';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 75, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who was the opponent with the record of 4-0-0?", "schema": "CREATE TABLE table_name_46 (opponent VARCHAR, record VARCHAR)", "sql": "SELECT opponent FROM table_name_46 WHERE record = '4-0-0';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 58, "num_statements": 1} {"question": "What is the average energy consumption of hotels in Canada per square foot?", "schema": "CREATE TABLE hotel_data(id INT, hotel_name TEXT, country TEXT, sqft INT, energy_consumption INT); INSERT INTO hotel_data (id, hotel_name, country, sqft, energy_consumption) VALUES (1, 'Hotel A', 'Canada', 10000, 1200), (2, 'Hotel B', 'Canada', 12000, 1400), (3, 'Hotel C', 'Canada', 15000, 1800);", "sql": "SELECT AVG(energy_consumption / sqft) FROM hotel_data WHERE country = 'Canada';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 79, "num_statements": 1} {"question": "How many sustainable sourcing audits were conducted in 'California' and 'Texas'?", "schema": "CREATE TABLE sourcing_audits (restaurant_name TEXT, location TEXT, audit_date DATE); INSERT INTO sourcing_audits (restaurant_name, location, audit_date) VALUES ('Restaurant A', 'California', '2021-06-01'), ('Restaurant B', 'California', '2021-07-15'), ('Restaurant C', 'Texas', '2021-08-05'), ('Restaurant D', 'California', '2021-09-01');", "sql": "SELECT COUNT(*) FROM sourcing_audits WHERE location IN ('California', 'Texas');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 79, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the least attendance for 52-37 record", "schema": "CREATE TABLE table_name_53 (attendance INTEGER, record VARCHAR)", "sql": "SELECT MIN(attendance) FROM table_name_53 WHERE record = '52-37';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 65, "num_statements": 1} {"question": "What is the total number of workouts in the last month for members who identify as female and have a smartwatch?", "schema": "CREATE TABLE Members (MemberID INT, Gender VARCHAR(10), HasSmartwatch BOOLEAN); CREATE TABLE Workouts (WorkoutID INT, MemberID INT, WorkoutDate DATE);", "sql": "SELECT COUNT(*) FROM Workouts INNER JOIN Members ON Workouts.MemberID = Members.MemberID WHERE Members.Gender = 'female' AND Members.HasSmartwatch = TRUE AND WorkoutDate >= DATE_SUB(CURDATE(), INTERVAL 1 MONTH);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 211, "num_statements": 1} {"question": "How many police stations are there in New Delhi and Bangkok?", "schema": "CREATE TABLE Stations (City VARCHAR(20), Number INT); INSERT INTO Stations (City, Number) VALUES ('New Delhi', 12), ('Bangkok', 8);", "sql": "SELECT SUM(Number) FROM Stations WHERE City IN ('New Delhi', 'Bangkok');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 72, "num_statements": 1} {"question": "Select the total number of public libraries in New York City", "schema": "CREATE TABLE public_libraries (library_id INT, name VARCHAR(255), location VARCHAR(255), city VARCHAR(255), state VARCHAR(255), zip INT); INSERT INTO public_libraries (library_id, name, location, city, state, zip) VALUES (1, 'New York Public Library', 'Fifth Avenue', 'New York', 'NY', 10003); INSERT INTO public_libraries (library_id, name, location, city, state, zip) VALUES (2, 'Brooklyn Public Library', 'Grand Army Plaza', 'Brooklyn', 'NY', 11238);", "sql": "SELECT COUNT(*) FROM public_libraries WHERE city = 'New York';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 62, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Competition of friendly, and a Date of 03-jun-62 had what score?", "schema": "CREATE TABLE table_name_72 (score VARCHAR, competition VARCHAR, date VARCHAR)", "sql": "SELECT score FROM table_name_72 WHERE competition = 'friendly' AND date = '03-jun-62';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 86, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: how many female dependents are there?", "schema": "CREATE TABLE dependent (sex VARCHAR)", "sql": "SELECT COUNT(*) FROM dependent WHERE sex = 'F';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 47, "num_statements": 1} {"question": "Update the location of the 'Batik' art to 'Indonesia' in the Arts table.", "schema": "CREATE TABLE Arts (ArtID INT, ArtName VARCHAR(50), Type VARCHAR(50), Location VARCHAR(50)); INSERT INTO Arts (ArtID, ArtName, Type, Location) VALUES (1, 'Batik', 'Textile', 'Nigeria');", "sql": "UPDATE Arts SET Location = 'Indonesia' WHERE ArtName = 'Batik';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 63, "num_statements": 1} {"question": "What is the total number of wildlife habitats for each type?", "schema": "CREATE TABLE wildlife_habitat(type VARCHAR(255), count INT); INSERT INTO wildlife_habitat(type, count) VALUES ('Forest', 300), ('Wetland', 200), ('Grassland', 150), ('Desert', 50);", "sql": "SELECT type, SUM(count) FROM wildlife_habitat;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 46, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the attendance on June 27?", "schema": "CREATE TABLE table_name_6 (att INTEGER, date VARCHAR)", "sql": "SELECT MIN(att) FROM table_name_6 WHERE date = 'june 27';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 57, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the lowest React, when Mark is 46.26 sb, and when Lane is greater than 6?", "schema": "CREATE TABLE table_name_61 (react INTEGER, mark VARCHAR, lane VARCHAR)", "sql": "SELECT MIN(react) FROM table_name_61 WHERE mark = '46.26 sb' AND lane > 6;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 74, "num_statements": 1} {"question": "What is the percentage of students who have completed a lifelong learning course in 'South High' school?", "schema": "CREATE TABLE students_lifelong_learning (student_id INT, school_id INT, completed_course INT); INSERT INTO students_lifelong_learning VALUES (1, 1, 1); INSERT INTO students_lifelong_learning VALUES (2, 1, 0); INSERT INTO students_lifelong_learning VALUES (3, 2, 1); INSERT INTO students_lifelong_learning VALUES (4, 2, 1); CREATE TABLE school_roster (student_id INT, school_id INT, school_name VARCHAR(255)); INSERT INTO school_roster VALUES (1, 1, 'South High'); INSERT INTO school_roster VALUES (2, 1, 'South High'); INSERT INTO school_roster VALUES (3, 2, 'North Middle'); INSERT INTO school_roster VALUES (4, 2, 'North Middle');", "sql": "SELECT s.school_name, 100.0 * SUM(CASE WHEN sl.completed_course = 1 THEN 1 ELSE 0 END) / COUNT(sr.student_id) AS completion_percentage FROM school_roster sr INNER JOIN students_lifelong_learning sl ON sr.student_id = sl.student_id INNER JOIN schools s ON sr.school_id = s.school_id WHERE s.school_name = 'South High' GROUP BY s.school_name;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 340, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is team Yamaha with 3 points ranked?", "schema": "CREATE TABLE table_name_8 (rank VARCHAR, team VARCHAR, points VARCHAR)", "sql": "SELECT rank FROM table_name_8 WHERE team = 'yamaha' AND points = 3;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 67, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the highest Pick number that had a road number that was less than 6, featured Bob Dailey as a player, and which had a Reg GP bigger than 257?", "schema": "CREATE TABLE table_name_43 (pick__number INTEGER, reg_gp VARCHAR, rd__number VARCHAR, player VARCHAR)", "sql": "SELECT MAX(pick__number) FROM table_name_43 WHERE rd__number < 6 AND player = 'bob dailey' AND reg_gp > 257;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 108, "num_statements": 1} {"question": "Delete all records of founders who have not raised any funds.", "schema": "CREATE TABLE founders (id INT, name TEXT, race TEXT, funds_raised FLOAT); INSERT INTO founders (id, name, race, funds_raised) VALUES (1, 'Alice', 'Asian', 5000000), (2, 'Bob', 'Black', NULL), (3, 'Charlie', 'Latinx', 3000000), (4, 'Diana', 'White', 1000000), (5, 'Eve', 'Asian', NULL), (6, 'Frank', 'Black', 2000000), (7, 'Grace', 'Latinx', NULL), (8, 'Hugo', 'White', 8000000);", "sql": "DELETE FROM founders WHERE funds_raised IS NULL;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "plpgsql", "is_postgresql_specific": false, "sql_length": 48, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is Wheel Arrangement, when Quantity Made is 44?", "schema": "CREATE TABLE table_name_8 (wheel_arrangement VARCHAR, quantity_made VARCHAR)", "sql": "SELECT wheel_arrangement FROM table_name_8 WHERE quantity_made = '44';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 70, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the pick number average for the player who was drafted before round 7, and went to college at Tennessee?", "schema": "CREATE TABLE table_name_40 (pick__number INTEGER, round VARCHAR, college VARCHAR)", "sql": "SELECT AVG(pick__number) FROM table_name_40 WHERE round < 7 AND college = 'tennessee';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 86, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'compression_lz4' (example 17).", "schema": null, "sql": "INSERT INTO cmmove3 SELECT * FROM cmdata_pglz;", "explanation": "DML from PostgreSQL core regression test for Compression Lz4.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 46, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: On what date was the record 4-2?", "schema": "CREATE TABLE table_name_66 (date VARCHAR, record VARCHAR)", "sql": "SELECT date FROM table_name_66 WHERE record = '4-2';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 52, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What are the average, maximum, and minimum number of floors for all buildings?", "schema": "CREATE TABLE building (floors INTEGER)", "sql": "SELECT AVG(floors), MAX(floors), MIN(floors) FROM building;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 59, "num_statements": 1} {"question": "pgTAP test for Pgtap--Unpackaged--0.91.0 (assertion 294).", "schema": null, "sql": "ALTER EXTENSION pgtap ADD FUNCTION has_trigger ( NAME, NAME );", "explanation": "SQL assertion from pgTAP test suite for Pgtap--Unpackaged--0.91.0.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 62, "num_statements": 1} {"question": "Identify the states with increasing water consumption in the last 6 months.", "schema": "CREATE TABLE monthly_usage (state TEXT, month DATE, consumption INTEGER); INSERT INTO monthly_usage (state, month, consumption) VALUES ('California', '2022-01-01', 1200000), ('California', '2022-02-01', 1300000), ('California', '2022-03-01', 1400000), ('California', '2022-04-01', 1500000), ('Texas', '2022-01-01', 1800000), ('Texas', '2022-02-01', 1900000), ('Texas', '2022-03-01', 2000000), ('Texas', '2022-04-01', 2100000), ('Florida', '2022-01-01', 1500000), ('Florida', '2022-02-01', 1600000), ('Florida', '2022-03-01', 1550000), ('Florida', '2022-04-01', 1650000);", "sql": "SELECT state FROM monthly_usage WHERE consumption > (SELECT consumption FROM monthly_usage WHERE state = monthly_usage.state AND month = DATE_SUB(month, INTERVAL 1 MONTH)) GROUP BY state HAVING COUNT(*) = 6;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 207, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the quantity made number for the quantity preserved 4-6-0 — ooooo — ten-wheeler?", "schema": "CREATE TABLE table_name_65 (quantity_made VARCHAR, quantity_preserved VARCHAR)", "sql": "SELECT quantity_made FROM table_name_65 WHERE quantity_preserved = '4-6-0 — ooooo — ten-wheeler';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 97, "num_statements": 1} {"question": "Show the total number of game sessions for each game in the 'GameSessions' table", "schema": "CREATE TABLE GameSessions (GameID INT, SessionDuration TIME);", "sql": "SELECT GameID, COUNT(*) as TotalGameSessions FROM GameSessions GROUP BY GameID;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 79, "num_statements": 1} {"question": "Insert a new team in the Teams table", "schema": "CREATE TABLE Teams (TeamID INT, TeamName VARCHAR(50), City VARCHAR(50), Sport VARCHAR(20), EstablishedYear INT);", "sql": "INSERT INTO Teams (TeamID, TeamName, City, Sport, EstablishedYear) VALUES (3, 'Atlanta United FC', 'Atlanta', 'Soccer', 2017);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 126, "num_statements": 1} {"question": "List the states with water usage greater than 5000 in 2021.", "schema": "CREATE TABLE water_usage(state VARCHAR(20), year INT, usage FLOAT);", "sql": "SELECT state FROM water_usage WHERE year=2021 GROUP BY state HAVING SUM(usage) > 5000;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 86, "num_statements": 1} {"question": "What is the total number of trees planted for habitat preservation in the Congo Basin?", "schema": "CREATE TABLE Tree_Planting (Id INT, Planting_Date DATE, Trees_Planted INT, Location VARCHAR(50)); INSERT INTO Tree_Planting (Id, Planting_Date, Trees_Planted, Location) VALUES (1, '2022-01-01', 500, 'Congo Basin'); INSERT INTO Tree_Planting (Id, Planting_Date, Trees_Planted, Location) VALUES (2, '2022-01-02', 700, 'Congo Basin');", "sql": "SELECT SUM(Trees_Planted) FROM Tree_Planting WHERE Location = 'Congo Basin';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 76, "num_statements": 1} {"question": "Find the number of mental health parity violations by state in the last 6 months.", "schema": "CREATE TABLE if not exists mental_health_parity (violation_id INT, violation_date DATE, state VARCHAR(255));", "sql": "SELECT COUNT(*), state FROM mental_health_parity WHERE violation_date >= DATEADD(month, -6, GETDATE()) GROUP BY state;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 118, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: who is the the pole position with rnd being 16", "schema": "CREATE TABLE table_14638077_2 (pole_position VARCHAR, rnd VARCHAR)", "sql": "SELECT pole_position FROM table_14638077_2 WHERE rnd = 16;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 58, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many points did the opponent score on Sept. 14?", "schema": "CREATE TABLE table_16677887_2 (opponents INTEGER, date VARCHAR)", "sql": "SELECT MAX(opponents) FROM table_16677887_2 WHERE date = 'Sept. 14';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 68, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the venue when the home team was Fitzroy?", "schema": "CREATE TABLE table_name_76 (venue VARCHAR, home_team VARCHAR)", "sql": "SELECT venue FROM table_name_76 WHERE home_team = 'fitzroy';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 60, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'without_overlaps' (example 447).", "schema": null, "sql": "UPDATE temporal_rng SET valid_at = daterange('2018-01-15', '2018-02-15')\n WHERE id = '[2,3)';", "explanation": "DML from PostgreSQL core regression test for Without Overlaps.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 94, "num_statements": 1} {"question": "What is the total distance covered by all runners in the 2019 marathon?", "schema": "CREATE TABLE runners (id INT, name TEXT, distance FLOAT, marathon INT); INSERT INTO runners (id, name, distance, marathon) VALUES (1, 'John Doe', 42.2, 2019), (2, 'Jane Smith', 40.5, 2019), (3, 'Alberto Rodriguez', 38.7, 2019);", "sql": "SELECT SUM(distance) FROM runners WHERE marathon = 2019;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 56, "num_statements": 1} {"question": "How many patients have 'Anxiety' as their primary diagnosis in 'clinic_NY'?", "schema": "CREATE TABLE clinic_NY (patient_id INT, name VARCHAR(50), primary_diagnosis VARCHAR(50)); INSERT INTO clinic_NY (patient_id, name, primary_diagnosis) VALUES (1, 'James Johnson', 'Anxiety'), (2, 'Sophia Williams', 'Depression'), (3, 'Michael Brown', 'Anxiety');", "sql": "SELECT COUNT(*) FROM clinic_NY WHERE primary_diagnosis = 'Anxiety';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 67, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which driver had points over 252 in the season of 2012 with a percentage of possible points at 55.60%?", "schema": "CREATE TABLE table_name_40 (driver VARCHAR, percentage_of_possible_points VARCHAR, points VARCHAR, season VARCHAR)", "sql": "SELECT driver FROM table_name_40 WHERE points > 252 AND season = '2012' AND percentage_of_possible_points = '55.60%';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 117, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which position has 50+12 points and fewer than 10 draws?", "schema": "CREATE TABLE table_name_92 (position INTEGER, points VARCHAR, draws VARCHAR)", "sql": "SELECT MAX(position) FROM table_name_92 WHERE points = '50+12' AND draws < 10;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 78, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the population (1991) where population (2002) was 14250?", "schema": "CREATE TABLE table_2562572_7 (population__1991_ VARCHAR, population__2002_ VARCHAR)", "sql": "SELECT COUNT(population__1991_) FROM table_2562572_7 WHERE population__2002_ = 14250;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 85, "num_statements": 1} {"question": "What is the total budget allocated to each department?", "schema": "CREATE TABLE Departments (id INT, department_name TEXT, budget_allocated FLOAT);", "sql": "SELECT department_name, SUM(budget_allocated) as total_budget FROM Departments GROUP BY department_name;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 104, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What Brazil scorers have a 1-1 Score?", "schema": "CREATE TABLE table_name_93 (brazil_scorers VARCHAR, score VARCHAR)", "sql": "SELECT brazil_scorers FROM table_name_93 WHERE score = '1-1';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 61, "num_statements": 1} {"question": "PostgreSQL regression test 'stats_ext': Write the SELECT query (example 572).", "schema": null, "sql": "SELECT m.*\n FROM pg_statistic_ext s, pg_statistic_ext_data d,\n pg_mcv_list_items(d.stxdmcv) m\n WHERE s.stxname = 'mcv_lists_stats'\n AND d.stxoid = s.oid;", "explanation": "Regression test for Stats Ext in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT m.*\n FROM pg_statistic_ext s, pg_statistic_ext_data d,\n pg_mcv_list_items(d.stxdmcv) m\n WHERE s.stxname = 'mcv_lists_stats'\n AND d.stxoid = s.oid) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 162, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many airlines does Russia has?", "schema": "CREATE TABLE airlines (country VARCHAR)", "sql": "SELECT COUNT(*) FROM airlines WHERE country = 'Russia';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 55, "num_statements": 1} {"question": "What is the number of reported cases of Zika virus by month in 2017 in Florida?", "schema": "CREATE TABLE zika_virus (id INT, patient_id INT, report_date DATE, state VARCHAR(20));", "sql": "SELECT COUNT(*) FROM zika_virus WHERE state = 'Florida' AND YEAR(report_date) = 2017 GROUP BY MONTH(report_date);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 113, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: what are every one of the rivals in conclusive where title is zürich", "schema": "CREATE TABLE table_23197088_4 (opponents_in_final VARCHAR, championship VARCHAR)", "sql": "SELECT opponents_in_final FROM table_23197088_4 WHERE championship = 'Zürich';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 78, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: which NHL team has a College/Junior/Club Team (League) of shattuck-saint mary's school (midget major aaa)?", "schema": "CREATE TABLE table_name_63 (nhl_team VARCHAR, college_junior_club_team__league_ VARCHAR)", "sql": "SELECT nhl_team FROM table_name_63 WHERE college_junior_club_team__league_ = 'shattuck-saint mary's school (midget major aaa)';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 127, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the high rebounds for american airlines center 20,557", "schema": "CREATE TABLE table_23284271_11 (high_rebounds VARCHAR, location_attendance VARCHAR)", "sql": "SELECT high_rebounds FROM table_23284271_11 WHERE location_attendance = 'American Airlines Center 20,557';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 106, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: List the most common type of competition.", "schema": "CREATE TABLE competition (Competition_type VARCHAR)", "sql": "SELECT Competition_type FROM competition GROUP BY Competition_type ORDER BY COUNT(*) DESC LIMIT 1;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 98, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which Mountains classification has a Team classification of quick step?", "schema": "CREATE TABLE table_name_81 (mountains_classification VARCHAR, team_classification VARCHAR)", "sql": "SELECT mountains_classification FROM table_name_81 WHERE team_classification = 'quick step';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 92, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What format is catalogue WPCR13504 in?", "schema": "CREATE TABLE table_name_94 (format VARCHAR, catalogue VARCHAR)", "sql": "SELECT format FROM table_name_94 WHERE catalogue = 'wpcr13504';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 63, "num_statements": 1} {"question": "Update the genre of the TV show 'Breaking Bad' to 'Crime' in the 'TV_Shows' table.", "schema": "CREATE TABLE TV_Shows (show_id INT PRIMARY KEY, name VARCHAR(100), genre VARCHAR(50)); INSERT INTO TV_Shows (show_id, name, genre) VALUES (1, 'Breaking Bad', 'Drama'), (2, 'Stranger Things', 'Sci-fi');", "sql": "UPDATE TV_Shows SET genre = 'Crime' WHERE name = 'Breaking Bad';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 64, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Rider of stéphane mertens had what lowest points?", "schema": "CREATE TABLE table_name_84 (points INTEGER, rider VARCHAR)", "sql": "SELECT MIN(points) FROM table_name_84 WHERE rider = 'stéphane mertens';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 71, "num_statements": 1} {"question": "Which players have the highest win rate in FPS games?", "schema": "CREATE TABLE Players (PlayerID INT, Name VARCHAR(50), Wins INT, Losses INT); INSERT INTO Players (PlayerID, Name, Wins, Losses) VALUES (1, 'Bob', 100, 20);", "sql": "SELECT p.Name, (SUM(p.Wins) / (SUM(p.Wins) + SUM(p.Losses))) as WinRate FROM Players p JOIN GameResults gr ON p.PlayerID = gr.PlayerID WHERE gr.GameType = 'FPS' GROUP BY p.Name ORDER BY WinRate DESC;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 199, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the last year that someone is first elected?", "schema": "CREATE TABLE table_1341690_9 (first_elected INTEGER)", "sql": "SELECT MAX(first_elected) FROM table_1341690_9;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 47, "num_statements": 1} {"question": "What is the distribution of aircraft engine types by manufacturer?", "schema": "CREATE TABLE aircraft_engines (id INT, manufacturer VARCHAR(255), engine_type VARCHAR(255), engine_count INT);", "sql": "SELECT manufacturer, engine_type, SUM(engine_count) as total_engine_count FROM aircraft_engines GROUP BY 1, 2;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 110, "num_statements": 1} {"question": "What is the total donation and investment amount for each cause, separated by donations and investments, in the 'cause_donations' and 'cause_investments' tables, respectively, ordered by the total amount in descending order?", "schema": "CREATE TABLE cause_donations (cause_id INT, cause_name TEXT, total_donations DECIMAL(10, 2)); CREATE TABLE cause_investments (cause_id INT, cause_name TEXT, total_investments DECIMAL(10, 2));", "sql": "SELECT cause_name, SUM(total_donations) as total_donation_amount FROM cause_donations GROUP BY cause_name UNION ALL SELECT cause_name, SUM(total_investments) as total_investment_amount FROM cause_investments GROUP BY cause_name ORDER BY total_donation_amount DESC;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 264, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'btree_gin' (example 6).", "schema": null, "sql": "SELECT * FROM test_macaddr WHERE i<='22:00:5c:08:55:08'::macaddr ORDER BY i;", "explanation": "Example query from the 'btree_gin' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 76, "num_statements": 1} {"question": "What is the minimum number of launches for any SpaceX mission?", "schema": "CREATE TABLE SpaceX_Missions (Id INT, Name VARCHAR(50), NumLaunches INT); INSERT INTO SpaceX_Missions (Id, Name, NumLaunches) VALUES (1, 'Falcon1', 5), (2, 'Falcon9', 10), (3, 'FalconHeavy', 3);", "sql": "SELECT MIN(NumLaunches) FROM SpaceX_Missions;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 45, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which rank has a more than 0 silver, 4 bronze, and a total smaller than 10?", "schema": "CREATE TABLE table_name_34 (rank INTEGER, total VARCHAR, silver VARCHAR, bronze VARCHAR)", "sql": "SELECT SUM(rank) FROM table_name_34 WHERE silver > 0 AND bronze = 4 AND total < 10;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 83, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the record for game where Columbus is visitor, Phoenix is home, and decision is made by Denis?", "schema": "CREATE TABLE table_name_3 (record VARCHAR, home VARCHAR, visitor VARCHAR, decision VARCHAR)", "sql": "SELECT record FROM table_name_3 WHERE visitor = 'columbus' AND decision = 'denis' AND home = 'phoenix';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 103, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What year did the Capital City Giants have a game with the final score of 8-0?", "schema": "CREATE TABLE table_name_54 (year INTEGER, team VARCHAR, score VARCHAR)", "sql": "SELECT AVG(year) FROM table_name_54 WHERE team = 'capital city giants' AND score = '8-0';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 89, "num_statements": 1} {"question": "Write the PL/pgSQL object from PostgreSQL regression test 'create_view' (example 276).", "schema": null, "sql": "-- reverse-listing of various special function syntaxes required by SQL\n\ncreate view tt201v as\nselect\n ('2022-12-01'::date + '1 day'::interval) at time zone 'UTC' as atz,\n extract(day from now()) as extr,\n (now(), '1 day'::interval) overlaps\n (current_timestamp(2), '1 day'::interval) as o,\n 'foo' is normalized isn,\n 'foo' is nfkc normalized isnn,\n normalize('foo') as n,\n normalize('foo', nfkd) as nfkd,\n overlay('foo' placing 'bar' from 2) as ovl,\n overlay('foo' placing 'bar' from 2 for 3) as ovl2,\n position('foo' in 'foobar') as p,\n substring('foo' from 2 for 3) as s,\n substring('foo' similar 'f' escape '#') as ss,\n substring('foo' from 'oo') as ssf, -- historically-permitted abuse\n trim(' ' from ' foo ') as bt,\n trim(leading ' ' from ' foo ') as lt,\n trim(trailing ' foo ') as rt,\n trim(E'\\\\000'::bytea from E'\\\\000Tom\\\\000'::bytea) as btb,\n trim(leading E'\\\\000'::bytea from E'\\\\000Tom\\\\000'::bytea) as ltb,\n trim(trailing E'\\\\000'::bytea from E'\\\\000Tom\\\\000'::bytea) as rtb,\n CURRENT_DATE as cd,\n (select * from CURRENT_DATE) as cd2,\n CURRENT_TIME as ct,\n (select * from CURRENT_TIME) as ct2,\n CURRENT_TIME (1) as ct3,\n (select * from CURRENT_TIME (1)) as ct4,\n CURRENT_TIMESTAMP as ct5,\n (select * from CURRENT_TIMESTAMP) as ct6,\n CURRENT_TIMESTAMP (1) as ct7,\n (select * from CURRENT_TIMESTAMP (1)) as ct8,\n LOCALTIME as lt1,\n (select * from LOCALTIME) as lt2,\n LOCALTIME (1) as lt3,\n (select * from LOCALTIME (1)) as lt4,\n LOCALTIMESTAMP as lt5,\n (select * from LOCALTIMESTAMP) as lt6,\n LOCALTIMESTAMP (1) as lt7,\n (select * from LOCALTIMESTAMP (1)) as lt8,\n CURRENT_CATALOG as ca,\n (select * from CURRENT_CATALOG) as ca2,\n CURRENT_ROLE as cr,\n (select * from CURRENT_ROLE) as cr2,\n CURRENT_SCHEMA as cs,\n (select * from CURRENT_SCHEMA) as cs2,\n CURRENT_USER as cu,\n (select * from CURRENT_USER) as cu2,\n USER as us,\n (select * from USER) as us2,\n SESSION_USER seu,\n (select * from SESSION_USER) as seu2,\n SYSTEM_USER as su,\n (select * from SYSTEM_USER) as su2;", "explanation": "PL/pgSQL object from PostgreSQL core test for Create View.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "ddl_view", "is_postgresql_specific": false, "sql_length": 2033, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the lowest First Elected, when District is \"Massachusetts 10\"?", "schema": "CREATE TABLE table_name_40 (first_elected INTEGER, district VARCHAR)", "sql": "SELECT MIN(first_elected) FROM table_name_40 WHERE district = 'massachusetts 10';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 81, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which Against has a Draws larger than 0, a Losses smaller than 16, and a Wins smaller than 4?", "schema": "CREATE TABLE table_name_50 (against INTEGER, wins VARCHAR, draws VARCHAR, losses VARCHAR)", "sql": "SELECT MAX(against) FROM table_name_50 WHERE draws > 0 AND losses < 16 AND wins < 4;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 84, "num_statements": 1} {"question": "How many transactions were made by each client in Q1 of 2021?", "schema": "CREATE TABLE clients (client_id INT, name VARCHAR(50)); CREATE TABLE transactions (transaction_id INT, client_id INT, date DATE); INSERT INTO clients (client_id, name) VALUES (1, 'John Doe'), (2, 'Jane Smith'); INSERT INTO transactions (transaction_id, client_id, date) VALUES (1, 1, '2021-01-01'), (2, 1, '2021-01-15'), (3, 2, '2021-01-30');", "sql": "SELECT c.client_id, c.name, COUNT(*) FROM clients c INNER JOIN transactions t ON c.client_id = t.client_id WHERE t.date BETWEEN '2021-01-01' AND '2021-03-31' GROUP BY c.client_id, c.name;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 187, "num_statements": 1} {"question": "What's the total gross for the director with ID 1?", "schema": "CREATE TABLE movies(movie_id INT, title VARCHAR(50), director_id INT, budget INT, gross INT); INSERT INTO movies(movie_id, title, director_id, budget, gross) VALUES (1, 'Inception', 1, 160000000, 825532381), (2, 'Interstellar', 1, 165000000, 675044311), (3, 'The Dark Knight', 1, 185000000, 1004558444), (4, 'The Avengers', 2, 220000000, 1518812988), (5, 'Iron Man 3', 2, 200000000, 1215429827);", "sql": "SELECT SUM(gross) FROM movies WHERE director_id = 1;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 52, "num_statements": 1} {"question": "Delete all attorneys from the 'attorneys' table with attorney_email containing '@gmail.com'", "schema": "CREATE TABLE attorneys (attorney_id INT, attorney_name VARCHAR(50), attorney_email VARCHAR(50), attorney_phone VARCHAR(15)); INSERT INTO attorneys (attorney_id, attorney_name, attorney_email, attorney_phone) VALUES (1, 'John Smith', 'john.smith@lawfirm.com', '555-555-1111');", "sql": "DELETE FROM attorneys WHERE attorney_email LIKE '%@gmail.com';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_delete", "is_postgresql_specific": false, "sql_length": 62, "num_statements": 1} {"question": "Write the SQL definition from PostgreSQL contrib extension 'contrib' (item 3).", "schema": null, "sql": "CREATE TYPE query_int (\n\tINTERNALLENGTH = -1,\n\tINPUT = bqarr_in,\n\tOUTPUT = bqarr_out\n);", "explanation": "SQL definition from the 'contrib' PostgreSQL contrib extension.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_advanced", "is_postgresql_specific": true, "sql_length": 87, "num_statements": 1} {"question": "How many teachers participated in professional development courses in '2021'?", "schema": "CREATE TABLE teacher_professional_development (teacher_id INT, course_year INT); INSERT INTO teacher_professional_development (teacher_id, course_year) VALUES (1, 2020), (2, 2021), (3, 2021), (4, 2019);", "sql": "SELECT COUNT(*) FROM teacher_professional_development WHERE course_year = 2021;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 79, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the total number of Round in liverpool , england?", "schema": "CREATE TABLE table_name_73 (round VARCHAR, location VARCHAR)", "sql": "SELECT COUNT(round) FROM table_name_73 WHERE location = 'liverpool , england';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 78, "num_statements": 1} {"question": "Write the SQL definition from PostgreSQL contrib extension 'contrib' (item 7).", "schema": null, "sql": "--\n-- External C-functions for R-tree methods\n--\n\n-- Comparison methods\n\nCREATE FUNCTION cube_eq(cube, cube)\nRETURNS bool\nAS 'MODULE_PATHNAME'\nLANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;", "explanation": "SQL definition from the 'contrib' PostgreSQL contrib extension.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "other", "is_postgresql_specific": true, "sql_length": 185, "num_statements": 1} {"question": "PostgreSQL regression test 'interval': Write the SELECT query (example 259).", "schema": null, "sql": "select interval '-0.1 weeks -9223372036854775808 microseconds';", "explanation": "Regression test for Interval in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select interval '-0.1 weeks -9223372036854775808 microseconds') AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 63, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'test_decoding' (example 29).", "schema": null, "sql": "INSERT INTO tbl_created_outside_xact(id) VALUES('1');", "explanation": "Example query from the 'test_decoding' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 53, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: When was millwall the home team?", "schema": "CREATE TABLE table_name_11 (date VARCHAR, home_team VARCHAR)", "sql": "SELECT date FROM table_name_11 WHERE home_team = 'millwall';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 60, "num_statements": 1} {"question": "What is the minimum fare for buses in each city?", "schema": "CREATE TABLE buses (route_id INT, fare DECIMAL(5,2), city VARCHAR(20)); CREATE TABLE routes (route_id INT, city VARCHAR(20));", "sql": "SELECT r.city, MIN(b.fare) FROM buses b JOIN routes r ON b.route_id = r.route_id GROUP BY r.city;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 97, "num_statements": 1} {"question": "Which countries have more than 5 excavated artifacts?", "schema": "CREATE TABLE Artifacts (ArtifactID int, Name text, SiteID int, ExcavationYear int, Country text); INSERT INTO Artifacts (ArtifactID, Name, SiteID, ExcavationYear, Country) VALUES (1, 'Artifact1', 3, 2000, 'CountryA');", "sql": "SELECT Country, COUNT(*) as ArtifactCount FROM Artifacts GROUP BY Country HAVING ArtifactCount > 5;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 99, "num_statements": 1} {"question": "What is the average dissolved oxygen level in the Atlantic Ocean by region?", "schema": "CREATE TABLE ocean_regions (region VARCHAR(255), id INTEGER); INSERT INTO ocean_regions (region, id) VALUES ('North Atlantic', 1), ('South Atlantic', 2); CREATE TABLE dissolved_oxygen (region_id INTEGER, value FLOAT);", "sql": "SELECT o.region, AVG(d.value) FROM dissolved_oxygen d JOIN ocean_regions o ON d.region_id = o.id GROUP BY o.region;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 115, "num_statements": 1} {"question": "What's the average ESG rating for companies in the healthcare sector?", "schema": "CREATE TABLE companies (id INT, name TEXT, sector TEXT, ESG_rating FLOAT); INSERT INTO companies (id, name, sector, ESG_rating) VALUES (1, 'Innovative Healthcare', 'Healthcare', 8.3); INSERT INTO companies (id, name, sector, ESG_rating) VALUES (2, 'GreenTech Medical', 'Healthcare', 7.9);", "sql": "SELECT AVG(ESG_rating) FROM companies WHERE sector = 'Healthcare';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "How many marine species are found in the Southern Ocean?", "schema": "CREATE TABLE marine_species (name VARCHAR(50), common_name VARCHAR(50), location VARCHAR(50));", "sql": "SELECT COUNT(*) FROM marine_species WHERE location = 'Southern Ocean';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 70, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many people attended the game with a Tie no of 30?", "schema": "CREATE TABLE table_name_88 (attendance VARCHAR, tie_no VARCHAR)", "sql": "SELECT attendance FROM table_name_88 WHERE tie_no = '30';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 57, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'btree_gist' (example 8).", "schema": null, "sql": "SELECT a, a <-> '10:57:11' FROM timetmp ORDER BY a <-> '10:57:11' LIMIT 3;", "explanation": "Example query from the 'btree_gist' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 74, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the total amount of money spent by Lucas Mancini?", "schema": "CREATE TABLE invoices (total INTEGER, customer_id VARCHAR); CREATE TABLE customers (id VARCHAR, first_name VARCHAR, last_name VARCHAR)", "sql": "SELECT SUM(T2.total) FROM customers AS T1 JOIN invoices AS T2 ON T1.id = T2.customer_id WHERE T1.first_name = 'Lucas' AND T1.last_name = 'Mancini';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 147, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'seg' (example 107).", "schema": null, "sql": "SELECT '2'::seg &> '1'::seg AS bool;", "explanation": "Example query from the 'seg' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 36, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What's the service number of the Pune Duronto train that departures at 21:35?", "schema": "CREATE TABLE table_23477312_1 (service VARCHAR, train_name VARCHAR, departure VARCHAR)", "sql": "SELECT service FROM table_23477312_1 WHERE train_name = 'Pune Duronto' AND departure = '21:35';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 95, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the name of the away team that played at Kardinia Park?", "schema": "CREATE TABLE table_name_2 (away_team VARCHAR, venue VARCHAR)", "sql": "SELECT away_team FROM table_name_2 WHERE venue = 'kardinia park';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 65, "num_statements": 1} {"question": "What is the total number of tickets sold for events in the 'theater' category?", "schema": "CREATE TABLE events (id INT, name TEXT, category TEXT, tickets_sold INT); INSERT INTO events (id, name, category, tickets_sold) VALUES (1, 'Concert', 'music', 200), (2, 'Play', 'theater', 150), (3, 'Festival', 'music', 300);", "sql": "SELECT SUM(tickets_sold) FROM events WHERE category = 'theater';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 64, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which opponent plays against the Pohang Steelers?", "schema": "CREATE TABLE table_name_58 (name VARCHAR, opponent VARCHAR)", "sql": "SELECT name FROM table_name_58 WHERE opponent = 'pohang steelers';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'case' (example 10).", "schema": null, "sql": "INSERT INTO CASE2_TBL VALUES (2, -4);", "explanation": "DML from PostgreSQL core regression test for Case.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 37, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'earthdistance' (example 122).", "schema": null, "sql": "SELECT earth_box(ll_to_earth(0,0),\n earth_distance(ll_to_earth(0,0),ll_to_earth(0,0.01))*.57735) @>\n ll_to_earth(0,0.01);", "explanation": "Example query from the 'earthdistance' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 133, "num_statements": 1} {"question": "pgTAP test for Pgtap--Unpackaged--0.91.0 (assertion 183).", "schema": null, "sql": "ALTER EXTENSION pgtap ADD FUNCTION hasnt_fk ( NAME );", "explanation": "SQL assertion from pgTAP test suite for Pgtap--Unpackaged--0.91.0.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 53, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who was the home team of the game with manu ginóbili (34) as the leading scorer?", "schema": "CREATE TABLE table_name_74 (home VARCHAR, leading_scorer VARCHAR)", "sql": "SELECT home FROM table_name_74 WHERE leading_scorer = 'manu ginóbili (34)';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 75, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What player was chosen for the Chicago Sky?", "schema": "CREATE TABLE table_name_57 (player VARCHAR, wnba_team VARCHAR)", "sql": "SELECT player FROM table_name_57 WHERE wnba_team = 'chicago sky';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 65, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the highest number of byes when draws are larger than 0?", "schema": "CREATE TABLE table_name_37 (byes INTEGER, draws INTEGER)", "sql": "SELECT MAX(byes) FROM table_name_37 WHERE draws > 0;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 52, "num_statements": 1} {"question": "What is the total crop yield for small-scale farms (less than 50 acres) in the United States?", "schema": "CREATE TABLE farms (id INT, farm_name VARCHAR(50), acres INT, total_yield INT); INSERT INTO farms (id, farm_name, acres, total_yield) VALUES (1, 'Farm 1', 25, 10000), (2, 'Farm 2', 75, 20000), (3, 'Farm 3', 45, 15000), (4, 'Farm 4', 55, 25000), (5, 'Farm 5', 35, 12000);", "sql": "SELECT SUM(total_yield) FROM farms WHERE acres < 50;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 52, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which game number had a location/attendance of US Airways Center 7,311 respectively?", "schema": "CREATE TABLE table_17118657_7 (game INTEGER, location_attendance VARCHAR)", "sql": "SELECT MAX(game) FROM table_17118657_7 WHERE location_attendance = 'US Airways Center 7,311';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 93, "num_statements": 1} {"question": "What is the total billing amount for cases resolved in January 2021?", "schema": "CREATE TABLE cases (case_id INT, case_status VARCHAR(10), resolved_date DATE, billing_amount DECIMAL); INSERT INTO cases (case_id, case_status, resolved_date, billing_amount) VALUES (1, 'Defendant', '2021-01-15', 5000.00), (2, 'Plaintiff', '2021-02-20', 4000.00), (3, 'Defendant', '2021-03-05', 6000.00);", "sql": "SELECT SUM(billing_amount) FROM cases WHERE resolved_date >= '2021-01-01' AND resolved_date < '2021-02-01';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 107, "num_statements": 1} {"question": "What is the average age of refugees supported by 'Red Crescent' in 'Europe'?", "schema": "CREATE TABLE refugee (id INT, name VARCHAR(255), age INT, location VARCHAR(255), supported_by VARCHAR(255), support_date DATE); INSERT INTO refugee (id, name, age, location, supported_by, support_date) VALUES (1, 'Jane Doe', 35, 'Europe', 'Red Crescent', '2022-01-01');", "sql": "SELECT AVG(age) FROM refugee WHERE location = 'Europe' AND supported_by = 'Red Crescent';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 89, "num_statements": 1} {"question": "What is the minimum response time for emergency calls in each district?", "schema": "CREATE TABLE emergency_calls (call_id INT, district TEXT, response_time FLOAT); INSERT INTO emergency_calls (call_id, district, response_time) VALUES (1, 'Downtown', 10.5), (2, 'Uptown', 12.0), (3, 'Harbor', 8.0);", "sql": "SELECT district, MIN(response_time) FROM emergency_calls GROUP BY district;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 75, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the Visitor with a Home with chicago, and a Score of 3 – 2?", "schema": "CREATE TABLE table_name_34 (visitor VARCHAR, home VARCHAR, score VARCHAR)", "sql": "SELECT visitor FROM table_name_34 WHERE home = 'chicago' AND score = '3 – 2';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 77, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many areas are named West Isles?", "schema": "CREATE TABLE table_170969_2 (area_km_2 VARCHAR, official_name VARCHAR)", "sql": "SELECT COUNT(area_km_2) FROM table_170969_2 WHERE official_name = 'West Isles';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 79, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the mountain classification name if the winner is Bernhard Eisel?", "schema": "CREATE TABLE table_22941863_19 (mountains_classification VARCHAR, winner VARCHAR)", "sql": "SELECT mountains_classification FROM table_22941863_19 WHERE winner = 'Bernhard Eisel';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 87, "num_statements": 1} {"question": "PostgreSQL Rowtypes: show example 1.", "schema": null, "sql": "CREATE TYPE complex AS ( r double precision, i double precision ); CREATE TYPE inventory_item AS ( name text, supplier_id integer, price numeric );", "explanation": "Example from PostgreSQL documentation on Rowtypes.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_advanced", "is_postgresql_specific": true, "sql_length": 147, "num_statements": 2} {"question": "PostgreSQL regression test 'union': Write the SELECT query (example 13).", "schema": null, "sql": "SELECT 1.0::float8 AS two UNION ALL SELECT 1 ORDER BY 1;", "explanation": "Regression test for Union in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT 1.0::float8 AS two UNION ALL SELECT 1 ORDER BY 1) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 56, "num_statements": 1} {"question": "What is the maximum salary in the Engineering department?", "schema": "CREATE TABLE Employees (EmployeeID INT, Department VARCHAR(20), Salary FLOAT); INSERT INTO Employees (EmployeeID, Department, Salary) VALUES (1, 'IT', 75000.00), (2, 'IT', 70000.00), (3, 'Engineering', 95000.00), (4, 'Finance', 85000.00);", "sql": "SELECT MAX(Salary) FROM Employees WHERE Department = 'Engineering';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 67, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the percentage of all the literate people where females are 73.17?", "schema": "CREATE TABLE table_14598_9 (literate_persons___percentage_ VARCHAR, females___percentage_ VARCHAR)", "sql": "SELECT literate_persons___percentage_ FROM table_14598_9 WHERE females___percentage_ = '73.17';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 95, "num_statements": 1} {"question": "What is the maximum number of citizens participating in public meetings for each meeting type in 'Meetings' table?", "schema": "CREATE TABLE Meetings (MeetingID INT, MeetingType VARCHAR(20), Citizens INT); INSERT INTO Meetings (MeetingID, MeetingType, Citizens) VALUES (1, 'TownHall', 50), (2, 'Committee', 30), (3, 'TownHall', 60);", "sql": "SELECT MeetingType, MAX(Citizens) AS MaxCitizens FROM Meetings GROUP BY MeetingType;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 84, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was Carlton's score as the home team?", "schema": "CREATE TABLE table_name_65 (home_team VARCHAR)", "sql": "SELECT home_team AS score FROM table_name_65 WHERE home_team = 'carlton';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 73, "num_statements": 1} {"question": "How many volunteers from underrepresented communities in Canada joined in 2020?", "schema": "CREATE TABLE volunteers (volunteer_id INT, joined_date DATE, underrepresented_community BOOLEAN); INSERT INTO volunteers (volunteer_id, joined_date, underrepresented_community) VALUES (1, '2020-05-12', true), (2, '2019-11-04', false);", "sql": "SELECT COUNT(*) FROM volunteers WHERE joined_date BETWEEN '2020-01-01' AND '2020-12-31' AND underrepresented_community = true AND country = 'Canada';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 149, "num_statements": 1} {"question": "What is the total number of employees in each department, and what percentage of them identify as LGBTQ+?", "schema": "CREATE TABLE Employees (EmployeeID INT, FirstName VARCHAR(50), LastName VARCHAR(50), Department VARCHAR(50), Position VARCHAR(50), LGBTQ INT); INSERT INTO Employees (EmployeeID, FirstName, LastName, Department, Position, LGBTQ) VALUES (1, 'John', 'Doe', 'IT', 'Developer', 1), (2, 'Jane', 'Smith', 'IT', 'Developer', 0), (3, 'Alice', 'Johnson', 'IT', 'Manager', 1), (4, 'Bob', 'Brown', 'HR', 'Manager', 0);", "sql": "SELECT Employees.Department, COUNT(Employees.EmployeeID) AS Total_Employees, (SUM(Employees.LGBTQ) / COUNT(Employees.EmployeeID)) * 100 AS Percentage_LGBTQ FROM Employees GROUP BY Employees.Department;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 201, "num_statements": 1} {"question": "Write the SQL definition from PostgreSQL contrib extension 'contrib' (item 57).", "schema": null, "sql": "--\n--\n--\n-- int2 ops\n--\n--\n--\n-- define the GiST support methods\nCREATE FUNCTION gbt_int2_consistent(internal,int2,int2,oid,internal)\nRETURNS bool\nAS 'MODULE_PATHNAME'\nLANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;", "explanation": "SQL definition from the 'contrib' PostgreSQL contrib extension.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 210, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the least week for opponent of washington redskins", "schema": "CREATE TABLE table_name_87 (week INTEGER, opponent VARCHAR)", "sql": "SELECT MIN(week) FROM table_name_87 WHERE opponent = 'washington redskins';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 75, "num_statements": 1} {"question": "What was the total budget for community development initiatives in Tanzania in 2018?", "schema": "CREATE TABLE CommunityDevelopment (id INT, country VARCHAR(50), initiative VARCHAR(50), budget FLOAT, year INT); INSERT INTO CommunityDevelopment (id, country, initiative, budget, year) VALUES (1, 'Tanzania', 'Youth Skills Training', 300000, 2018), (2, 'Tanzania', 'Community Health Center', 500000, 2019), (3, 'Kenya', 'Women Empowerment Program', 400000, 2018);", "sql": "SELECT SUM(budget) FROM CommunityDevelopment WHERE country = 'Tanzania' AND year = 2018;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 88, "num_statements": 1} {"question": "List all marine research facilities in the Southern Ocean.", "schema": "CREATE TABLE marine_research_facilities (id INT, name TEXT, location TEXT, ocean TEXT); INSERT INTO marine_research_facilities (id, name, location, ocean) VALUES (1, 'Palmer Station', 'Antarctica', 'Southern'), (2, 'McMurdo Station', 'Antarctica', 'Southern');", "sql": "SELECT * FROM marine_research_facilities WHERE ocean = 'Southern';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who used the Ford Cosworth DFV 3.0 v8 engine in rounds 3-12, with a G tire?", "schema": "CREATE TABLE table_name_22 (driver VARCHAR, tyre VARCHAR, engine VARCHAR, rounds VARCHAR)", "sql": "SELECT driver FROM table_name_22 WHERE engine = 'ford cosworth dfv 3.0 v8' AND rounds = '3-12' AND tyre = 'g';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 110, "num_statements": 1} {"question": "What is the average age of patients who have completed the recovery program for depression in Canada?", "schema": "CREATE TABLE patients (patient_id INT, age INT, condition VARCHAR(50), country VARCHAR(50)); INSERT INTO patients (patient_id, age, condition, country) VALUES (1, 35, 'Depression', 'Canada'); CREATE TABLE recovery_program (program_id INT, patient_id INT, completion_date DATE); INSERT INTO recovery_program (program_id, patient_id, completion_date) VALUES (1, 1, '2020-03-01');", "sql": "SELECT AVG(patients.age) FROM patients JOIN recovery_program ON patients.patient_id = recovery_program.patient_id WHERE patients.condition = 'Depression' AND patients.country = 'Canada';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 186, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: In what year did a car have a yamaha v12 engine and a brabham bt60y chassis", "schema": "CREATE TABLE table_name_70 (year VARCHAR, engine VARCHAR, chassis VARCHAR)", "sql": "SELECT year FROM table_name_70 WHERE engine = 'yamaha v12' AND chassis = 'brabham bt60y';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 89, "num_statements": 1} {"question": "Insert data into 'GameDesign'", "schema": "CREATE TABLE GameDesign (GameID INT PRIMARY KEY, GameName VARCHAR(50), Genre VARCHAR(50), Platform VARCHAR(50)); INSERT INTO GameDesign (GameID, GameName, Genre, Platform) VALUES (1, 'Fortnite', 'Battle Royale', 'PC'), (2, 'Among Us', 'Party', 'Mobile');", "sql": "INSERT INTO GameDesign (GameID, GameName, Genre, Platform) VALUES (3, 'Minecraft', 'Sandbox', 'PC');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 100, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the highest # Of Constituency Votes, when Election is 2005?", "schema": "CREATE TABLE table_name_22 (_number_of_constituency_votes INTEGER, election VARCHAR)", "sql": "SELECT MAX(_number_of_constituency_votes) FROM table_name_22 WHERE election = 2005;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 83, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the week 3 with addison miller in week 2?", "schema": "CREATE TABLE table_name_17 (week_3 VARCHAR, week_2 VARCHAR)", "sql": "SELECT week_3 FROM table_name_17 WHERE week_2 = 'addison miller';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 65, "num_statements": 1} {"question": "Show landfill capacity data for landfills with capacity over 500,000 tons, sorted by capacity in descending order.", "schema": "CREATE TABLE Landfills (id INT, name VARCHAR(255), capacity INT); INSERT INTO Landfills (id, name, capacity) VALUES (1, 'SiteA', 600000), (2, 'SiteB', 400000), (3, 'SiteC', 700000), (4, 'SiteD', 300000);", "sql": "SELECT * FROM Landfills WHERE capacity > 500000 ORDER BY capacity DESC;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 71, "num_statements": 1} {"question": "What is the total number of hours spent on professional development by teachers in urban schools?", "schema": "CREATE TABLE schools (school_id INT, school_type VARCHAR(20), teacher_id INT, hours_pd INT); INSERT INTO schools (school_id, school_type, teacher_id, hours_pd) VALUES (1, 'Urban', 1, 5), (2, 'Rural', 2, 3), (3, 'Urban', 3, 4), (4, 'Suburban', 4, 6), (5, 'Urban', 5, 2);", "sql": "SELECT school_type, SUM(hours_pd) as total_hours FROM schools WHERE school_type = 'Urban' GROUP BY school_type;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 111, "num_statements": 1} {"question": "Find the number of fraudulent transactions and their total value, excluding transactions with a value less than 1000, for each employee in the sales department.", "schema": "CREATE TABLE transactions (transaction_id INT, employee_id INT, transaction_type VARCHAR(20), transaction_value DECIMAL(10,2), is_fraudulent BOOLEAN);", "sql": "SELECT employee_id, COUNT(*) as fraud_count, SUM(transaction_value) as total_fraud_value FROM transactions WHERE transaction_type = 'Sales' AND is_fraudulent = TRUE AND transaction_value >= 1000 GROUP BY employee_id;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 216, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who was the GT2 Winning Team if Greg Mansell Leo Mansell was the LMP1 Winning Team?", "schema": "CREATE TABLE table_24865763_2 (gt2_winning_team VARCHAR, lmp1_winning_team VARCHAR)", "sql": "SELECT gt2_winning_team FROM table_24865763_2 WHERE lmp1_winning_team = 'Greg Mansell Leo Mansell';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 99, "num_statements": 1} {"question": "Which customers have accounts in the 'High Value' category and have made at least one transaction?", "schema": "CREATE TABLE customers (customer_id INT, name TEXT); INSERT INTO customers (customer_id, name) VALUES (1, 'John Doe'); INSERT INTO customers (customer_id, name) VALUES (2, 'Jane Smith'); CREATE TABLE accounts (account_id INT, account_type TEXT, customer_id INT); INSERT INTO accounts (account_id, account_type, customer_id) VALUES (1, 'High Value', 1); INSERT INTO accounts (account_id, account_type, customer_id) VALUES (2, 'Standard', 2); CREATE TABLE customer_transactions (transaction_id INT, customer_id INT); INSERT INTO customer_transactions (transaction_id, customer_id) VALUES (1, 1);", "sql": "SELECT customers.name FROM customers JOIN accounts ON customers.customer_id = accounts.customer_id JOIN customer_transactions ON customers.customer_id = customer_transactions.customer_id WHERE accounts.account_type = 'High Value';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 230, "num_statements": 1} {"question": "PostgreSQL regression test 'jsonb': Write the SELECT query (example 334).", "schema": null, "sql": "SELECT jsb FROM jsonb_populate_record(NULL::jsbrec, '{\"jsb\": \"123.45\"}') q;", "explanation": "Regression test for Jsonb in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT jsb FROM jsonb_populate_record(NULL::jsbrec, '{\"jsb\": \"123.45\"}') q) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 75, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the Status with an Author that is colbert?", "schema": "CREATE TABLE table_name_25 (status VARCHAR, authors VARCHAR)", "sql": "SELECT status FROM table_name_25 WHERE authors = 'colbert';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 59, "num_statements": 1} {"question": "What is the average water temperature in salmon farms in Norway?", "schema": "CREATE TABLE salmon_farms (id INT, name TEXT, country TEXT); CREATE TABLE temperature_readings (id INT, farm_id INT, temperature FLOAT); INSERT INTO salmon_farms (id, name, country) VALUES (1, 'Farm X', 'Norway'), (2, 'Farm Y', 'Norway'), (3, 'Farm Z', 'Canada'); INSERT INTO temperature_readings (id, farm_id, temperature) VALUES (1, 1, 12.5), (2, 1, 13.0), (3, 2, 11.0), (4, 2, 11.5), (5, 3, 7.0);", "sql": "SELECT AVG(temperature) FROM temperature_readings TR JOIN salmon_farms SF ON TR.farm_id = SF.id WHERE SF.country = 'Norway';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 124, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the name of the church that is located in florø?", "schema": "CREATE TABLE table_name_29 (church_name VARCHAR, location_of_the_church VARCHAR)", "sql": "SELECT church_name FROM table_name_29 WHERE location_of_the_church = 'florø';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 77, "num_statements": 1} {"question": "What is the average mental health score of students per school in the last year?", "schema": "CREATE TABLE student_mental_health (student_id INT, school_id INT, mental_health_score INT, date DATE); INSERT INTO student_mental_health (student_id, school_id, mental_health_score, date) VALUES (1, 101, 75, '2021-09-01'); INSERT INTO student_mental_health (student_id, school_id, mental_health_score, date) VALUES (2, 101, 80, '2021-09-02');", "sql": "SELECT school_id, AVG(mental_health_score) as avg_mental_health_score FROM student_mental_health WHERE date >= DATEADD(year, -1, GETDATE()) GROUP BY school_id ORDER BY avg_mental_health_score DESC;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 197, "num_statements": 1} {"question": "Insert new labor productivity records for mines in Arizona with productivity scores above 4.5.", "schema": "CREATE TABLE labor_productivity (id INT, mine_id INT, productivity_score FLOAT, FOREIGN KEY (mine_id) REFERENCES mines(id)); CREATE TABLE mines (id INT, name VARCHAR(50), location VARCHAR(50), PRIMARY KEY(id)); INSERT INTO mines (id, name, location) VALUES (13, 'Arizona Mine', 'Arizona'); INSERT INTO mines (id, name, location) VALUES (14, 'Southwest Mine', 'Arizona');", "sql": "INSERT INTO labor_productivity (id, mine_id, productivity_score) VALUES (9, 13, 4.6), (10, 14, 4.8);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 100, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many Silver medals did the Nation of Croatia receive with a Total medal of more than 1?", "schema": "CREATE TABLE table_name_3 (silver INTEGER, nation VARCHAR, bronze VARCHAR, total VARCHAR)", "sql": "SELECT MIN(silver) FROM table_name_3 WHERE bronze = 1 AND total > 1 AND nation = 'croatia';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 91, "num_statements": 1} {"question": "PostgreSQL regression test 'with': Write the SELECT query (example 73).", "schema": null, "sql": "select * from v_search;", "explanation": "Regression test for With in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select * from v_search) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 23, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the year of Megáll az Idö?", "schema": "CREATE TABLE table_name_60 (year VARCHAR, original_title VARCHAR)", "sql": "SELECT year FROM table_name_60 WHERE original_title = 'megáll az idö';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 70, "num_statements": 1} {"question": "What is the total quantity of coal, iron, and gold mined by each mine?", "schema": "CREATE TABLE Mine (MineID int, MineName varchar(50), Location varchar(50)); CREATE TABLE Coal (CoalID int, MineID int, CoalQuantity int); CREATE TABLE Iron (IronID int, MineID int, IronQuantity int); CREATE TABLE Gold (GoldID int, MineID int, GoldQuantity int); INSERT INTO Mine VALUES (1, 'ABC Mine', 'Colorado'), (2, 'DEF Mine', 'Wyoming'), (3, 'GHI Mine', 'West Virginia'); INSERT INTO Coal VALUES (1, 1, 5000), (2, 2, 7000), (3, 3, 6000); INSERT INTO Iron VALUES (1, 1, 8000), (2, 2, 9000), (3, 3, 10000); INSERT INTO Gold VALUES (1, 1, 12000), (2, 2, 15000), (3, 3, 18000);", "sql": "SELECT MineName, SUM(CoalQuantity) as TotalCoalQuantity, SUM(IronQuantity) as TotalIronQuantity, SUM(GoldQuantity) as TotalGoldQuantity FROM Mine LEFT JOIN Coal ON Mine.MineID = Coal.MineID LEFT JOIN Iron ON Mine.MineID = Iron.MineID LEFT JOIN Gold ON Mine.MineID = Gold.MineID GROUP BY MineName;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 296, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'pageinspect' (example 26).", "schema": null, "sql": "SELECT magic, version, ntuples, bsize, bmsize, bmshift, maxbucket, highmask,\nlowmask, ovflpoint, firstfree, nmaps, procid, spares, mapp FROM\nhash_metapage_info(get_raw_page('test_hash_a_idx', 5));", "explanation": "Example query from the 'pageinspect' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 196, "num_statements": 1} {"question": "PostgreSQL regression test 'rangetypes': Write the SELECT query (example 25).", "schema": null, "sql": "select '(!,()'::textrange;", "explanation": "Regression test for Rangetypes in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select '(!,()'::textrange) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 26, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the attendance of week 12?", "schema": "CREATE TABLE table_name_63 (attendance INTEGER, week VARCHAR)", "sql": "SELECT MIN(attendance) FROM table_name_63 WHERE week = 12;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 58, "num_statements": 1} {"question": "How many 'Assault' and 'Robbery' crimes were reported in each district for 2021, from the 'CrimeStats' table?", "schema": "CREATE TABLE CrimeStats (district VARCHAR(20), crimeType VARCHAR(20), year INT, number INT);", "sql": "SELECT district, crimeType, SUM(number) FROM CrimeStats WHERE (crimeType IN ('Assault', 'Robbery') AND year = 2021) GROUP BY district, crimeType;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 145, "num_statements": 1} {"question": "What is the total installed capacity of renewable energy projects, broken down by country and project type?", "schema": "CREATE TABLE renewable_energy (country VARCHAR(50), project_type VARCHAR(50), installed_capacity INT); INSERT INTO renewable_energy (country, project_type, installed_capacity) VALUES ('USA', 'Wind', 3000), ('USA', 'Solar', 5000), ('Mexico', 'Wind', 2000), ('Mexico', 'Solar', 4000);", "sql": "SELECT country, project_type, SUM(installed_capacity) FROM renewable_energy GROUP BY country, project_type;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 107, "num_statements": 1} {"question": "PostgreSQL regression test 'xmlmap': Write the SELECT query (example 32).", "schema": null, "sql": "SELECT schema_to_xml('testxmlschema', true, false, '');", "explanation": "Regression test for Xmlmap in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT schema_to_xml('testxmlschema', true, false, '')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 55, "num_statements": 1} {"question": "How many local vendors have partnered with our platform in Spain?", "schema": "CREATE TABLE vendors (id INT, name TEXT, country TEXT); INSERT INTO vendors (id, name, country) VALUES (1, 'Vendor A', 'Spain'), (2, 'Vendor B', 'France');", "sql": "SELECT COUNT(*) FROM vendors WHERE country = 'Spain';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 53, "num_statements": 1} {"question": "What is the publication rate of graduate students in the Art department?", "schema": "CREATE TABLE students (id INT, name VARCHAR(100), department VARCHAR(50), publication_count INT); INSERT INTO students VALUES (1, 'Taylor Brown', 'Art', 1);", "sql": "SELECT department, AVG(publication_count) FROM students WHERE department = 'Art' GROUP BY department;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 101, "num_statements": 1} {"question": "What is the total number of players who play sports and action games?", "schema": "CREATE TABLE Players (PlayerID INT, GameType VARCHAR(10)); INSERT INTO Players (PlayerID, GameType) VALUES (1, 'Sports'), (2, 'Strategy'), (3, 'Action'), (4, 'Simulation');", "sql": "SELECT COUNT(*) FROM Players WHERE GameType IN ('Sports', 'Action');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 68, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Find the name of medication used on the patient who stays in room 111?", "schema": "CREATE TABLE Prescribes (Patient VARCHAR, Medication VARCHAR); CREATE TABLE Medication (name VARCHAR, Code VARCHAR); CREATE TABLE patient (SSN VARCHAR); CREATE TABLE stay (Patient VARCHAR)", "sql": "SELECT T4.name FROM stay AS T1 JOIN patient AS T2 ON T1.Patient = T2.SSN JOIN Prescribes AS T3 ON T3.Patient = T2.SSN JOIN Medication AS T4 ON T3.Medication = T4.Code WHERE room = 111;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 184, "num_statements": 1} {"question": "What is the total billing amount for cases with a successful outcome in New York?", "schema": "CREATE TABLE cases (case_id INT, case_outcome VARCHAR(20), billing_amount DECIMAL(10, 2), case_location VARCHAR(20)); INSERT INTO cases (case_id, case_outcome, billing_amount, case_location) VALUES (1, 'Successful', 3000, 'New York'), (2, 'Unsuccessful', 2000, 'New York');", "sql": "SELECT SUM(billing_amount) FROM cases WHERE case_outcome = 'Successful' AND case_location = 'New York';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 103, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the Money ($) amount of the Player with a To par of e?", "schema": "CREATE TABLE table_name_2 (money___$__ VARCHAR, to_par VARCHAR)", "sql": "SELECT money___$__ FROM table_name_2 WHERE to_par = 'e';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 56, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What company has more than 195.34 billion in sales, ranked greater than 7, more than 11.29 billion in profits, and a market value greater than 198.14 billion?", "schema": "CREATE TABLE table_name_62 (company VARCHAR, market_value__billion_$_ VARCHAR, profits__billion_$_ VARCHAR, sales__billion_$_ VARCHAR, rank VARCHAR)", "sql": "SELECT company FROM table_name_62 WHERE sales__billion_$_ > 195.34 AND rank > 7 AND profits__billion_$_ > 11.29 AND market_value__billion_$_ > 198.14;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 150, "num_statements": 1} {"question": "What is the total weight of astronauts from the USA?", "schema": "CREATE TABLE MedicalProfiles(astronaut_id INT, height INT, weight INT, nationality VARCHAR(50));", "sql": "SELECT SUM(weight) FROM MedicalProfiles WHERE nationality = 'USA';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "What is the count of employees who have completed compliance training?", "schema": "CREATE TABLE Training (EmployeeID INT, TrainingName VARCHAR(50)); INSERT INTO Training (EmployeeID, TrainingName) VALUES (1, 'Diversity and Inclusion Training'), (2, 'Cybersecurity Training'), (3, 'Compliance Training'), (4, 'Cybersecurity Training');", "sql": "SELECT COUNT(DISTINCT EmployeeID) FROM Training WHERE TrainingName = 'Compliance Training';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 91, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the total number of loses of the club with a 4 position and less than 23 goals conceded?", "schema": "CREATE TABLE table_name_96 (loses VARCHAR, position VARCHAR, goals_conceded VARCHAR)", "sql": "SELECT COUNT(loses) FROM table_name_96 WHERE position = 4 AND goals_conceded < 23;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 82, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the original air date for the episode with 3.90 u.s. viewers (millions)?", "schema": "CREATE TABLE table_12722302_2 (original_air_date VARCHAR, us_viewers__million_ VARCHAR)", "sql": "SELECT original_air_date FROM table_12722302_2 WHERE us_viewers__million_ = '3.90';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 83, "num_statements": 1} {"question": "How many customers in each size category have purchased in the last 3 months?", "schema": "CREATE TABLE Size_Categories (size_id INT, size_category VARCHAR(255)); INSERT INTO Size_Categories (size_id, size_category) VALUES (1, 'XS'), (2, 'S'), (3, 'M'), (4, 'L'), (5, 'XL'), (6, 'XXL'), (7, 'XXXL'); CREATE TABLE Purchase_History (customer_id INT, purchase_date DATE, size_id INT); INSERT INTO Purchase_History (customer_id, purchase_date, size_id) VALUES (1, '2022-03-15', 3), (2, '2022-03-01', 2), (3, '2022-03-10', 5), (4, '2022-03-20', 4), (5, '2022-03-25', 7), (6, '2022-03-30', 1), (7, '2022-04-01', 6);", "sql": "SELECT size_category, COUNT(*) FROM Size_Categories JOIN Purchase_History ON Size_Categories.size_id = Purchase_History.size_id WHERE purchase_date BETWEEN DATE_SUB(CURRENT_DATE, INTERVAL 3 MONTH) AND CURRENT_DATE GROUP BY size_category;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 237, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who was the driver in round 8?", "schema": "CREATE TABLE table_27913160_3 (driver VARCHAR, round VARCHAR)", "sql": "SELECT driver FROM table_27913160_3 WHERE round = 8;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 52, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which title has a translation in it of 1000 lies?", "schema": "CREATE TABLE table_name_32 (title VARCHAR, translation VARCHAR)", "sql": "SELECT title FROM table_name_32 WHERE translation = '1000 lies';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 64, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'without_overlaps' (example 454).", "schema": null, "sql": "DELETE FROM temporal_fk_rng2rng WHERE id = '[3,4)';", "explanation": "DML from PostgreSQL core regression test for Without Overlaps.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_delete", "is_postgresql_specific": false, "sql_length": 51, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who are the rowers for Australia?", "schema": "CREATE TABLE table_name_54 (rowers VARCHAR, country VARCHAR)", "sql": "SELECT rowers FROM table_name_54 WHERE country = 'australia';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 61, "num_statements": 1} {"question": "What is the average time to resolution for security incidents in the finance department, and how does it compare to the average for the entire organization?", "schema": "CREATE TABLE incidents (incident_id INT, incident_date DATE, resolution_date DATE, department VARCHAR(50));", "sql": "SELECT AVG(DATEDIFF(resolution_date, incident_date)) as avg_resolution_time_finance FROM incidents WHERE department = 'finance'; SELECT AVG(DATEDIFF(resolution_date, incident_date)) as avg_resolution_time_org FROM incidents;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 224, "num_statements": 2} {"question": "Show the average cultural competency score for community health workers in each region", "schema": "CREATE TABLE community_health_workers (id INT PRIMARY KEY, name VARCHAR(255), region VARCHAR(255), years_experience INT, cultural_competency_score INT); INSERT INTO community_health_workers (id, name, region, years_experience, cultural_competency_score) VALUES (1, 'Ada Williams', 'Southeast', 8, 95), (2, 'Brian Johnson', 'Midwest', 5, 80), (3, 'Carla Garcia', 'West', 12, 90), (4, 'Ella Jones', 'Northeast', 6, 85), (5, 'Farhad Ahmed', 'South', 10, 93), (6, 'Graciela Gutierrez', 'Central', 11, 94), (7, 'Hee Jeong Lee', 'Northwest', 7, 87), (8, 'Ibrahim Hussein', 'East', 9, 96), (9, 'Jasmine Patel', 'Southwest', 8, 91);", "sql": "SELECT region, AVG(cultural_competency_score) as avg_score FROM community_health_workers GROUP BY region;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 105, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What are the names of tourist attractions that can be reached by walk or is at address 660 Shea Crescent?", "schema": "CREATE TABLE Tourist_Attractions (Name VARCHAR, Location_ID VARCHAR, How_to_Get_There VARCHAR); CREATE TABLE Locations (Location_ID VARCHAR, Address VARCHAR)", "sql": "SELECT T2.Name FROM Locations AS T1 JOIN Tourist_Attractions AS T2 ON T1.Location_ID = T2.Location_ID WHERE T1.Address = '660 Shea Crescent' OR T2.How_to_Get_There = 'walk';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 173, "num_statements": 1} {"question": "What is the total volume of marine protected areas in the Pacific Ocean?", "schema": "CREATE TABLE marine_protected_areas (area_name TEXT, area_size INTEGER, avg_depth REAL, ocean_basin TEXT);", "sql": "SELECT SUM(area_size) FROM marine_protected_areas WHERE ocean_basin = 'Pacific';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 80, "num_statements": 1} {"question": "Delete records of machines that have been under maintenance for over a year.", "schema": "CREATE TABLE machines (id INT, model VARCHAR(50), year INT, status VARCHAR(50), maintenance_start_date DATE); INSERT INTO machines (id, model, year, status, maintenance_start_date) VALUES (1, 'CNC Mill', 2015, 'Operational', '2021-02-01'); INSERT INTO machines (id, model, year, status, maintenance_start_date) VALUES (2, '3D Printer', 2018, 'Under Maintenance', '2022-05-10');", "sql": "DELETE FROM machines WHERE status = 'Under Maintenance' AND maintenance_start_date <= DATEADD(year, -1, GETDATE());", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_delete", "is_postgresql_specific": false, "sql_length": 115, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'float4' (example 26).", "schema": null, "sql": "INSERT INTO FLOAT4_TBL(f1) VALUES ('123 5');", "explanation": "DML from PostgreSQL core regression test for Float4.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 55, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which country has Hydra Head Records on February 2005?", "schema": "CREATE TABLE table_name_96 (country VARCHAR, label VARCHAR, date VARCHAR)", "sql": "SELECT country FROM table_name_96 WHERE label = 'hydra head records' AND date = 'february 2005';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 96, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is 4zr's callsign?", "schema": "CREATE TABLE table_name_67 (callsign VARCHAR, on_air_id VARCHAR)", "sql": "SELECT callsign FROM table_name_67 WHERE on_air_id = '4zr';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 59, "num_statements": 1} {"question": "PostgreSQL regression test 'jsonb_jsonpath': Write the SELECT query (example 406).", "schema": null, "sql": "select jsonb_path_query('\"12:34:56 +05:30\"', '$.date()');", "explanation": "Regression test for Jsonb Jsonpath in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select jsonb_path_query('\"12:34:56 +05:30\"', '$.date()')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "advanced", "category": "query_select", "is_postgresql_specific": false, "sql_length": 57, "num_statements": 1} {"question": "Show the most common marine species by location.", "schema": "CREATE TABLE SpeciesLocation (ID INT, Species VARCHAR(50), Location VARCHAR(50), Count INT, ObservationDate DATE); INSERT INTO SpeciesLocation (ID, Species, Location, Count, ObservationDate) VALUES (1, 'Starfish', 'Atlantic', 300, '2020-01-01'), (2, 'Dolphin', 'Pacific', 250, '2020-01-02'), (3, 'Shark', 'Atlantic', 400, '2020-01-03'), (4, 'Jellyfish', 'Atlantic', 500, '2020-01-04');", "sql": "SELECT Location, Species, Count, RANK() OVER (PARTITION BY Location ORDER BY Count DESC) as Rank FROM SpeciesLocation;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "query_window_function", "is_postgresql_specific": true, "sql_length": 118, "num_statements": 1} {"question": "Identify hotels in 'Asia' that have adopted AI chatbots and offer a 'spa' facility.", "schema": "CREATE TABLE hotel_tech (hotel_id INT, hotel_name TEXT, region TEXT, ai_chatbot BOOLEAN, spa BOOLEAN); INSERT INTO hotel_tech (hotel_id, hotel_name, region, ai_chatbot, spa) VALUES (1, 'Hotel Marina', 'Asia', TRUE, TRUE), (2, 'Hotel Bellagio', 'Europe', FALSE, FALSE);", "sql": "SELECT hotel_name FROM hotel_tech WHERE region = 'Asia' AND ai_chatbot = TRUE AND spa = TRUE;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 93, "num_statements": 1} {"question": "What is the average price of samarium in Japan in 2017?", "schema": "CREATE TABLE japan_samarium (id INT, year INT, price DECIMAL); INSERT INTO japan_samarium (id, year, price) VALUES (1, 2015, 250), (2, 2016, 260), (3, 2017, 270);", "sql": "SELECT AVG(price) FROM japan_samarium WHERE year = 2017;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 56, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the p1 diameter (mm) when .300 lapua magnum is the chambering?", "schema": "CREATE TABLE table_26967904_2 (p1_diameter__mm_ VARCHAR, chambering VARCHAR)", "sql": "SELECT p1_diameter__mm_ FROM table_26967904_2 WHERE chambering = '.300 Lapua Magnum';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 85, "num_statements": 1} {"question": "What is the percentage of female content creators in the Pacific region?", "schema": "CREATE TABLE content_creators (id INT, gender VARCHAR, region VARCHAR); INSERT INTO content_creators (id, gender, region) VALUES (1, 'Female', 'Pacific'); INSERT INTO content_creators (id, gender, region) VALUES (2, 'Male', 'Atlantic');", "sql": "SELECT region, gender, COUNT(*) as count, ROUND(COUNT(*) * 100.0 / (SELECT COUNT(*) FROM content_creators WHERE region = 'Pacific'), 2) as percentage FROM content_creators WHERE region = 'Pacific' GROUP BY region, gender;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 221, "num_statements": 1} {"question": "Write the PL/pgSQL object from PostgreSQL regression test 'update' (example 141).", "schema": null, "sql": "-- Here, RLS checks should succeed while moving row from part_a_10_a_20 to\n-- part_d_1_15. Even though the UPDATE is setting 'c' to an odd number, the\n-- trigger at the destination partition again makes it an even number.\nUPDATE range_parted set a = 'b', c = 151 WHERE a = 'a' and c = 200;", "explanation": "PL/pgSQL object from PostgreSQL core test for Update.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 289, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'partition_join' (example 267).", "schema": null, "sql": "CREATE TABLE prt1_adv_p1 PARTITION OF prt1_adv FOR VALUES FROM (100) TO (200);", "explanation": "DDL from PostgreSQL core regression test for Partition Join.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_table", "is_postgresql_specific": false, "sql_length": 78, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many circuits had a winning team of #1 patrón highcroft racing ang gtc winning team #81 alex job racing ?", "schema": "CREATE TABLE table_24037660_2 (circuit VARCHAR, lmp_winning_team VARCHAR, gtc_winning_team VARCHAR)", "sql": "SELECT COUNT(circuit) FROM table_24037660_2 WHERE lmp_winning_team = '#1 Patrón Highcroft Racing' AND gtc_winning_team = '#81 Alex Job Racing';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 143, "num_statements": 1} {"question": "Rank suppliers by their sustainability scores.", "schema": "CREATE TABLE suppliers (id INT, name VARCHAR(50), country VARCHAR(50), sustainability_score INT); INSERT INTO suppliers (id, name, country, sustainability_score) VALUES (1, 'GreenTech', 'USA', 85); INSERT INTO suppliers (id, name, country, sustainability_score) VALUES (2, 'EcoInnovations', 'Canada', 90);", "sql": "SELECT *, ROW_NUMBER() OVER (ORDER BY sustainability_score DESC) as rank FROM suppliers;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_window_function", "is_postgresql_specific": true, "sql_length": 88, "num_statements": 1} {"question": "What are the names and construction dates of all tunnels in Japan that have a length greater than 5000 meters?", "schema": "CREATE TABLE Tunnels (TunnelID INT, Name TEXT, Length FLOAT, ConstructionYear INT, Country TEXT); INSERT INTO Tunnels (TunnelID, Name, Length, ConstructionYear, Country) VALUES (1, 'Seikan Tunnel', 53.85, 1988, 'Japan');", "sql": "SELECT Tunnels.Name, Tunnels.ConstructionYear FROM Tunnels WHERE Tunnels.Length > 5000.0 AND Tunnels.Country = 'Japan';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 119, "num_statements": 1} {"question": "Calculate the revenue by dish type.", "schema": "CREATE TABLE orders (id INT, dish_id INT, dish_type TEXT, price FLOAT); INSERT INTO orders (id, dish_id, dish_type, price) VALUES (1, 1, 'vegetarian', 7.50), (2, 3, 'non-vegetarian', 11.25), (3, 2, 'vegetarian', 8.95), (4, 1, 'vegetarian', 7.50), (5, 4, 'vegan', 9.75), (6, 5, 'vegan', 10.50), (7, 2, 'vegetarian', 8.95), (8, 1, 'vegetarian', 7.50);", "sql": "SELECT dish_type, SUM(price) FROM orders GROUP BY dish_type;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 60, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the score for the game with the record of 3–12?", "schema": "CREATE TABLE table_27698941_6 (score VARCHAR, record VARCHAR)", "sql": "SELECT score FROM table_27698941_6 WHERE record = '3–12';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 57, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many fastest laps for the nation with 32 (30) entries and starts and fewer than 2 podiums?", "schema": "CREATE TABLE table_name_74 (fastest_laps INTEGER, race_entries__starts_ VARCHAR, podiums VARCHAR)", "sql": "SELECT MIN(fastest_laps) FROM table_name_74 WHERE race_entries__starts_ = '32 (30)' AND podiums < 2;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 100, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'numeric' (example 804).", "schema": null, "sql": "INSERT INTO num_input_test(n1) VALUES ('123._456');", "explanation": "DML from PostgreSQL core regression test for Numeric.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 51, "num_statements": 1} {"question": "List travel advisories issued for Colombia in Q2 2022.", "schema": "CREATE TABLE travel_advisories (country VARCHAR(20), quarter INT, advisory VARCHAR(100)); INSERT INTO travel_advisories (country, quarter, advisory) VALUES ('Colombia', 2, 'Avoid non-essential travel'), ('Peru', 2, 'Exercise increased caution');", "sql": "SELECT advisory FROM travel_advisories WHERE country = 'Colombia' AND quarter = 2;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 82, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'jsonb_plperl' (example 27).", "schema": null, "sql": "SELECT roundtrip('[null, null]', 'ARRAY');", "explanation": "Example query from the 'jsonb_plperl' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 42, "num_statements": 1} {"question": "Delete outdated building permits from the BuildingPermits table.", "schema": "CREATE TABLE BuildingPermits (PermitID INT, PermitNumber VARCHAR(50), IssueDate DATE, ExpirationDate DATE, ProjectID INT, FOREIGN KEY (ProjectID) REFERENCES Projects(ProjectID));", "sql": "DELETE FROM BuildingPermits WHERE ExpirationDate < CURDATE();", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "dml_delete", "is_postgresql_specific": false, "sql_length": 61, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the highest number drawn when the position was 12?", "schema": "CREATE TABLE table_name_50 (drawn INTEGER, position VARCHAR)", "sql": "SELECT MAX(drawn) FROM table_name_50 WHERE position = 12;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 57, "num_statements": 1} {"question": "What is the total duration of all classical songs?", "schema": "CREATE TABLE songs (id INT, title TEXT, length FLOAT, genre TEXT); INSERT INTO songs (id, title, length, genre) VALUES (1, 'Song1', 3.2, 'classical'), (2, 'Song2', 4.1, 'rock'), (3, 'Song3', 3.8, 'pop'), (4, 'Song4', 2.1, 'classical'), (5, 'Song5', 5.3, 'classical');", "sql": "SELECT SUM(length) FROM songs WHERE genre = 'classical';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 56, "num_statements": 1} {"question": "Determine the percentage of regions with a high risk rating in the last 6 months.", "schema": "CREATE TABLE GeopoliticalRiskAssessments (Id INT, Region VARCHAR(255), RiskRating VARCHAR(255), AssessmentDate DATE); INSERT INTO GeopoliticalRiskAssessments (Id, Region, RiskRating, AssessmentDate) VALUES (5, 'Europe', 'High', '2021-12-01'); INSERT INTO GeopoliticalRiskAssessments (Id, Region, RiskRating, AssessmentDate) VALUES (6, 'Africa', 'Medium', '2022-02-15');", "sql": "SELECT COUNT(*) * 100.0 / (SELECT COUNT(*) FROM GeopoliticalRiskAssessments WHERE AssessmentDate >= DATEADD(month, -6, GETDATE())) as Percentage FROM GeopoliticalRiskAssessments WHERE RiskRating = 'High';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 204, "num_statements": 1} {"question": "Write a SQL query to retrieve the names, claim dates, and claim amounts for claims with a status of 'Approved'", "schema": "SELECT p.name, c.claim_date, c.claim_amount FROM claims c JOIN policyholders p ON c.policyholder_id = p.policyholder_id WHERE c.status = 'Approved';", "sql": "SELECT p.name, c.claim_date, c.claim_amount FROM claims c JOIN policyholders p ON c.policyholder_id = p.policyholder_id WHERE c.status = 'Approved';", "explanation": null, "validation_query": null, "source": "synthetic_text_to_sql", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 148, "num_statements": 1} {"question": "What are the names of all wheelchair-accessible bus stops?", "schema": "CREATE TABLE Stops(id INT, name TEXT, wheelchair_accessible BOOLEAN);", "sql": "SELECT name FROM Stops WHERE wheelchair_accessible = TRUE;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 58, "num_statements": 1} {"question": "PostgreSQL Plpgsql: show example 72.", "schema": null, "sql": "CREATE OR REPLACE FUNCTION testfunc(integer) RETURNS integer AS $$ .... $$ LANGUAGE plpgsql;", "explanation": "Example from PostgreSQL documentation on Plpgsql.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "plpgsql_function", "is_postgresql_specific": true, "sql_length": 92, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many warfare deaths befell the choctaw nation?", "schema": "CREATE TABLE table_name_33 (deaths_from_warfare VARCHAR, nation VARCHAR)", "sql": "SELECT deaths_from_warfare FROM table_name_33 WHERE nation = 'choctaw';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 71, "num_statements": 1} {"question": "What is the average cost of materials for bridges constructed in California?", "schema": "CREATE TABLE Bridges (BridgeID INT, Location VARCHAR(20), Cost FLOAT); INSERT INTO Bridges (BridgeID, Location, Cost) VALUES (1, 'California', 5000000);", "sql": "SELECT AVG(Cost) FROM Bridges WHERE Location = 'California' AND BridgeID IN (SELECT BridgeID FROM Bridges WHERE Location = 'California');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 137, "num_statements": 1} {"question": "PostgreSQL regression test 'create_index': Write the SELECT query (example 363).", "schema": null, "sql": "SELECT count(*) FROM tenk1\n WHERE hundred = 42 AND (thousand = 42 OR thousand = 99);", "explanation": "Regression test for Create Index in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT count(*) FROM tenk1\n WHERE hundred = 42 AND (thousand = 42 OR thousand = 99)) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 85, "num_statements": 1} {"question": "What is the average capacity of clinics in rural Montana?", "schema": "CREATE TABLE clinics (id INT, name VARCHAR(50), type VARCHAR(50), capacity INT, region VARCHAR(50)); INSERT INTO clinics (id, name, type, capacity, region) VALUES (1, 'Clinic A', 'Primary Care', 50, 'Rural Montana');", "sql": "SELECT AVG(clinics.capacity) FROM clinics WHERE clinics.region = 'Rural Montana';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 81, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'rowsecurity' (example 468).", "schema": null, "sql": "CREATE TABLE z1 (a int, b text);", "explanation": "DDL from PostgreSQL core regression test for Rowsecurity.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_table", "is_postgresql_specific": false, "sql_length": 32, "num_statements": 1} {"question": "What is the total revenue for each salesperson who sold garments with a price above '200'?", "schema": "CREATE TABLE sales (id INT, salesperson_id INT, garment_id INT, region TEXT, price INT); INSERT INTO sales (id, salesperson_id, garment_id, region, price) VALUES (1, 1, 1, 'Paris', 250), (2, 1, 2, 'London', 120), (3, 2, 3, 'Paris', 180), (4, 2, 4, 'London', 220), (5, 3, 5, 'Berlin', 200), (6, 3, 6, 'Berlin', 160);", "sql": "SELECT salesperson_id, SUM(price) AS total_revenue FROM sales WHERE price > 200 GROUP BY salesperson_id;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 104, "num_statements": 1} {"question": "How many students with disabilities are enrolled in each region?", "schema": "CREATE TABLE StudentsWithDisabilities (ID INT, Name VARCHAR(50), Disability VARCHAR(50), Region VARCHAR(50)); INSERT INTO StudentsWithDisabilities (ID, Name, Disability, Region) VALUES (1, 'John Doe', 'Visual Impairment', 'Northeast'); INSERT INTO StudentsWithDisabilities (ID, Name, Disability, Region) VALUES (2, 'Jane Smith', 'Hearing Impairment', 'Southeast'); INSERT INTO StudentsWithDisabilities (ID, Name, Disability, Region) VALUES (3, 'Bob Johnson', 'Learning Disability', 'Midwest');", "sql": "SELECT Region, COUNT(*) as NumStudents FROM StudentsWithDisabilities GROUP BY Region;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 85, "num_statements": 1} {"question": "How many algorithmic fairness incidents were reported in Oceania in the last month?", "schema": "CREATE TABLE fairness_incidents (incident_id INT, incident_date DATE, region TEXT); INSERT INTO fairness_incidents (incident_id, incident_date, region) VALUES (1, '2022-06-15', 'Oceania'), (2, '2022-07-11', 'Oceania'), (3, '2022-08-01', 'Oceania');", "sql": "SELECT COUNT(*) FROM fairness_incidents WHERE region = 'Oceania' AND incident_date >= '2022-07-01' AND incident_date < '2022-08-01';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 132, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the lowest average for interview more than 9.57 and delaware and evening gown more than 9.77", "schema": "CREATE TABLE table_name_42 (average INTEGER, evening_gown VARCHAR, interview VARCHAR, country VARCHAR)", "sql": "SELECT MIN(average) FROM table_name_42 WHERE interview > 9.57 AND country = 'delaware' AND evening_gown > 9.77;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 111, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which School has a #/ County of 85 wabash, and an IHSAA Football Class of A, and a Mascot of norsemen?", "schema": "CREATE TABLE table_name_43 (school VARCHAR, mascot VARCHAR, _number___county VARCHAR, ihsaa_football_class VARCHAR)", "sql": "SELECT school FROM table_name_43 WHERE _number___county = '85 wabash' AND ihsaa_football_class = 'a' AND mascot = 'norsemen';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 125, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'intarray' (example 79).", "schema": null, "sql": "SELECT count(*) from test__int WHERE a @> '{20,23}' or a @> '{50,68}';", "explanation": "Example query from the 'intarray' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 70, "num_statements": 1} {"question": "PostgreSQL regression test 'jsonb_jsonpath': Write the SELECT query (example 78).", "schema": null, "sql": "select jsonb_path_query('1', 'lax $[*]');", "explanation": "Regression test for Jsonb Jsonpath in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select jsonb_path_query('1', 'lax $[*]')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "advanced", "category": "query_select", "is_postgresql_specific": false, "sql_length": 41, "num_statements": 1} {"question": "How many TEUs were handled by each port in the cargo_handling table in chronological order?", "schema": "CREATE TABLE cargo_handling (port_id INT, port_name VARCHAR(50), teu_count INT, handling_date DATE); INSERT INTO cargo_handling (port_id, port_name, teu_count, handling_date) VALUES (1, 'Port_A', 2000, '2022-01-01'), (2, 'Port_B', 3000, '2022-01-02'), (3, 'Port_C', 1000, '2022-01-03');", "sql": "SELECT port_name, teu_count, ROW_NUMBER() OVER (PARTITION BY port_name ORDER BY handling_date) as rn FROM cargo_handling;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "query_window_function", "is_postgresql_specific": true, "sql_length": 121, "num_statements": 1} {"question": "Delete all records of volunteers who have not served any hours in the last year.", "schema": "CREATE TABLE volunteers (id INT, name TEXT, volunteer_date DATE, hours_served INT);", "sql": "DELETE FROM volunteers WHERE id NOT IN (SELECT id FROM volunteers WHERE hours_served > 0 AND volunteer_date > DATE_SUB(CURRENT_DATE, INTERVAL 1 YEAR));", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "dml_delete", "is_postgresql_specific": false, "sql_length": 151, "num_statements": 1} {"question": "What is the average donation amount per month in the 'donations' table?", "schema": "CREATE TABLE donations (id INT, donation_date DATE, amount DECIMAL(10,2));", "sql": "SELECT AVG(amount) AS avg_monthly_donation FROM donations WHERE donation_date BETWEEN '2022-01-01' AND '2022-12-31' GROUP BY EXTRACT(MONTH FROM donation_date);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 159, "num_statements": 1} {"question": "PostgreSQL Indices: show example 23.", "schema": null, "sql": "CREATE INDEX mytable_cat_data ON mytable (category, data);", "explanation": "Example from PostgreSQL documentation on Indices.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "ddl_index", "is_postgresql_specific": false, "sql_length": 58, "num_statements": 1} {"question": "How many creative AI applications have been developed in Africa?", "schema": "CREATE TABLE ai_applications (app_id INT, name TEXT, country TEXT, category TEXT); INSERT INTO ai_applications (app_id, name, country, category) VALUES (1, 'ArtBot', 'Nigeria', 'Creative'), (2, 'MusicGen', 'South Africa', 'Creative'), (3, 'DataViz', 'US', 'Analytical'), (4, 'ChatAssist', 'Canada', 'Assistive');", "sql": "SELECT COUNT(*) FROM ai_applications WHERE country IN (SELECT country FROM ai_applications WHERE category = 'Creative') AND category = 'Creative';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 146, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Wins of 0 involves what team?", "schema": "CREATE TABLE table_name_96 (team VARCHAR, wins VARCHAR)", "sql": "SELECT team FROM table_name_96 WHERE wins = 0;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 46, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the lowest average of the contestant with an interview of 8.275 and an evening gown bigger than 8.7?", "schema": "CREATE TABLE table_name_31 (average INTEGER, interview VARCHAR, evening_gown VARCHAR)", "sql": "SELECT MIN(average) FROM table_name_31 WHERE interview = 8.275 AND evening_gown > 8.7;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 86, "num_statements": 1} {"question": "What is the total number of primary care clinics and their average rating, grouped by state?", "schema": "CREATE TABLE public.healthcare_access (id SERIAL PRIMARY KEY, state TEXT, city TEXT, facility_type TEXT, patients_served INT, rating INT); INSERT INTO public.healthcare_access (state, city, facility_type, patients_served, rating) VALUES ('California', 'San Diego', 'Primary Care Clinic', 7000, 8), ('New York', 'New York City', 'Primary Care Clinic', 12000, 7), ('California', 'Los Angeles', 'Specialty Clinic', 6000, 9);", "sql": "SELECT state, facility_type, AVG(rating) AS avg_rating, COUNT(*) FILTER (WHERE facility_type = 'Primary Care Clinic') AS clinic_count FROM public.healthcare_access GROUP BY state, facility_type;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": true, "sql_length": 194, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the candidates for fred e. busbey?", "schema": "CREATE TABLE table_1342149_13 (candidates VARCHAR, incumbent VARCHAR)", "sql": "SELECT candidates FROM table_1342149_13 WHERE incumbent = 'Fred E. Busbey';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 75, "num_statements": 1} {"question": "What is the minimum dissolved oxygen level for each species across all tanks?", "schema": "CREATE TABLE Tank_Oxygen (tank VARCHAR(50), species VARCHAR(50), dissolved_oxygen FLOAT); INSERT INTO Tank_Oxygen (tank, species, dissolved_oxygen) VALUES ('Tank1', 'Salmon', 6.2), ('Tank1', 'Trout', 7.0), ('Tank2', 'Salmon', 6.5), ('Tank2', 'Tilapia', 5.6);", "sql": "SELECT species, MIN(dissolved_oxygen) FROM Tank_Oxygen GROUP BY species;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 72, "num_statements": 1} {"question": "Find the earliest launch date of any satellite still in orbit.", "schema": "CREATE TABLE satellites_in_orbit (id INT, satellite_id VARCHAR(50), launch_date DATE, in_orbit BOOLEAN);", "sql": "SELECT MIN(launch_date) FROM satellites_in_orbit WHERE in_orbit = TRUE;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 71, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the College of the Player from San Miguel Beermen PBA Team?", "schema": "CREATE TABLE table_name_31 (college VARCHAR, pba_team VARCHAR)", "sql": "SELECT college FROM table_name_31 WHERE pba_team = 'san miguel beermen';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 72, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who constructed grid 14?", "schema": "CREATE TABLE table_name_34 (constructor VARCHAR, grid VARCHAR)", "sql": "SELECT constructor FROM table_name_34 WHERE grid = 14;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 54, "num_statements": 1} {"question": "Show an example of PostgreSQL CREATE FOREIGN DATA WRAPPER (example 3).", "schema": null, "sql": "CREATE FOREIGN DATA WRAPPER mywrapper OPTIONS (debug 'true');", "explanation": "PostgreSQL CREATE FOREIGN DATA WRAPPER command.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 61, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What Country had a Player with a Score of 72-66-72=210?", "schema": "CREATE TABLE table_name_17 (country VARCHAR, score VARCHAR)", "sql": "SELECT country FROM table_name_17 WHERE score = 72 - 66 - 72 = 210;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 67, "num_statements": 1} {"question": "list the names of employees who have never published an article", "schema": "CREATE TABLE Employees (id INT, name VARCHAR(50)); CREATE TABLE Articles (id INT, author_id INT, published_date DATE); INSERT INTO Employees (id, name) VALUES (1, 'John Doe'); INSERT INTO Employees (id, name) VALUES (2, 'Jane Smith'); INSERT INTO Articles (id, author_id, published_date) VALUES (1, 1, '2022-01-01'); INSERT INTO Articles (id, author_id, published_date) VALUES (2, 2, '2022-01-02');", "sql": "SELECT e.id, e.name FROM Employees e LEFT JOIN Articles a ON e.id = a.author_id WHERE a.id IS NULL;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 99, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many outcome have a score of 7–6 (9–7) , 6–3?", "schema": "CREATE TABLE table_22834834_3 (outcome VARCHAR, score_in_the_final VARCHAR)", "sql": "SELECT COUNT(outcome) FROM table_22834834_3 WHERE score_in_the_final = '7–6 (9–7) , 6–3';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 89, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: WHAT IS THE MEANING WITH Pīnyīn of chē?", "schema": "CREATE TABLE table_name_61 (meaning VARCHAR, pīnyīn VARCHAR)", "sql": "SELECT meaning FROM table_name_61 WHERE pīnyīn = 'chē';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 55, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the car number for 1997", "schema": "CREATE TABLE table_1688640_4 (car__number VARCHAR, year_started VARCHAR)", "sql": "SELECT car__number FROM table_1688640_4 WHERE year_started = 1997;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "How many employees in the USA are working in the mining industry by department, ordered by the number of employees in descending order?", "schema": "CREATE TABLE employees (id INT, mine_name TEXT, location TEXT, department TEXT, employee_id INT, hire_date DATE); INSERT INTO employees (id, mine_name, location, department, employee_id, hire_date) VALUES (1, 'Golden Mine', 'USA', 'Mining', 1001, '2020-01-01'), (2, 'Silver Mine', 'USA', 'Geology', 1002, '2020-01-02'), (3, 'Bronze Mine', 'USA', 'Engineering', 1003, '2020-01-03');", "sql": "SELECT department, COUNT(employee_id) as num_employees FROM employees WHERE location = 'USA' GROUP BY department ORDER BY num_employees DESC;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 141, "num_statements": 1} {"question": "What is the maximum production cost for fair trade products?", "schema": "CREATE TABLE products (product_id INT, product_name VARCHAR(255), production_cost DECIMAL(5,2), fair_trade BOOLEAN); INSERT INTO products (product_id, product_name, production_cost, fair_trade) VALUES (1, 'Fair Trade Coffee', 8.99, true); INSERT INTO products (product_id, product_name, production_cost, fair_trade) VALUES (2, 'Fair Trade Tea', 6.50, true);", "sql": "SELECT MAX(production_cost) FROM products WHERE fair_trade = true;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: For entries with a lost of 5, what is the sum of the draw entry?", "schema": "CREATE TABLE table_name_23 (draw INTEGER, lost VARCHAR)", "sql": "SELECT SUM(draw) FROM table_name_23 WHERE lost = 5;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 51, "num_statements": 1} {"question": "What is the average age of vessels that have had safety incidents in the Mediterranean in 2021?", "schema": "CREATE TABLE Region (region_id INT PRIMARY KEY, region_name VARCHAR(255)); INSERT INTO Region (region_id, region_name) VALUES (1, 'Mediterranean'); CREATE TABLE Vessel (vessel_id INT PRIMARY KEY, vessel_name VARCHAR(255), region_id INT, build_date DATE); CREATE TABLE Safety_Incident (incident_id INT PRIMARY KEY, vessel_id INT, incident_date DATE);", "sql": "SELECT AVG(DATEDIFF('day', V.build_date, GETDATE())) FROM Vessel V JOIN Safety_Incident SI ON V.vessel_id = SI.vessel_id WHERE SI.incident_date >= '2021-01-01' AND SI.incident_date < '2022-01-01' AND V.region_id = (SELECT region_id FROM Region WHERE region_name = 'Mediterranean');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 281, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What percentage of The Bronx voters occured when Manhattan had 29.9% of voters?", "schema": "CREATE TABLE table_1108394_34 (the_bronx VARCHAR, manhattan VARCHAR)", "sql": "SELECT the_bronx FROM table_1108394_34 WHERE manhattan = '29.9_percentage';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 75, "num_statements": 1} {"question": "How many solar power plants are there in California and Texas?", "schema": "CREATE TABLE solar_plants (state VARCHAR(50), num_plants INT); INSERT INTO solar_plants (state, num_plants) VALUES ('California', 2153), ('Texas', 1194);", "sql": "SELECT SUM(num_plants) FROM solar_plants WHERE state IN ('California', 'Texas');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 80, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'ltree' (example 41).", "schema": null, "sql": "SELECT index('0.1.2.3.5.4.5.6.8.5.6.8','5.6',-20000);", "explanation": "Example query from the 'ltree' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 53, "num_statements": 1} {"question": "pgTAP test for Pgtap--Unpackaged--0.91.0 (assertion 305).", "schema": null, "sql": "ALTER EXTENSION pgtap ADD FUNCTION hasnt_schema( NAME, TEXT );", "explanation": "SQL assertion from pgTAP test suite for Pgtap--Unpackaged--0.91.0.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 62, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is Championship, when Outcome is \"runner-up\", and when Opponents In Final is \"Gigi Fernández Natalia Zvereva\"?", "schema": "CREATE TABLE table_name_55 (championship VARCHAR, outcome VARCHAR, opponents_in_final VARCHAR)", "sql": "SELECT championship FROM table_name_55 WHERE outcome = 'runner-up' AND opponents_in_final = 'gigi fernández natalia zvereva';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 125, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which player played in 2007?", "schema": "CREATE TABLE table_name_17 (player VARCHAR, season VARCHAR)", "sql": "SELECT player FROM table_name_17 WHERE season = '2007';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 55, "num_statements": 1} {"question": "Find the second-highest salary in the 'administrative' department?", "schema": "CREATE TABLE departments (id INT, name VARCHAR(50)); CREATE TABLE employees (id INT, name VARCHAR(50), dept_id INT, salary DECIMAL(10, 2));", "sql": "SELECT salary FROM (SELECT salary FROM employees WHERE dept_id = (SELECT id FROM departments WHERE name = 'administrative') ORDER BY salary DESC LIMIT 2) t ORDER BY salary LIMIT 1;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 180, "num_statements": 1} {"question": "How many traditional arts are practiced in Oceania?", "schema": "CREATE TABLE traditional_arts (id INT, name TEXT, type TEXT, region TEXT); INSERT INTO traditional_arts (id, name, type, region) VALUES (1, 'Aboriginal Rock Art', 'Painting', 'Oceania');", "sql": "SELECT COUNT(*) FROM traditional_arts WHERE region = 'Oceania';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 63, "num_statements": 1} {"question": "What is the average ticket price for each concert in 'music_festivals' table?", "schema": "CREATE TABLE music_festivals (festival_id INT, concert_name VARCHAR(255), location VARCHAR(255), date DATE, ticket_price DECIMAL(5,2));", "sql": "SELECT concert_name, AVG(ticket_price) FROM music_festivals GROUP BY concert_name;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 82, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'foreign_key' (example 262).", "schema": null, "sql": "INSERT INTO PKTABLE VALUES(42);", "explanation": "DML from PostgreSQL core regression test for Foreign Key.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 31, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who wrote the episode that was directed by Milan Cheylov?", "schema": "CREATE TABLE table_29219286_1 (written_by VARCHAR, directed_by VARCHAR)", "sql": "SELECT written_by FROM table_29219286_1 WHERE directed_by = 'Milan Cheylov';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 76, "num_statements": 1} {"question": "What is the average labor cost for manufacturing garments with the 'silk' fabric type?", "schema": "CREATE TABLE garment_manufacturing (id INT PRIMARY KEY, garment_id INT, manufacturing_date DATE, labor_hours INT, labor_cost DECIMAL(5,2)); INSERT INTO garment_manufacturing (id, garment_id, manufacturing_date, labor_hours, labor_cost) VALUES (1, 1001, '2022-01-03', 2.5, 25.00), (2, 1002, '2022-01-04', 3.0, 30.00); CREATE TABLE fabric_garment (id INT PRIMARY KEY, fabric_type VARCHAR(50), garment_id INT); INSERT INTO fabric_garment (id, fabric_type, garment_id) VALUES (1, 'denim', 1001), (2, 'silk', 1002);", "sql": "SELECT AVG(gm.labor_cost) AS avg_labor_cost FROM garment_manufacturing gm JOIN fabric_garment fg ON gm.garment_id = fg.garment_id WHERE fg.fabric_type = 'silk';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 160, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: If the rebounds are at 87, what are the amount of steals?", "schema": "CREATE TABLE table_25342713_5 (steals VARCHAR, rebounds VARCHAR)", "sql": "SELECT steals FROM table_25342713_5 WHERE rebounds = 87;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 56, "num_statements": 1} {"question": "Calculate the average age difference between athletes in basketball and football.", "schema": "CREATE TABLE athletes(athlete_id INT, name VARCHAR(50), age INT, sport VARCHAR(20));", "sql": "SELECT AVG(basketball_age - football_age) AS avg_age_difference FROM (SELECT AVG(age) AS basketball_age FROM athletes WHERE sport = 'basketball') AS basketball, (SELECT AVG(age) AS football_age FROM athletes WHERE sport = 'football') AS football;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 246, "num_statements": 1} {"question": "What is the number of patients treated for mental health issues by each healthcare provider?", "schema": "CREATE TABLE Patients (PatientID int, ProviderID int, MentalHealthIssue int);CREATE TABLE Providers (ProviderID int, ProviderName varchar(50));", "sql": "SELECT ProviderName, COUNT(PatientID) as PatientCount FROM Patients JOIN Providers ON Patients.ProviderID = Providers.ProviderID GROUP BY ProviderID, ProviderName;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 163, "num_statements": 1} {"question": "PostgreSQL regression test 'foreign_data': Write the SELECT query (example 247).", "schema": null, "sql": "SELECT * FROM information_schema.foreign_table_options ORDER BY 1, 2, 3, 4;", "explanation": "Regression test for Foreign Data in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT * FROM information_schema.foreign_table_options ORDER BY 1, 2, 3, 4) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": true, "sql_length": 75, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'create_view' (example 46).", "schema": null, "sql": "CREATE VIEW v8 AS SELECT * FROM base_table WHERE EXISTS (SELECT 1);", "explanation": "DDL from PostgreSQL core regression test for Create View.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "ddl_view", "is_postgresql_specific": false, "sql_length": 67, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the last year MCA had a Release?", "schema": "CREATE TABLE table_name_40 (year_of_release INTEGER, label VARCHAR)", "sql": "SELECT MAX(year_of_release) FROM table_name_40 WHERE label = 'mca';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 67, "num_statements": 1} {"question": "How many individuals received support in each country?", "schema": "CREATE TABLE SupportRecipients (Id INT, Country VARCHAR(50), SupportType VARCHAR(50)); INSERT INTO SupportRecipients (Id, Country, SupportType) VALUES (1, 'Syria', 'Food'), (2, 'Bangladesh', 'Shelter'), (3, 'Syria', 'Medical'), (4, 'Brazil', 'Food');", "sql": "SELECT Country, COUNT(*) FROM SupportRecipients GROUP BY Country;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 65, "num_statements": 1} {"question": "What is the revenue from medical sales in Colorado in Q2 2022?", "schema": "CREATE TABLE Sales (id INT, dispensary TEXT, state TEXT, sale_type TEXT, revenue INT); INSERT INTO Sales (id, dispensary, state, sale_type, revenue) VALUES (1, 'Bud Mart', 'CO', 'medical', 15000), (2, 'Green Earth', 'WA', 'recreational', 20000), (3, 'Emerald City', 'WA', 'medical', 10000);", "sql": "SELECT SUM(revenue) as q2_medical_revenue FROM Sales WHERE state = 'CO' AND sale_type = 'medical' AND quarter(order_date) = 2 AND year(order_date) = 2022;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 154, "num_statements": 1} {"question": "PostgreSQL regression test 'collate.utf8': Write the SELECT query (example 35).", "schema": null, "sql": "SELECT '൧' ~ '\\d' COLLATE PG_UNICODE_FAST;", "explanation": "Regression test for Collate.Utf8 in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT '൧' ~ '\\d' COLLATE PG_UNICODE_FAST) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 42, "num_statements": 1} {"question": "PostgreSQL regression test 'float4': Write the SELECT query (example 47).", "schema": null, "sql": "SELECT f.* FROM FLOAT4_TBL f WHERE f.f1 = '1004.3';", "explanation": "Regression test for Float4 in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT f.* FROM FLOAT4_TBL f WHERE f.f1 = '1004.3') AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 51, "num_statements": 1} {"question": "Identify the conservation status and number of protected areas for marine species in the Indian Ocean.", "schema": "CREATE TABLE marine_species_indian_ocean (species_name VARCHAR(255), habitat VARCHAR(255)); INSERT INTO marine_species_indian_ocean (species_name, habitat) VALUES ('Whale Shark', 'Indian Ocean'), ('Dugong', 'Indian Ocean'); CREATE TABLE conservation_efforts_indian_ocean (species_name VARCHAR(255), conservation_status VARCHAR(255), protected_areas INT); INSERT INTO conservation_efforts_indian_ocean (species_name, conservation_status, protected_areas) VALUES ('Whale Shark', 'Vulnerable', 25), ('Dugong', 'Endangered', 15);", "sql": "SELECT m.species_name, c.conservation_status, COUNT(c.protected_areas) AS protected_areas_count FROM marine_species_indian_ocean m INNER JOIN conservation_efforts_indian_ocean c ON m.species_name = c.species_name GROUP BY m.species_name, c.conservation_status;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 260, "num_statements": 1} {"question": "What is the total number of buses in the 'transportation' schema?", "schema": "CREATE SCHEMA transportation; CREATE TABLE transportation.buses (id INT PRIMARY KEY, model VARCHAR(255), year INT); INSERT INTO transportation.buses (id, model, year) VALUES (1, 'Mini Bus', 2015), (2, 'City Bus', 2018), (3, 'Electric Bus', 2020);", "sql": "SELECT COUNT(*) FROM transportation.buses;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 42, "num_statements": 1} {"question": "What is the average number of posts per user in the users and posts tables?", "schema": "CREATE TABLE users (id INT, name VARCHAR(50)); INSERT INTO users (id, name) VALUES (1, 'Alice'), (2, 'Bob'), (3, 'Charlie'); CREATE TABLE posts (id INT, user_id INT, content TEXT); INSERT INTO posts (id, user_id, content) VALUES (1, 1, 'Hello'), (2, 1, 'World'), (3, 2, 'SQL');", "sql": "SELECT AVG(post_per_user) FROM (SELECT COUNT(p.id) AS post_per_user FROM posts p JOIN users u ON p.user_id = u.id GROUP BY u.id) AS subquery;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 141, "num_statements": 1} {"question": "Insert records of new recruits who joined the army in 2023 into the army_recruits table", "schema": "CREATE TABLE army_recruits (recruit_id INT, name VARCHAR(50), rank VARCHAR(50), join_date DATE);", "sql": "INSERT INTO army_recruits (recruit_id, name, rank, join_date) VALUES (1, 'Minh Nguyen', 'Private', '2023-04-01'), (2, 'Aisha Said', 'Private', '2023-07-15'), (3, 'Konstantinos Papadopoulos', 'Private', '2023-11-27');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 216, "num_statements": 1} {"question": "How many refugees were supported by each organization by age group and gender in Q1 2021?", "schema": "CREATE TABLE refugee_support (refugee_id INT, organization_id INT, age INT, gender VARCHAR(10), support_date DATE); INSERT INTO refugee_support (refugee_id, organization_id, age, gender, support_date) VALUES (1, 201, 23, 'Male', '2021-01-12'), (2, 201, 30, 'Female', '2021-02-03'), (3, 202, 18, 'Male', '2021-03-25'), (4, 203, 45, 'Female', '2021-01-05');", "sql": "SELECT organization_id, gender, age_group, COUNT(*) as supported_refugees FROM (SELECT organization_id, gender, CASE WHEN age <= 17 THEN 'Minor' WHEN age BETWEEN 18 AND 64 THEN 'Adult' ELSE 'Senior' END as age_group FROM refugee_support WHERE EXTRACT(QUARTER FROM support_date) = 1 AND EXTRACT(YEAR FROM support_date) = 2021) t GROUP BY organization_id, gender, age_group;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 372, "num_statements": 1} {"question": "What is the average yield for strains in each category?", "schema": "CREATE TABLE strains (id INT, name TEXT, category TEXT, yield FLOAT); INSERT INTO strains (id, name, category, yield) VALUES (1, 'Purple Kush', 'Indica', 0.5), (2, 'Northern Lights', 'Indica', 0.6), (3, 'Granddaddy Purple', 'Indica', 0.7), (4, 'Sour Diesel', 'Sativa', 0.6), (5, 'Blue Dream', 'Hybrid', 0.7), (6, 'Green Crack', 'Sativa', 0.8), (7, 'OG Kush', 'Hybrid', 0.9);", "sql": "SELECT category, AVG(yield) FROM strains GROUP BY category;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 59, "num_statements": 1} {"question": "Find the average depth of marine life research sites in the Southern Hemisphere", "schema": "CREATE TABLE marine_sites (site_id INT, site_name VARCHAR(255), longitude DECIMAL(9,6), latitude DECIMAL(9,6), depth DECIMAL(5,2)); CREATE VIEW southern_hemisphere_sites AS SELECT * FROM marine_sites WHERE latitude BETWEEN -90 AND 0;", "sql": "SELECT AVG(depth) FROM southern_hemisphere_sites;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 49, "num_statements": 1} {"question": "PostgreSQL regression test 'sqljson': Write the SELECT query (example 163).", "schema": null, "sql": "SELECT JSON_OBJECTAGG(mod(i,100): (i)::text FORMAT JSON WITH UNIQUE)\nFROM generate_series(0, 199) i;", "explanation": "Regression test for Sqljson in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT JSON_OBJECTAGG(mod(i,100): (i)::text FORMAT JSON WITH UNIQUE)\nFROM generate_series(0, 199) i) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": true, "sql_length": 100, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: what is the earliest date for the album that had a catalog number of 3645, was formatted as a cd and was under the luaka bop label?", "schema": "CREATE TABLE table_name_2 (date INTEGER, label VARCHAR, catalog VARCHAR, format VARCHAR)", "sql": "SELECT MIN(date) FROM table_name_2 WHERE catalog = '3645' AND format = 'cd' AND label = 'luaka bop';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 100, "num_statements": 1} {"question": "How many exploratory wells have been drilled in 'Alberta' since 2020?", "schema": "CREATE TABLE wells (id VARCHAR(10), name VARCHAR(10), type VARCHAR(20), region VARCHAR(20)); INSERT INTO wells (id, name, type, region) VALUES ('W004', 'D', 'exploratory', 'Alberta'), ('W005', 'E', 'production', 'Alberta');", "sql": "SELECT COUNT(*) FROM wells WHERE type = 'exploratory' AND region = 'Alberta' AND date >= '2020-01-01';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 102, "num_statements": 1} {"question": "What is the average hourly wage for male workers?", "schema": "CREATE TABLE HourlyWageData (EmployeeID INT, Gender VARCHAR(10), HourlyWage DECIMAL(10, 2)); INSERT INTO HourlyWageData (EmployeeID, Gender, HourlyWage) VALUES (1, 'Male', 30.00), (2, 'Female', 25.00), (3, 'Male', 35.00);", "sql": "SELECT AVG(HourlyWage) FROM HourlyWageData WHERE Gender = 'Male';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 65, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What Date has the Region Europe and a Catalog of 74321 45851 2?", "schema": "CREATE TABLE table_name_33 (date VARCHAR, region VARCHAR, catalog VARCHAR)", "sql": "SELECT date FROM table_name_33 WHERE region = 'europe' AND catalog = '74321 45851 2';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 85, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'create_index' (example 154).", "schema": null, "sql": "CREATE INDEX botharrayidx ON array_index_op_test USING gin (i, t);", "explanation": "DDL from PostgreSQL core regression test for Create Index.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "ddl_index", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: what is the district where the result is re-elected and the incumbent is richard kelly?", "schema": "CREATE TABLE table_1341663_10 (district VARCHAR, result VARCHAR, incumbent VARCHAR)", "sql": "SELECT district FROM table_1341663_10 WHERE result = 'Re-elected' AND incumbent = 'Richard Kelly';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 98, "num_statements": 1} {"question": "List all countries and their total marine conservation area ('mca') size in square kilometers.", "schema": "CREATE TABLE country (id INT, name VARCHAR(50)); CREATE TABLE mca (id INT, country_id INT, name VARCHAR(50), size_sqkm FLOAT); INSERT INTO country (id, name) VALUES (1, 'Australia'), (2, 'Canada'); INSERT INTO mca (id, country_id, name, size_sqkm) VALUES (1, 1, 'Great Barrier Reef', 344400), (2, 2, 'Pacific Rim National Park', 51800);", "sql": "SELECT country.name, SUM(mca.size_sqkm) FROM country INNER JOIN mca ON country.id = mca.country_id GROUP BY country.name;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 121, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'timestamp' (example 77).", "schema": null, "sql": "INSERT INTO TIMESTAMP_TBL VALUES ('Feb 16 17:32:01 0597');", "explanation": "DML from PostgreSQL core regression test for Timestamp.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 58, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What Premier League Manager has an Adidas sponsor and a Newcastle United club?", "schema": "CREATE TABLE table_name_52 (manager VARCHAR, manufacturer VARCHAR, club VARCHAR)", "sql": "SELECT manager FROM table_name_52 WHERE manufacturer = 'adidas' AND club = 'newcastle united';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 94, "num_statements": 1} {"question": "What is the total volume of timber harvested in each region for 2020?", "schema": "CREATE TABLE harvest (year INT, region VARCHAR(255), volume FLOAT); INSERT INTO harvest (year, region, volume) VALUES (2017, 'Northeast', 1230.5), (2018, 'Northeast', 1405.8), (2019, 'Northeast', 1567.9), (2017, 'Southeast', 2600.1), (2018, 'Southeast', 2890.5), (2019, 'Southeast', 3100.7), (2017, 'Midwest', 1001.2), (2018, 'Midwest', 1120.5), (2019, 'Midwest', 1215.6), (2017, 'West', 3500.9), (2018, 'West', 3750.6), (2019, 'West', 4001.5);", "sql": "SELECT region, SUM(volume) AS total_volume FROM harvest WHERE year = 2020 GROUP BY region;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 90, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many journalists are there?", "schema": "CREATE TABLE journalist (Id VARCHAR)", "sql": "SELECT COUNT(*) FROM journalist;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 32, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which Monday Mona/Mani also had a Thursday Thunor/Thor of Tongersdei?", "schema": "CREATE TABLE table_name_84 (monday_mona__máni VARCHAR, thursday_thunor___thor VARCHAR)", "sql": "SELECT monday_mona__máni FROM table_name_84 WHERE thursday_thunor___thor = 'tongersdei';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 88, "num_statements": 1} {"question": "PostgreSQL regression test 'collate': Write the SELECT query (example 57).", "schema": null, "sql": "SELECT array_agg(a ORDER BY x||y) FROM collate_test10; -- fail\n\nSELECT a, b FROM collate_test1 UNION ALL SELECT a, b FROM collate_test1 ORDER BY 2;", "explanation": "Regression test for Collate in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT array_agg(a ORDER BY x||y) FROM collate_test10; -- fail\n\nSELECT a, b FROM collate_test1 UNION ALL SELECT a, b FROM collate_test1 ORDER BY 2) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": true, "sql_length": 148, "num_statements": 2} {"question": "What is the minimum depth at which the Greenland Shark is found?", "schema": "CREATE TABLE shark_depths (shark VARCHAR(255), min_depth FLOAT); INSERT INTO shark_depths (shark, min_depth) VALUES ('Greenland Shark', 2000.0), ('Hammerhead Shark', 100.0);", "sql": "SELECT min_depth FROM shark_depths WHERE shark = 'Greenland Shark';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 67, "num_statements": 1} {"question": "PostgreSQL regression test 'pg_dependencies': Write the SELECT query (example 19).", "schema": null, "sql": "SELECT * FROM pg_input_error_info('[{\"attributes\" : [2,3], \"degree\" : 1.000}]', 'pg_dependencies');", "explanation": "Regression test for Pg Dependencies in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT * FROM pg_input_error_info('[{\"attributes\" : [2,3], \"degree\" : 1.000}]', 'pg_dependencies')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 99, "num_statements": 1} {"question": "List the carbon sequestration rates for all forests in 'Region C'.", "schema": "CREATE TABLE ForestCarbonSeq(forest_name TEXT, carbon_seq_rate REAL, region TEXT); INSERT INTO ForestCarbonSeq (forest_name, carbon_seq_rate, region) VALUES ('Forest 1', 5.6, 'Region C'), ('Forest 2', 6.3, 'Region D'), ('Forest 3', 4.8, 'Region C');", "sql": "SELECT carbon_seq_rate FROM ForestCarbonSeq WHERE region = 'Region C';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 70, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'update' (example 132).", "schema": null, "sql": "CREATE POLICY policy_range_parted ON range_parted for UPDATE USING (true) WITH CHECK (c % 2 = 0);", "explanation": "DDL from PostgreSQL core regression test for Update.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_update", "is_postgresql_specific": true, "sql_length": 97, "num_statements": 1} {"question": "PostgreSQL regression test 'jsonb_jsonpath': Write the SELECT query (example 683).", "schema": null, "sql": "select jsonb_path_query('\"12:34:56+3\"', '$.datetime().type()');", "explanation": "Regression test for Jsonb Jsonpath in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select jsonb_path_query('\"12:34:56+3\"', '$.datetime().type()')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "advanced", "category": "query_select", "is_postgresql_specific": false, "sql_length": 63, "num_statements": 1} {"question": "What is the average price of fair trade coffee beans sold by each retailer?", "schema": "CREATE TABLE retailers (retailer_id INT, retailer_name TEXT);CREATE TABLE products (product_id INT, product_name TEXT, is_fair_trade BOOLEAN, product_category TEXT, price INT);CREATE TABLE inventory (retailer_id INT, product_id INT);", "sql": "SELECT retailers.retailer_name, AVG(products.price) as avg_price FROM retailers JOIN inventory ON retailers.retailer_id = inventory.retailer_id JOIN products ON inventory.product_id = products.product_id WHERE products.is_fair_trade = TRUE AND products.product_category = 'coffee beans' GROUP BY retailers.retailer_name;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 320, "num_statements": 1} {"question": "PostgreSQL regression test 'rules': Write the SELECT query (example 517).", "schema": null, "sql": "SELECT tablename, rulename, definition FROM pg_rules\n\tWHERE tablename = 'hats';", "explanation": "Regression test for Rules in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT tablename, rulename, definition FROM pg_rules\n\tWHERE tablename = 'hats') AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 79, "num_statements": 1} {"question": "What is the prevalence of heart disease in rural areas of Texas compared to urban areas?", "schema": "CREATE TABLE heart_disease (patient_id INT, age INT, gender VARCHAR(20), location VARCHAR(20));", "sql": "SELECT (COUNT(*) FILTER (WHERE location = 'Rural Texas'))::float / COUNT(*) FROM heart_disease;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": true, "sql_length": 95, "num_statements": 1} {"question": "What is the percentage of electric vehicles sold in North America in 2020?", "schema": "CREATE TABLE VehicleSales (id INT, vehicle_type VARCHAR(255), sale_date DATE, units_sold INT, country VARCHAR(255)); INSERT INTO VehicleSales (id, vehicle_type, sale_date, units_sold, country) VALUES (1, 'Gasoline', '2017-01-01', 500, 'USA'); INSERT INTO VehicleSales (id, vehicle_type, sale_date, units_sold, country) VALUES (2, 'Electric', '2020-01-01', 800, 'Canada');", "sql": "SELECT (COUNT(*) FILTER (WHERE vehicle_type = 'Electric')) * 100.0 / COUNT(*) FROM VehicleSales WHERE sale_date >= '2020-01-01' AND country IN ('USA', 'Canada');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": true, "sql_length": 161, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the country of player ian woosnam?", "schema": "CREATE TABLE table_name_93 (country VARCHAR, player VARCHAR)", "sql": "SELECT country FROM table_name_93 WHERE player = 'ian woosnam';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 63, "num_statements": 1} {"question": "Delete the record of the 'Vanguard 1' satellite", "schema": "CREATE TABLE space_debris (name TEXT, type TEXT, location TEXT); INSERT INTO space_debris (name, type, location) VALUES ('Vanguard 1', 'Satellite', 'Low Earth Orbit');", "sql": "DELETE FROM space_debris WHERE name = 'Vanguard 1';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_delete", "is_postgresql_specific": false, "sql_length": 51, "num_statements": 1} {"question": "What is the percentage of AI safety incidents related to data privacy in each country?", "schema": "CREATE TABLE safety_incidents (incident_id INT, country VARCHAR(50), incident_type VARCHAR(50)); INSERT INTO safety_incidents (incident_id, country, incident_type) VALUES (1, 'CountryA', 'Data Privacy'), (2, 'CountryB', 'Model Malfunction'), (3, 'CountryA', 'Data Privacy');", "sql": "SELECT country, COUNT(*) * 100.0 / (SELECT COUNT(*) FROM safety_incidents) as pct_data_privacy_incidents FROM safety_incidents WHERE incident_type = 'Data Privacy' GROUP BY country;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 181, "num_statements": 1} {"question": "What is the average mental health score of students who have participated in professional development programs?", "schema": "CREATE TABLE students (student_id INT, mental_health_score INT, participated_in_pd BOOLEAN); INSERT INTO students (student_id, mental_health_score, participated_in_pd) VALUES (1, 75, true), (2, 80, false), (3, 60, true);", "sql": "SELECT AVG(mental_health_score) FROM students WHERE participated_in_pd = true;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 78, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the name of the nurse has the most appointments?", "schema": "CREATE TABLE nurse (name VARCHAR, employeeid VARCHAR); CREATE TABLE appointment (prepnurse VARCHAR)", "sql": "SELECT T1.name FROM nurse AS T1 JOIN appointment AS T2 ON T1.employeeid = T2.prepnurse GROUP BY T1.employeeid ORDER BY COUNT(*) DESC LIMIT 1;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 141, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the amount of the 1st prize when the Winner was ken green (4)?", "schema": "CREATE TABLE table_name_56 (winner VARCHAR)", "sql": "SELECT 1 AS st_prize___$__ FROM table_name_56 WHERE winner = 'ken green (4)';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 77, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: who is the away team on 21 september?", "schema": "CREATE TABLE table_name_42 (away_team VARCHAR, date VARCHAR)", "sql": "SELECT away_team FROM table_name_42 WHERE date = '21 september';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 64, "num_statements": 1} {"question": "How many unique strains were available in Colorado in 2020 and 2021?", "schema": "CREATE TABLE strains (id INT, state VARCHAR(50), year INT, strain VARCHAR(50)); INSERT INTO strains (id, state, year, strain) VALUES (1, 'Colorado', 2020, 'Blue Dream'), (2, 'Colorado', 2021, 'Green Crack'), (3, 'California', 2020, 'Sour Diesel');", "sql": "SELECT COUNT(DISTINCT strain) FROM strains WHERE state = 'Colorado' AND (year = 2020 OR year = 2021);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 101, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What rank does the person participating in American Civil war and indian wars?", "schema": "CREATE TABLE table_name_65 (rank VARCHAR, active_service VARCHAR)", "sql": "SELECT rank FROM table_name_65 WHERE active_service = 'american civil war and indian wars';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 91, "num_statements": 1} {"question": "List all unique marine species observed by expeditions, ordered alphabetically.", "schema": "CREATE TABLE expedition (org VARCHAR(20), species VARCHAR(50)); INSERT INTO expedition VALUES ('Ocean Explorer', 'Dolphin'), ('Ocean Explorer', 'Tuna'), ('Sea Discoverers', 'Shark'), ('Sea Discoverers', 'Whale'), ('Marine Investigators', 'Starfish');", "sql": "SELECT DISTINCT species FROM expedition ORDER BY species;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 57, "num_statements": 1} {"question": "Insert a new outreach event into the 'events' table", "schema": "CREATE TABLE events (id INT PRIMARY KEY, site_id INT, date DATE, attendees INT, notes TEXT);", "sql": "INSERT INTO events (id, site_id, date, attendees, notes) VALUES (1, 1, '2023-06-10', 200, 'Lecture on Pompeii frescoes');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 121, "num_statements": 1} {"question": "What is the average sustainability score of suppliers that provide linen?", "schema": "CREATE TABLE supplier_sustainability (supplier_id INT, name TEXT, sustainability_score INT); INSERT INTO supplier_sustainability (supplier_id, name, sustainability_score) VALUES (1, 'Supplier A', 85), (2, 'Supplier B', 90), (3, 'Supplier C', 70), (4, 'Supplier D', 60), (5, 'Supplier E', 50), (6, 'Supplier F', 95), (7, 'Supplier G', 80); CREATE TABLE supplier_materials (supplier_id INT, material TEXT); INSERT INTO supplier_materials (supplier_id, material) VALUES (3, 'linen'), (6, 'linen'), (7, 'linen');", "sql": "SELECT AVG(sustainability_score) FROM supplier_sustainability s JOIN supplier_materials m ON s.supplier_id = m.supplier_id WHERE material = 'linen';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 148, "num_statements": 1} {"question": "What is the number of teachers who have completed professional development courses on open pedagogy in urban and rural areas?", "schema": "CREATE TABLE teacher_pd (teacher_id INT, location VARCHAR(50), course VARCHAR(50)); INSERT INTO teacher_pd (teacher_id, location, course) VALUES (1, 'Urban', 'Open Pedagogy 101'), (2, 'Rural', 'Open Pedagogy 101'), (3, 'Urban', 'Open Pedagogy 202');", "sql": "SELECT location, COUNT(DISTINCT teacher_id) as num_teachers FROM teacher_pd WHERE course = 'Open Pedagogy 101' GROUP BY location;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 129, "num_statements": 1} {"question": "What is the average cultural competency score of community health workers by state?", "schema": "CREATE TABLE states (state_id INT, state_name VARCHAR(100)); INSERT INTO states (state_id, state_name) VALUES (1, 'California'), (2, 'Texas'), (3, 'New York'); CREATE TABLE community_health_workers (worker_id INT, state_id INT, cultural_competency_score INT); INSERT INTO community_health_workers (worker_id, state_id, cultural_competency_score) VALUES (1, 1, 85), (2, 1, 90), (3, 2, 80), (4, 3, 95), (5, 1, 92);", "sql": "SELECT S.state_name, AVG(cultural_competency_score) as avg_score FROM community_health_workers CHW JOIN states S ON CHW.state_id = S.state_id GROUP BY S.state_name;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 164, "num_statements": 1} {"question": "What is the total military equipment sales revenue for contractor Y?", "schema": "CREATE TABLE revenue(id INT, contractor VARCHAR(50), revenue NUMERIC);", "sql": "SELECT SUM(revenue) FROM revenue WHERE contractor = 'Y';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 56, "num_statements": 1} {"question": "How many public schools are there in the city of Chicago, and what are their names?", "schema": "CREATE TABLE PublicSchools (SchoolID INT, SchoolName VARCHAR(100), City VARCHAR(100)); INSERT INTO PublicSchools (SchoolID, SchoolName, City) VALUES (1, 'Johnson Elementary School', 'Chicago'), (2, 'Washington High School', 'Chicago'), (3, 'Lincoln Middle School', 'Chicago');", "sql": "SELECT COUNT(*) as NumberOfSchools, SchoolName FROM PublicSchools WHERE City = 'Chicago' GROUP BY SchoolName;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 109, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many losses are there for team tembetary?", "schema": "CREATE TABLE table_18703133_6 (losses VARCHAR, team VARCHAR)", "sql": "SELECT losses FROM table_18703133_6 WHERE team = 'Tembetary';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 61, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which manufacturer is grid 11?", "schema": "CREATE TABLE table_name_93 (manufacturer VARCHAR, grid VARCHAR)", "sql": "SELECT manufacturer FROM table_name_93 WHERE grid = '11';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 57, "num_statements": 1} {"question": "What is the number of peacekeeping operations participated in by each country in the 'peacekeeping' table, excluding those with less than 3 operations, ordered by the number of operations in ascending order?", "schema": "CREATE TABLE peacekeeping (id INT, country VARCHAR(50), num_operations INT);", "sql": "SELECT country, COUNT(*) as num_operations FROM peacekeeping GROUP BY country HAVING COUNT(*) >= 3 ORDER BY num_operations ASC;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 127, "num_statements": 1} {"question": "What is the total socially responsible lending for each country?", "schema": "CREATE TABLE socially_responsible_lending_data (lending_id INT, amount DECIMAL(15, 2), country VARCHAR(50)); INSERT INTO socially_responsible_lending_data (lending_id, amount, country) VALUES (1, 5000000, 'Brazil'), (2, 7000000, 'India'), (3, 6000000, 'South Africa'), (4, 8000000, 'Indonesia'), (5, 9000000, 'China'); CREATE VIEW socially_responsible_lending_view AS SELECT country, SUM(amount) as total_lending FROM socially_responsible_lending_data GROUP BY country;", "sql": "SELECT country, total_lending FROM socially_responsible_lending_view;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 69, "num_statements": 1} {"question": "Delete a community education program from the 'education' table", "schema": "CREATE TABLE education (id INT PRIMARY KEY, title VARCHAR(50), description TEXT, attendees INT, date DATE);", "sql": "DELETE FROM education WHERE title = 'Rainforest Restoration';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_delete", "is_postgresql_specific": false, "sql_length": 61, "num_statements": 1} {"question": "How many people have benefited from digital divide initiatives in Africa in the last 5 years?", "schema": "CREATE TABLE Digital_Divide_Initiatives (Year INT, Beneficiaries INT);", "sql": "SELECT SUM(Beneficiaries) FROM Digital_Divide_Initiatives WHERE Year BETWEEN 2016 AND 2021;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 91, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the most bits 14-12 for output from accumulator to character bus", "schema": "CREATE TABLE table_14249278_1 (bits_14_12 INTEGER, description VARCHAR)", "sql": "SELECT MIN(bits_14_12) FROM table_14249278_1 WHERE description = 'Output from accumulator to character bus';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 108, "num_statements": 1} {"question": "Calculate the total investment amount per ESG category.", "schema": "CREATE TABLE esg_categories (id INT, category TEXT, description TEXT, total_investment FLOAT); INSERT INTO esg_categories (id, category, description, total_investment) VALUES (1, 'E', 'Environmental', 30000), (2, 'S', 'Social', 25000), (3, 'G', 'Governance', 45000);", "sql": "SELECT category, SUM(total_investment) FROM esg_categories GROUP BY category;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 77, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which track has a Japanese title of メロディー?", "schema": "CREATE TABLE table_name_12 (track INTEGER, japanese_title VARCHAR)", "sql": "SELECT MAX(track) FROM table_name_12 WHERE japanese_title = 'メロディー';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 68, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: what is the play when the company is national theatre of greece?", "schema": "CREATE TABLE table_name_11 (play VARCHAR, company VARCHAR)", "sql": "SELECT play FROM table_name_11 WHERE company = 'national theatre of greece';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 76, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What date had a margin victory of 2 strokes?", "schema": "CREATE TABLE table_name_91 (date VARCHAR, margin_of_victory VARCHAR)", "sql": "SELECT date FROM table_name_91 WHERE margin_of_victory = '2 strokes';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 69, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which Laps have a Finish of 15?", "schema": "CREATE TABLE table_name_30 (laps VARCHAR, finish VARCHAR)", "sql": "SELECT laps FROM table_name_30 WHERE finish = '15';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 51, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the highest score of round 1 where they also shot 90 in round 2", "schema": "CREATE TABLE table_14708760_3 (round_1 INTEGER, round_2 VARCHAR)", "sql": "SELECT MAX(round_1) FROM table_14708760_3 WHERE round_2 = 90;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 61, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: When the VFL played Victoria Park what was the home team score?", "schema": "CREATE TABLE table_name_11 (home_team VARCHAR, venue VARCHAR)", "sql": "SELECT home_team AS score FROM table_name_11 WHERE venue = 'victoria park';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 75, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which Sydney's Melbourne was no, when Auckland was yes?", "schema": "CREATE TABLE table_name_94 (sydney VARCHAR, melbourne VARCHAR, auckland VARCHAR)", "sql": "SELECT sydney FROM table_name_94 WHERE melbourne = 'no' AND auckland = 'yes';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 77, "num_statements": 1} {"question": "What is the trend of mental health scores by race/ethnicity?", "schema": "CREATE TABLE student_mental_health (student_id INT, score INT, race_ethnicity VARCHAR(20)); INSERT INTO student_mental_health (student_id, score, race_ethnicity) VALUES (1, 80, 'Asian'), (1, 85, 'Asian'), (2, 70, 'Hispanic'), (2, 75, 'Hispanic'), (3, 90, 'African American'), (3, 95, 'African American');", "sql": "SELECT race_ethnicity, AVG(score) as avg_score FROM student_mental_health GROUP BY race_ethnicity ORDER BY race_ethnicity;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 122, "num_statements": 1} {"question": "What is the average speed of shared electric scooters in Seattle and Portland?", "schema": "CREATE TABLE shared_scooters (scooter_id INT, city VARCHAR(20), avg_speed DECIMAL(5,2)); INSERT INTO shared_scooters (scooter_id, city, avg_speed) VALUES (1, 'Seattle', 12.5), (2, 'Seattle', 14.3), (3, 'Portland', 13.9), (4, 'Portland', 11.8);", "sql": "SELECT AVG(avg_speed) FROM shared_scooters WHERE city IN ('Seattle', 'Portland') GROUP BY city;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 95, "num_statements": 1} {"question": "Find the average daily production of oil for each platform in Q4 2020", "schema": "CREATE TABLE platform_production_figures (platform_id INT, production_date DATE, oil_production FLOAT);", "sql": "SELECT platform_id, AVG(oil_production) as avg_oil_production FROM platform_production_figures WHERE production_date BETWEEN '2020-10-01' AND '2020-12-31' GROUP BY platform_id;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 176, "num_statements": 1} {"question": "What was the success rate of legal aid clinics in different regions?", "schema": "CREATE TABLE legal_aid (clinic_name VARCHAR(20), region VARCHAR(20), success_rate DECIMAL(3,2)); INSERT INTO legal_aid (clinic_name, region, success_rate) VALUES ('Clinic A', 'Northeast', 0.85), ('Clinic B', 'Southeast', 0.70), ('Clinic C', 'Midwest', 0.90);", "sql": "SELECT region, AVG(success_rate) as avg_success_rate FROM legal_aid GROUP BY region;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 84, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What horse has the number 25?", "schema": "CREATE TABLE table_20095300_1 (name VARCHAR, number VARCHAR)", "sql": "SELECT name FROM table_20095300_1 WHERE number = 25;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 52, "num_statements": 1} {"question": "PostgreSQL regression test 'float8': Write the SELECT query (example 104).", "schema": null, "sql": "SELECT ln(f.f1) from FLOAT8_TBL f where f.f1 = '0.0' ;", "explanation": "Regression test for Float8 in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT ln(f.f1) from FLOAT8_TBL f where f.f1 = '0.0' ) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 54, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Find the name of the airport in the city of Goroka.", "schema": "CREATE TABLE airports (name VARCHAR, city VARCHAR)", "sql": "SELECT name FROM airports WHERE city = 'Goroka';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 48, "num_statements": 1} {"question": "Calculate total assets by investment type and region.", "schema": "CREATE TABLE investments (id INT, type VARCHAR(255), region VARCHAR(255), amount FLOAT); INSERT INTO investments VALUES (1, 'Stocks', 'Asia', 50000), (2, 'Bonds', 'Europe', 75000), (3, 'Real Estate', 'Americas', 100000);", "sql": "SELECT type, SUM(amount) as total_assets, region FROM investments GROUP BY type, region;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 88, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which details has the out of line label and the year of 2005?", "schema": "CREATE TABLE table_name_9 (details VARCHAR, label VARCHAR, year VARCHAR)", "sql": "SELECT details FROM table_name_9 WHERE label = 'out of line' AND year = 2005;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 77, "num_statements": 1} {"question": "How many electric vehicles are sold in China per month?", "schema": "CREATE TABLE CNElectricVehicleSales (id INT, date DATE, sales INT);", "sql": "SELECT DATE_FORMAT(date, '%Y-%m') as month, SUM(sales) FROM CNElectricVehicleSales GROUP BY month;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 98, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'partition_aggregate' (example 89).", "schema": null, "sql": "CREATE TABLE pagg_tab_ml_p3_s1(c text, a int, b int);", "explanation": "DDL from PostgreSQL core regression test for Partition Aggregate.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_table", "is_postgresql_specific": false, "sql_length": 53, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What shows for laps when the Time/Retired was +1:08.577?", "schema": "CREATE TABLE table_name_48 (laps VARCHAR, time_retired VARCHAR)", "sql": "SELECT laps FROM table_name_48 WHERE time_retired = '+1:08.577';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 64, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What date sent has orion as the constellation?", "schema": "CREATE TABLE table_name_63 (date_sent VARCHAR, constellation VARCHAR)", "sql": "SELECT date_sent FROM table_name_63 WHERE constellation = 'orion';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: In games where st kilda was the away team, what was the smallest crowd?", "schema": "CREATE TABLE table_name_68 (crowd INTEGER, away_team VARCHAR)", "sql": "SELECT MIN(crowd) FROM table_name_68 WHERE away_team = 'st kilda';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What car has a March 90ca Chassis?", "schema": "CREATE TABLE table_name_44 (engine VARCHAR, chassis VARCHAR)", "sql": "SELECT engine FROM table_name_44 WHERE chassis = 'march 90ca';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 62, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who were the scorers in the game against neuchâtel xamax played on 10 December 1985?", "schema": "CREATE TABLE table_name_64 (scorers VARCHAR, opponent VARCHAR, date VARCHAR)", "sql": "SELECT scorers FROM table_name_64 WHERE opponent = 'neuchâtel xamax' AND date = '10 december 1985';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 99, "num_statements": 1} {"question": "Which country produced the most Dysprosium in 2020?", "schema": "CREATE TABLE yearly_production (country VARCHAR(255), element VARCHAR(255), year INT, production INT); INSERT INTO yearly_production (country, element, year, production) VALUES ('China', 'Dysprosium', 2020, 1200), ('Australia', 'Dysprosium', 2020, 800), ('United States', 'Dysprosium', 2020, 500);", "sql": "SELECT country, MAX(production) as max_production FROM yearly_production WHERE element = 'Dysprosium' AND year = 2020 GROUP BY country;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 135, "num_statements": 1} {"question": "What are the average warehouse management costs for each warehouse in Q2 2022?", "schema": "CREATE TABLE warehouse_costs (warehouse_id INT, warehouse_location VARCHAR(255), cost DECIMAL(10,2), quarter INT, year INT); INSERT INTO warehouse_costs (warehouse_id, warehouse_location, cost, quarter, year) VALUES (1, 'NYC Warehouse', 2500.00, 2, 2022), (2, 'LA Warehouse', 3000.00, 2, 2022), (3, 'CHI Warehouse', 2000.00, 2, 2022);", "sql": "SELECT warehouse_location, AVG(cost) as avg_cost FROM warehouse_costs WHERE quarter = 2 AND year = 2022 GROUP BY warehouse_location;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 132, "num_statements": 1} {"question": "Find the ratio of R&D expenditures between 'AstraZeneca' and 'Novartis'.", "schema": "CREATE TABLE rd_expenditures (company TEXT, year INT, amount FLOAT); INSERT INTO rd_expenditures (company, year, amount) VALUES ('AstraZeneca', 2020, 22000000), ('Novartis', 2020, 16000000);", "sql": "SELECT (SELECT SUM(amount) FROM rd_expenditures WHERE company = 'AstraZeneca' AND year = 2020) / (SELECT SUM(amount) FROM rd_expenditures WHERE company = 'Novartis' AND year = 2020);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 182, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'pg_stat_statements' (example 149).", "schema": null, "sql": "SELECT a FROM pgss_schema_2.tab_search_diff_2;", "explanation": "Example query from the 'pg_stat_statements' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 46, "num_statements": 1} {"question": "Identify the programs with the largest decrease in volunteers from the previous month.", "schema": "CREATE TABLE volunteer_changes (program TEXT, month INT, num_volunteers INT); INSERT INTO volunteer_changes VALUES ('Feeding Program', 1, 10), ('Education Program', 1, 15), ('Feeding Program', 2, 8), ('Education Program', 2, 12), ('Feeding Program', 3, 12), ('Education Program', 3, 16);", "sql": "SELECT program, num_volunteers, LAG(num_volunteers) OVER (PARTITION BY program ORDER BY month) as previous_month_volunteers, num_volunteers - LAG(num_volunteers) OVER (PARTITION BY program ORDER BY month) as volunteer_change FROM volunteer_changes;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "query_window_function", "is_postgresql_specific": true, "sql_length": 248, "num_statements": 1} {"question": "What is the total number of posts made by users from 'Jakarta' and 'Tokyo'?", "schema": "CREATE TABLE posts (id INT PRIMARY KEY, user_id INT, content TEXT, post_date DATE, city VARCHAR(50)); INSERT INTO posts (id, user_id, content, post_date, city) VALUES (1, 1, 'Hello World!', '2021-01-01', 'Jakarta'); INSERT INTO posts (id, user_id, content, post_date, city) VALUES (2, 2, 'I love data!', '2021-01-02', 'Tokyo'); INSERT INTO posts (id, user_id, content, post_date, city) VALUES (3, 3, 'Privacy matters!', '2021-01-03', 'Jakarta');", "sql": "SELECT city, COUNT(*) as total_posts FROM posts WHERE city IN ('Jakarta', 'Tokyo') GROUP BY city;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 97, "num_statements": 1} {"question": "What is the total amount of climate finance invested in renewable energy in Europe?", "schema": "CREATE TABLE FinanceInvestment (Country TEXT, Sector TEXT, Investment_Amount NUMERIC); INSERT INTO FinanceInvestment (Country, Sector, Investment_Amount) VALUES ('France', 'Renewable Energy', 3000000), ('Germany', 'Renewable Energy', 4000000), ('Spain', 'Renewable Energy', 5000000);", "sql": "SELECT SUM(Investment_Amount) FROM FinanceInvestment WHERE Country IN ('France', 'Germany', 'Spain') AND Sector = 'Renewable Energy';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 133, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the Theme of Christie Paquet after 2004 with an Issue Price of $34.95?", "schema": "CREATE TABLE table_name_54 (theme VARCHAR, year VARCHAR, artist VARCHAR, issue_price VARCHAR)", "sql": "SELECT theme FROM table_name_54 WHERE artist = 'christie paquet' AND issue_price = '$34.95' AND year > 2004;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 108, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the total number of players who had a money list rank of 96?", "schema": "CREATE TABLE table_20590020_2 (best_finish VARCHAR, money_list_rank VARCHAR)", "sql": "SELECT COUNT(best_finish) FROM table_20590020_2 WHERE money_list_rank = 96;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 75, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which record has a score of 135–134?", "schema": "CREATE TABLE table_name_7 (record VARCHAR, score VARCHAR)", "sql": "SELECT record FROM table_name_7 WHERE score = '135–134';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 56, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the total number of years than had best improved singer (躍進歌手)?", "schema": "CREATE TABLE table_name_52 (year INTEGER, category VARCHAR)", "sql": "SELECT SUM(year) FROM table_name_52 WHERE category = 'best improved singer (躍進歌手)';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 83, "num_statements": 1} {"question": "List the teams that have more than 15 players.", "schema": "CREATE TABLE players (player_id INT, name VARCHAR(50), age INT, position VARCHAR(50), team VARCHAR(50));", "sql": "SELECT team FROM players GROUP BY team HAVING COUNT(*) > 15;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 60, "num_statements": 1} {"question": "What is the total waste generation by city for the year 2021 for the state of New York?'", "schema": "CREATE TABLE city_waste_generation (city VARCHAR(20), state VARCHAR(20), year INT, quantity FLOAT); INSERT INTO city_waste_generation (city, state, year, quantity) VALUES ('New York City', 'New York', 2021, 1000000); INSERT INTO city_waste_generation (city, state, year, quantity) VALUES ('Buffalo', 'New York', 2021, 150000);", "sql": "SELECT SUM(quantity) as total_quantity FROM city_waste_generation WHERE state = 'New York' AND year = 2021;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 107, "num_statements": 1} {"question": "List all defense projects with timelines starting in 2020 or later.", "schema": "CREATE TABLE defense_projects (project_name VARCHAR(255), start_date DATE, end_date DATE); INSERT INTO defense_projects (project_name, start_date, end_date) VALUES ('Project A', '2021-01-01', '2023-12-31'), ('Project B', '2019-01-01', '2022-12-31'), ('Project C', '2020-01-01', '2024-12-31');", "sql": "SELECT project_name FROM defense_projects WHERE start_date >= '2020-01-01';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 75, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the name of the driver in 1955?", "schema": "CREATE TABLE table_name_18 (driver VARCHAR, year VARCHAR)", "sql": "SELECT driver FROM table_name_18 WHERE year = '1955';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 53, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Where was the game in which Will Bynum (5) did the high assists played?", "schema": "CREATE TABLE table_27755603_2 (location_attendance VARCHAR, high_assists VARCHAR)", "sql": "SELECT location_attendance FROM table_27755603_2 WHERE high_assists = 'Will Bynum (5)';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 87, "num_statements": 1} {"question": "Update policy records for policyholders with policy_id 101, 102, and 103 in the 'Policy' table.", "schema": "CREATE TABLE Policy (policy_id INT, policyholder_state VARCHAR(20));", "sql": "UPDATE Policy SET policyholder_state = 'NY' WHERE policy_id IN (101, 102, 103);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 79, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the latest Year with Marianne Jean-Baptiste as the Golden Globe Actor?", "schema": "CREATE TABLE table_name_50 (year INTEGER, actor VARCHAR)", "sql": "SELECT MAX(year) FROM table_name_50 WHERE actor = 'marianne jean-baptiste';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 75, "num_statements": 1} {"question": "How many exploration and appraisal wells were drilled in 2018 and 2019?", "schema": "CREATE TABLE wells (id INT, year INT, well_type VARCHAR(255), status VARCHAR(255)); INSERT INTO wells (id, year, well_type, status) VALUES (1, 2018, 'Exploration', 'Drilled'), (2, 2018, 'Appraisal', 'Drilled'), (3, 2019, 'Exploration', 'Drilled'), (4, 2019, 'Appraisal', 'Drilled'), (5, 2017, 'Production', 'Drilled');", "sql": "SELECT SUM(CASE WHEN year IN (2018, 2019) AND status = 'Drilled' AND (well_type = 'Exploration' OR well_type = 'Appraisal') THEN 1 ELSE 0 END) as e_and_a_wells FROM wells;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 171, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: who is the the president with date of inauguration being 4june1979", "schema": "CREATE TABLE table_12134383_1 (president VARCHAR, date_of_inauguration VARCHAR)", "sql": "SELECT president FROM table_12134383_1 WHERE date_of_inauguration = '4June1979';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 80, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who was the 1st member of the parliament that was dissolved 23 february 1510?", "schema": "CREATE TABLE table_name_51 (dissolved VARCHAR)", "sql": "SELECT 1 AS st_member FROM table_name_51 WHERE dissolved = '23 february 1510';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 78, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the height of the player who attended Hartford?", "schema": "CREATE TABLE table_11734041_2 (height_in_ft VARCHAR, school_club_team_country VARCHAR)", "sql": "SELECT height_in_ft FROM table_11734041_2 WHERE school_club_team_country = 'Hartford';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 86, "num_statements": 1} {"question": "List all suppliers for organic fruits and vegetables, along with their contact information and product details.", "schema": "CREATE TABLE Suppliers (SupplierID int, Name varchar(50), Contact varchar(50), Industry varchar(50)); INSERT INTO Suppliers (SupplierID, Name, Contact, Industry) VALUES (1, 'Green Valley', 'contact@greenvalley.com', 'Organic Fruits'); CREATE TABLE Products (ProductID int, Name varchar(50), SupplierID int, Details varchar(50)); INSERT INTO Products (ProductID, Name, SupplierID, Details) VALUES (1, 'Apples', 1, 'Organic');", "sql": "SELECT Suppliers.Name, Suppliers.Contact, Products.Name, Products.Details FROM Suppliers INNER JOIN Products ON Suppliers.SupplierID = Products.SupplierID WHERE Suppliers.Industry = 'Organic Fruits' AND Products.Details = 'Organic';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 232, "num_statements": 1} {"question": "List all unique funding sources for arts and culture programs in the 'Northeast' region, excluding those with a total funding amount less than $10,000.", "schema": "CREATE TABLE FundingSources (funding_source VARCHAR(20), region VARCHAR(20), total_funding DECIMAL(10,2)); INSERT INTO FundingSources (funding_source, region, total_funding) VALUES ('Cultural Trust', 'Northeast', 25000), ('Arts Foundation', 'Northeast', 12000), ('City Grants', 'Southeast', 15000);", "sql": "SELECT DISTINCT funding_source FROM FundingSources WHERE region = 'Northeast' AND total_funding >= 10000;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 105, "num_statements": 1} {"question": "List all mobile subscribers in the Americas who have exceeded their data usage limit in the last month.", "schema": "CREATE TABLE mobile_subscribers (id INT, region VARCHAR(20), data_usage INT, usage_date DATE); CREATE TABLE data_limits (id INT, subscriber_id INT, limit INT);", "sql": "SELECT m.id, m.region, m.data_usage, m.usage_date FROM mobile_subscribers m INNER JOIN data_limits d ON m.id = d.subscriber_id WHERE m.region = 'Americas' AND m.usage_date > DATE_SUB(CURRENT_DATE, INTERVAL 1 MONTH) AND m.data_usage > d.limit;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 242, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What district is Charles H. Wilson the incumbent of?", "schema": "CREATE TABLE table_1341663_6 (district VARCHAR, incumbent VARCHAR)", "sql": "SELECT district FROM table_1341663_6 WHERE incumbent = 'Charles H. Wilson';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 75, "num_statements": 1} {"question": "Find the total cost of all projects in the Water_Infrastructure table", "schema": "CREATE TABLE Water_Infrastructure (project_id INT, project_name VARCHAR(255), location VARCHAR(255), cost FLOAT);", "sql": "SELECT SUM(cost) FROM Water_Infrastructure;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 43, "num_statements": 1} {"question": "What is the count of claims for policy number 1004?", "schema": "CREATE TABLE claims (claim_id INT, policy_id INT); INSERT INTO claims (claim_id, policy_id) VALUES (1, 1001), (2, 1002), (3, 1003), (4, 1002), (5, 1004), (6, 1004);", "sql": "SELECT COUNT(*) FROM claims WHERE policy_id = 1004;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 51, "num_statements": 1} {"question": "What is the average fare for buses in the 'san_francisco' schema?", "schema": "CREATE TABLE san_francisco.bus_fares (id INT, route_id INT, fare DECIMAL); INSERT INTO san_francisco.bus_fares (id, route_id, fare) VALUES (1, 101, 2.5), (2, 102, 2), (3, 103, 3);", "sql": "SELECT AVG(fare) FROM san_francisco.bus_fares;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 46, "num_statements": 1} {"question": "pgTAP test for Resultset (assertion 433).", "schema": null, "sql": "-- Invert that.\nSELECT * FROM check_test(\n results_ne(\n 'nenames_ord',\n 'SELECT id, name FROM annames WHERE name <> ''Antonio'''\n ),\n true,\n 'results_ne(prepared, select) missing first row',\n '',\n ''\n);", "explanation": "SQL assertion from pgTAP test suite for Resultset.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 234, "num_statements": 1} {"question": "Update the last name of member with ID 1 to 'Smith'.", "schema": "CREATE TABLE Members (MemberID INT, FirstName VARCHAR(50), LastName VARCHAR(50)); INSERT INTO Members (MemberID, FirstName, LastName) VALUES (1, 'John', 'Doe'); INSERT INTO Members (MemberID, FirstName, LastName) VALUES (2, 'Jane', 'Doe');", "sql": "UPDATE Members SET LastName = 'Smith' WHERE MemberID = 1;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 57, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the low rank for hale irwin with under 32 wins?", "schema": "CREATE TABLE table_name_37 (rank INTEGER, player VARCHAR, wins VARCHAR)", "sql": "SELECT MIN(rank) FROM table_name_37 WHERE player = 'hale irwin' AND wins < 32;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 78, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many draft pick positions did Matt Bradley have?", "schema": "CREATE TABLE table_2840500_4 (position VARCHAR, player VARCHAR)", "sql": "SELECT COUNT(position) FROM table_2840500_4 WHERE player = 'Matt Bradley';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 74, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'fast_default' (example 26).", "schema": null, "sql": "INSERT INTO T VALUES (1), (2);", "explanation": "DML from PostgreSQL core regression test for Fast Default.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 30, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What surface was the match on November 6, 1982 played on?", "schema": "CREATE TABLE table_name_82 (surface VARCHAR, date VARCHAR)", "sql": "SELECT surface FROM table_name_82 WHERE date = 'november 6, 1982';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What name has a qual 2 of 1:46.025?", "schema": "CREATE TABLE table_name_33 (name VARCHAR, qual_2 VARCHAR)", "sql": "SELECT name FROM table_name_33 WHERE qual_2 = '1:46.025';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 57, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is Kris doolan's league number?", "schema": "CREATE TABLE table_22683369_8 (league VARCHAR, player VARCHAR)", "sql": "SELECT COUNT(league) FROM table_22683369_8 WHERE player = 'Kris Doolan';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 72, "num_statements": 1} {"question": "List all warehouses in North America and their corresponding capacities.", "schema": "CREATE TABLE Warehouses (id INT, warehouse_name VARCHAR(50), warehouse_country VARCHAR(50), warehouse_capacity INT); INSERT INTO Warehouses (id, warehouse_name, warehouse_country, warehouse_capacity) VALUES (1, 'Seattle Warehouse', 'USA', 5000), (2, 'Toronto Warehouse', 'Canada', 4000), (3, 'Mexico City Warehouse', 'Mexico', 3000);", "sql": "SELECT warehouse_country, warehouse_name, warehouse_capacity FROM Warehouses WHERE warehouse_country IN ('USA', 'Canada', 'Mexico');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 132, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which landfall was in category 1 for Saffir-Simpson in 1999?", "schema": "CREATE TABLE table_name_85 (landfall VARCHAR, saffir_simpson_category VARCHAR, year VARCHAR)", "sql": "SELECT landfall FROM table_name_85 WHERE saffir_simpson_category = 1 AND year = 1999;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 85, "num_statements": 1} {"question": "What is the minimum transaction amount for clients with a credit score greater than 750 in Q2 2023?", "schema": "CREATE TABLE clients (client_id INT, name VARCHAR(50), credit_score INT, last_transaction_date DATE);CREATE TABLE transactions (transaction_id INT, client_id INT, transaction_date DATE, total_amount DECIMAL(10,2));", "sql": "SELECT MIN(total_amount) FROM transactions t INNER JOIN clients c ON t.client_id = c.client_id WHERE c.credit_score > 750 AND t.transaction_date BETWEEN '2023-04-01' AND '2023-06-30';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 183, "num_statements": 1} {"question": "What was the total program impact by program category in H1 2023?", "schema": "CREATE TABLE program_categories (id INT, category VARCHAR(50), impact INT); INSERT INTO program_categories (id, category, impact) VALUES (1, 'Education', 200), (2, 'Healthcare', 300), (3, 'Environment', 150);", "sql": "SELECT category, SUM(impact) FROM program_categories WHERE category IN ('Education', 'Healthcare', 'Environment') GROUP BY category;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 132, "num_statements": 1} {"question": "Which regions in Japan have the most volunteers?", "schema": "CREATE TABLE regions (id INT, region_name TEXT); CREATE TABLE volunteers (id INT, region_id INT, volunteer_count INT); INSERT INTO regions (id, region_name) VALUES (1, 'Kanto'), (2, 'Kansai'), (3, 'Chubu'); INSERT INTO volunteers (id, region_id, volunteer_count) VALUES (1, 1, 300), (2, 2, 250), (3, 1, 200);", "sql": "SELECT regions.region_name, SUM(volunteers.volunteer_count) as total_volunteers FROM regions JOIN volunteers ON regions.id = volunteers.region_id GROUP BY regions.region_name ORDER BY total_volunteers DESC;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 206, "num_statements": 1} {"question": "Show the average revenue for restaurants in each city.", "schema": "CREATE TABLE Restaurants (restaurant_id INT, name TEXT, city TEXT, revenue FLOAT); INSERT INTO Restaurants (restaurant_id, name, city, revenue) VALUES (1, 'Asian Fusion', 'New York', 50000.00), (2, 'Bella Italia', 'Los Angeles', 60000.00), (3, 'Sushi House', 'New York', 70000.00), (4, 'Pizzeria La Rosa', 'Chicago', 80000.00);", "sql": "SELECT city, AVG(revenue) FROM Restaurants GROUP BY city;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 57, "num_statements": 1} {"question": "How many marine mammals are there in the 'species' table, and what is their average conservation status ranking?", "schema": "CREATE TABLE species (species_id INT, common_name VARCHAR(50), latin_name VARCHAR(50), conservation_status VARCHAR(50), class VARCHAR(50)); INSERT INTO species (species_id, common_name, latin_name, conservation_status, class) VALUES (1, 'Green Sea Turtle', 'Chelonia mydas', 'Vulnerable', 'Reptilia'), (2, 'Bottlenose Dolphin', 'Tursiops truncatus', 'Least Concern', 'Mammalia');", "sql": "SELECT class, COUNT(*), AVG(CASE WHEN conservation_status = 'Critically Endangered' THEN 5 WHEN conservation_status = 'Endangered' THEN 4 WHEN conservation_status = 'Vulnerable' THEN 3 WHEN conservation_status = 'Near Threatened' THEN 2 WHEN conservation_status = 'Least Concern' THEN 1 ELSE 0 END) as conservation_rank FROM species WHERE class = 'Mammalia' GROUP BY class;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 373, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the density (hab/ km²) when the altitude m is 1300?", "schema": "CREATE TABLE table_name_61 (density__hab__km²__ VARCHAR, altitude_m VARCHAR)", "sql": "SELECT density__hab__km²__ FROM table_name_61 WHERE altitude_m = '1300';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 72, "num_statements": 1} {"question": "PostgreSQL regression test 'tstypes': Write the SELECT query (example 97).", "schema": null, "sql": "select to_tsvector('simple', 'y z q') @@ '(x | y <-> z) <-> q' AS \"true\";", "explanation": "Regression test for Tstypes in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select to_tsvector('simple', 'y z q') @@ '(x | y <-> z) <-> q' AS \"true\") AS _sub;", "source": "postgresql_regression_tests", "difficulty": "advanced", "category": "query_select", "is_postgresql_specific": false, "sql_length": 73, "num_statements": 1} {"question": "PostgreSQL regression test 'multirangetypes': Write the SELECT query (example 580).", "schema": null, "sql": "select mr_polymorphic(int4range(1, 4));", "explanation": "Regression test for Multirangetypes in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select mr_polymorphic(int4range(1, 4))) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 39, "num_statements": 1} {"question": "What is the total quantity of sustainable material used by each supplier?", "schema": "CREATE TABLE suppliers (supplier_id INT, supplier_name TEXT); CREATE TABLE materials (material_id INT, material_name TEXT, is_sustainable BOOLEAN); CREATE TABLE inventory (inventory_id INT, supplier_id INT, material_id INT, quantity INT); INSERT INTO suppliers (supplier_id, supplier_name) VALUES (1, 'Green Supplies'), (2, 'Eco Friendly Inc.'); INSERT INTO materials (material_id, material_name, is_sustainable) VALUES (1, 'Organic Cotton', TRUE), (2, 'Polyester', FALSE); INSERT INTO inventory (inventory_id, supplier_id, material_id, quantity) VALUES (1, 1, 1, 100), (2, 1, 2, 50), (3, 2, 1, 75), (4, 2, 2, 150);", "sql": "SELECT supplier_id, SUM(quantity) FROM inventory JOIN materials ON inventory.material_id = materials.material_id WHERE is_sustainable = TRUE GROUP BY supplier_id;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 162, "num_statements": 1} {"question": "What is the highest temperature recorded in 'Berlin'?", "schema": "CREATE TABLE weather (city VARCHAR(255), temperature FLOAT, date DATE); INSERT INTO weather (city, temperature, date) VALUES ('Berlin', 80, '2022-05-01'), ('Berlin', 85, '2022-06-15'), ('Berlin', 90, '2022-07-20');", "sql": "SELECT MAX(temperature) FROM weather WHERE city = 'Berlin';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 59, "num_statements": 1} {"question": "What is the name and location of all heritage sites that have craft workshops?", "schema": "CREATE TABLE heritage_sites (id INT, name VARCHAR, location VARCHAR); INSERT INTO heritage_sites (id, name, location) VALUES (1, 'Heritage Site A', 'City A'), (2, 'Heritage Site B', 'City B'); CREATE TABLE workshops (id INT, type VARCHAR, site_id INT); INSERT INTO workshops (id, type, site_id) VALUES (1, 'Craft', 1), (2, 'Performance', 2);", "sql": "SELECT heritage_sites.name, heritage_sites.location FROM heritage_sites INNER JOIN workshops ON heritage_sites.id = workshops.site_id WHERE workshops.type = 'Craft';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "query_join", "is_postgresql_specific": false, "sql_length": 165, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many grid numbers were there for the driver Giancarlo Fisichella?", "schema": "CREATE TABLE table_name_3 (grid VARCHAR, driver VARCHAR)", "sql": "SELECT COUNT(grid) FROM table_name_3 WHERE driver = 'giancarlo fisichella';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 75, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the district for nairs 8.2", "schema": "CREATE TABLE table_23214055_2 (district VARCHAR, nairs VARCHAR)", "sql": "SELECT district FROM table_23214055_2 WHERE nairs = '8.2';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 58, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'prepared_xacts' (example 25).", "schema": null, "sql": "INSERT INTO pxtest1 VALUES ('fff');", "explanation": "DML from PostgreSQL core regression test for Prepared Xacts.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 35, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who's the opposition at westpac stadium when the attendance is 31,853?", "schema": "CREATE TABLE table_name_56 (opposition VARCHAR, stadium VARCHAR, attendance VARCHAR)", "sql": "SELECT opposition FROM table_name_56 WHERE stadium = 'westpac stadium' AND attendance = '31,853';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 97, "num_statements": 1} {"question": "Which climate communication projects are not in 'North America'?", "schema": "CREATE TABLE climate_communication (project_id INTEGER, project_name TEXT, location TEXT); INSERT INTO climate_communication (project_id, project_name, location) VALUES (1, 'Project I', 'North America'), (2, 'Project J', 'Europe');", "sql": "SELECT project_name FROM climate_communication WHERE location != 'North America';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 81, "num_statements": 1} {"question": "What is the minimum dissolved oxygen level recorded in each monitoring zone in the past year?", "schema": "CREATE TABLE yearly_dissolved_oxygen (zone_id INT, zone_name TEXT, date DATE, dissolved_oxygen FLOAT); INSERT INTO yearly_dissolved_oxygen (zone_id, zone_name, date, dissolved_oxygen) VALUES (1, 'Zone A', '2022-01-01', 8.1), (2, 'Zone B', '2022-01-01', 7.9), (3, 'Zone C', '2022-01-01', 8.5), (1, 'Zone A', '2022-01-02', 7.8), (2, 'Zone B', '2022-01-02', 8.2), (3, 'Zone C', '2022-01-02', 8.6);", "sql": "SELECT zone_name, MIN(dissolved_oxygen) FROM yearly_dissolved_oxygen WHERE date >= DATE_SUB(CURDATE(), INTERVAL 1 YEAR) GROUP BY zone_name;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 139, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'hstore' (example 118).", "schema": null, "sql": "select 'aa=>1 , b=>2, cq=>3'::hstore || 'aa=>l';", "explanation": "Example query from the 'hstore' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 48, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the highest crowd number of the game where the away team was south melbourne?", "schema": "CREATE TABLE table_name_71 (crowd INTEGER, away_team VARCHAR)", "sql": "SELECT MAX(crowd) FROM table_name_71 WHERE away_team = 'south melbourne';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 73, "num_statements": 1} {"question": "What is the highest number of consecutive successful free throws by each basketball player in the 2022 season?", "schema": "CREATE TABLE free_throws (player_id INT, player_name VARCHAR(50), consecutive_successful_free_throws INT); INSERT INTO free_throws (player_id, player_name, consecutive_successful_free_throws) VALUES (1, 'LeBron James', 56), (2, 'Stephen Curry', 62), (3, 'Nikola Jokic', 45), (4, 'James Harden', 72), (5, 'Luka Doncic', 58);", "sql": "SELECT player_name, MAX(consecutive_successful_free_throws) as max_consecutive_successful_free_throws FROM free_throws GROUP BY player_name;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 140, "num_statements": 1} {"question": "Show an example of PostgreSQL CREATE INDEX (example 1).", "schema": null, "sql": "CREATE UNIQUE INDEX title_idx ON films (title);", "explanation": "PostgreSQL CREATE INDEX command.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_index", "is_postgresql_specific": false, "sql_length": 47, "num_statements": 1} {"question": "What is the average depth of all underground mines?", "schema": "CREATE TABLE underground_mines (mine_id INT, mine_name VARCHAR(50), depth FLOAT); INSERT INTO underground_mines (mine_id, mine_name, depth) VALUES (1, 'Mine X', 1200), (2, 'Mine Y', 1500), (3, 'Mine Z', 1800);", "sql": "SELECT AVG(depth) FROM underground_mines WHERE method_name = 'Underground';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 75, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the distance for the winner or 2nd Waterline?", "schema": "CREATE TABLE table_name_21 (distance VARCHAR, winner_or_2nd VARCHAR)", "sql": "SELECT distance FROM table_name_21 WHERE winner_or_2nd = 'waterline';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 69, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What team is owned by Mark Smith and has Paul Clapprood as a crew chief?", "schema": "CREATE TABLE table_name_8 (team VARCHAR, owner_s_ VARCHAR, crew_chief VARCHAR)", "sql": "SELECT team FROM table_name_8 WHERE owner_s_ = 'mark smith' AND crew_chief = 'paul clapprood';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 94, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the time/retired if the driver is Marco Andretti?", "schema": "CREATE TABLE table_17693171_1 (time_retired VARCHAR, driver VARCHAR)", "sql": "SELECT time_retired FROM table_17693171_1 WHERE driver = 'Marco Andretti';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 74, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What district did Donald Ray Matthews belong to?", "schema": "CREATE TABLE table_1341865_11 (district VARCHAR, incumbent VARCHAR)", "sql": "SELECT district FROM table_1341865_11 WHERE incumbent = 'Donald Ray Matthews';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 78, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What Event has a Competition of world championships?", "schema": "CREATE TABLE table_name_77 (event VARCHAR, competition VARCHAR)", "sql": "SELECT event FROM table_name_77 WHERE competition = 'world championships';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 74, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many performances are there?", "schema": "CREATE TABLE performance (Id VARCHAR)", "sql": "SELECT COUNT(*) FROM performance;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "advanced", "category": "plpgsql", "is_postgresql_specific": false, "sql_length": 33, "num_statements": 1} {"question": "Delete any soil moisture readings that have a moisture level below 30.", "schema": "CREATE TABLE Soil_Moisture (ID INT, Moisture FLOAT, Timestamp DATETIME); INSERT INTO Soil_Moisture (ID, Moisture, Timestamp) VALUES (1, 45, '2022-01-01 10:00:00'), (2, 28, '2022-01-15 12:00:00');", "sql": "DELETE FROM Soil_Moisture WHERE Moisture < 30;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_delete", "is_postgresql_specific": false, "sql_length": 46, "num_statements": 1} {"question": "How many mental health parity violations occurred in each region over the past year?", "schema": "CREATE TABLE MentalHealthParity (ViolationID INT, Region VARCHAR(255), ViolationDate DATE); INSERT INTO MentalHealthParity (ViolationID, Region, ViolationDate) VALUES (1, 'Northeast', '2022-01-02'), (2, 'Southeast', '2022-03-04'), (3, 'Midwest', '2022-05-01'), (4, 'Southwest', '2022-07-15'), (5, 'West', '2022-09-30');", "sql": "SELECT Region, COUNT(*) as ViolationCount FROM MentalHealthParity WHERE ViolationDate >= DATEADD(year, -1, GETDATE()) GROUP BY Region;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 134, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which surface has wynne prakusya as the opponent in the final?", "schema": "CREATE TABLE table_name_51 (surface VARCHAR, opponent_in_the_final VARCHAR)", "sql": "SELECT surface FROM table_name_51 WHERE opponent_in_the_final = 'wynne prakusya';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 81, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which chassis scored 52 points?", "schema": "CREATE TABLE table_name_55 (chassis VARCHAR, points VARCHAR)", "sql": "SELECT chassis FROM table_name_55 WHERE points = 52;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 52, "num_statements": 1} {"question": "What is the total number of network infrastructure investments in the 'Asia' region?", "schema": "CREATE TABLE network_investments (investment_id INT, investment_amount FLOAT, investment_type VARCHAR(20), region VARCHAR(20)); INSERT INTO network_investments (investment_id, investment_amount, investment_type, region) VALUES (1, 500000, 'Cell Tower', 'Asia'), (2, 750000, 'Broadband Node', 'Europe'), (3, 300000, 'Cell Tower', 'Africa'), (4, 900000, 'Broadband Node', 'North America');", "sql": "SELECT SUM(investment_amount) FROM network_investments WHERE region = 'Asia';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 77, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'rules' (example 129).", "schema": null, "sql": "insert into rtest_empmass values ('meyer', '4000.00');", "explanation": "DML from PostgreSQL core regression test for Rules.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 54, "num_statements": 1} {"question": "List all the broadband subscribers in Texas who have a plan with speeds over 100 Mbps?", "schema": "CREATE TABLE broadband_subscribers (subscriber_id INT, home_location VARCHAR(50), plan_speed DECIMAL(10,2)); INSERT INTO broadband_subscribers (subscriber_id, home_location, plan_speed) VALUES (1, 'Texas', 150), (2, 'California', 75), (3, 'Texas', 120);", "sql": "SELECT subscriber_id, home_location FROM broadband_subscribers WHERE plan_speed > 100 AND home_location = 'Texas';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 114, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Show all student ids and the number of hours played.", "schema": "CREATE TABLE Plays_games (Stuid VARCHAR, hours_played INTEGER)", "sql": "SELECT Stuid, SUM(hours_played) FROM Plays_games GROUP BY Stuid;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 64, "num_statements": 1} {"question": "How many new broadband subscribers have signed up in the state of California in the last 30 days?", "schema": "CREATE TABLE subscribers (id INT, subscriber_type VARCHAR(50), subscribe_date DATE);", "sql": "SELECT COUNT(*) FROM subscribers WHERE subscriber_type = 'broadband' AND subscribe_date >= CURDATE() - INTERVAL 30 DAY AND subscribe_date < CURDATE() AND state = 'California';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 175, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: what was the length of the game on may 19", "schema": "CREATE TABLE table_name_10 (length VARCHAR, date VARCHAR)", "sql": "SELECT length FROM table_name_10 WHERE date = 'may 19';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 55, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What are the number of episodes when the genre is modern drama and the highest average ratings points are 28?", "schema": "CREATE TABLE table_name_34 (number_of_episodes INTEGER, genre VARCHAR, highest_average_point_ratings VARCHAR)", "sql": "SELECT AVG(number_of_episodes) FROM table_name_34 WHERE genre = 'modern drama' AND highest_average_point_ratings = 28;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 118, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: how many player with no being 12", "schema": "CREATE TABLE table_12962773_5 (player VARCHAR, no VARCHAR)", "sql": "SELECT COUNT(player) FROM table_12962773_5 WHERE no = 12;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 57, "num_statements": 1} {"question": "List all the safety protocols for factories located in Ohio or Michigan?", "schema": "CREATE TABLE factories (factory_id INT, name TEXT, location TEXT); INSERT INTO factories (factory_id, name, location) VALUES (1, 'Factory A', 'Ohio'), (2, 'Factory B', 'Michigan'), (3, 'Factory C', 'California'); CREATE TABLE safety_protocols (protocol_id INT, factory_id INT, protocol TEXT); INSERT INTO safety_protocols (protocol_id, factory_id, protocol) VALUES (1, 1, 'Fire Safety'), (2, 1, 'Emergency Exits'), (3, 2, 'Fire Safety'), (4, 2, 'Chemical Spills'), (5, 3, 'Fire Safety'), (6, 3, 'Hurricane Preparedness');", "sql": "SELECT s.protocol FROM factories f JOIN safety_protocols s ON f.factory_id = s.factory_id WHERE f.location IN ('Ohio', 'Michigan');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 131, "num_statements": 1} {"question": "What is the average response time for emergency calls in the 'Mountain' region?", "schema": "CREATE TABLE emergency_calls (id INT, region VARCHAR(20), response_time INT); INSERT INTO emergency_calls (id, region, response_time) VALUES (1, 'Mountain', 100), (2, 'Mountain', 120), (3, 'Mountain', 85);", "sql": "SELECT AVG(response_time) FROM emergency_calls WHERE region = 'Mountain';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 73, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the score on 17 November 1956 when home team is Derby County?", "schema": "CREATE TABLE table_name_36 (score VARCHAR, date VARCHAR, home_team VARCHAR)", "sql": "SELECT score FROM table_name_36 WHERE date = '17 november 1956' AND home_team = 'derby county';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 95, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which date has a Home of washington, a Visitor of florida, and a Record of 8-15-2?", "schema": "CREATE TABLE table_name_78 (date VARCHAR, record VARCHAR, home VARCHAR, visitor VARCHAR)", "sql": "SELECT date FROM table_name_78 WHERE home = 'washington' AND visitor = 'florida' AND record = '8-15-2';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 103, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the away team score when the home team was the Brisbane Lions?", "schema": "CREATE TABLE table_name_83 (away_team VARCHAR, home_team VARCHAR)", "sql": "SELECT away_team AS score FROM table_name_83 WHERE home_team = 'brisbane lions';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 80, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the report for the race that Mike Spence won.", "schema": "CREATE TABLE table_1140099_6 (report VARCHAR, winning_driver VARCHAR)", "sql": "SELECT report FROM table_1140099_6 WHERE winning_driver = 'Mike Spence';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 72, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What score has 15.0% as the 2012?", "schema": "CREATE TABLE table_name_16 (score VARCHAR)", "sql": "SELECT score FROM table_name_16 WHERE 2012 = '15.0%';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 53, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the competition held in 2003?", "schema": "CREATE TABLE table_name_17 (competition VARCHAR, year VARCHAR)", "sql": "SELECT competition FROM table_name_17 WHERE year = 2003;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 56, "num_statements": 1} {"question": "What is the total number of artworks in the modern and contemporary categories?", "schema": "CREATE TABLE Artworks (category VARCHAR(20), quantity INT); INSERT INTO Artworks (category, quantity) VALUES ('Modern', 1200), ('Modern', 1500), ('Contemporary', 800), ('Contemporary', 900);", "sql": "SELECT SUM(quantity) FROM Artworks WHERE category IN ('Modern', 'Contemporary');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 80, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the highest number of goals when 2428 minutes were played?", "schema": "CREATE TABLE table_name_91 (goals INTEGER, minutes VARCHAR)", "sql": "SELECT MAX(goals) FROM table_name_91 WHERE minutes = 2428;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 58, "num_statements": 1} {"question": "What is the average age of soccer players from the UK?", "schema": "CREATE TABLE Teams (TeamID INT PRIMARY KEY, TeamName VARCHAR(100), Sport VARCHAR(50), Country VARCHAR(50)); INSERT INTO Teams (TeamID, TeamName, Sport, Country) VALUES (1, 'Manchester United', 'Soccer', 'England'); CREATE TABLE Players (PlayerID INT PRIMARY KEY, Name VARCHAR(100), Age INT, Sport VARCHAR(50), Country VARCHAR(50), TeamID INT, FOREIGN KEY (TeamID) REFERENCES Teams(TeamID)); INSERT INTO Players (PlayerID, Name, Age, Sport, Country, TeamID) VALUES (1, 'Alice Johnson', 28, 'Soccer', 'England', 1); INSERT INTO Players (PlayerID, Name, Age, Sport, Country, TeamID) VALUES (2, 'Bob Williams', 31, 'Soccer', 'England', 1);", "sql": "SELECT AVG(Age) as AvgAge FROM Players WHERE Sport = 'Soccer' AND Country = 'England';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 86, "num_statements": 1} {"question": "What is the average number of visitors per day for the 'Contemporary Art' exhibition in 2021?", "schema": "CREATE TABLE Exhibition_Daily_Attendance (exhibition_id INT, visit_date DATE, visitor_count INT); CREATE TABLE Exhibitions (id INT, name VARCHAR(50)); INSERT INTO Exhibitions (id, name) VALUES (1, 'Contemporary Art'); ALTER TABLE Exhibition_Daily_Attendance ADD FOREIGN KEY (exhibition_id) REFERENCES Exhibitions(id);", "sql": "SELECT AVG(visitor_count) FROM Exhibition_Daily_Attendance WHERE exhibition_id = 1 AND visit_date BETWEEN '2021-01-01' AND '2021-12-31';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 136, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which club has a capacity of 25138?", "schema": "CREATE TABLE table_name_4 (club VARCHAR, capacity VARCHAR)", "sql": "SELECT club FROM table_name_4 WHERE capacity = '25138';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 55, "num_statements": 1} {"question": "What is the total CO2 emissions in Canada, and how does it break down by sector?", "schema": "CREATE TABLE co2_emissions (id INT, country VARCHAR(255), sector VARCHAR(255), emissions FLOAT);", "sql": "SELECT sector, SUM(emissions) FROM co2_emissions WHERE country = 'Canada' GROUP BY sector;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 90, "num_statements": 1} {"question": "What is the number of players who have rated game 1 and game 2?", "schema": "CREATE TABLE PlayerRatings (PlayerID INT, GameID INT, Rating FLOAT); INSERT INTO PlayerRatings (PlayerID, GameID, Rating) VALUES (1, 1, 8.5), (1, 2, 9.2), (2, 1, 7.8), (2, 2, 8.9), (3, 1, 8.1), (3, 2, 9.0);", "sql": "SELECT GameID, COUNT(DISTINCT PlayerID) FROM PlayerRatings GROUP BY GameID;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 75, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who was the home team for the match against Arsenal?", "schema": "CREATE TABLE table_name_2 (home_team VARCHAR, away_team VARCHAR)", "sql": "SELECT home_team FROM table_name_2 WHERE away_team = 'arsenal';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 63, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'partition_join' (example 536).", "schema": null, "sql": "CREATE TABLE plt2_adv_default PARTITION OF plt2_adv DEFAULT;", "explanation": "DDL from PostgreSQL core regression test for Partition Join.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_table", "is_postgresql_specific": false, "sql_length": 60, "num_statements": 1} {"question": "What are the names and locations of marine protected areas that overlap with the ocean floor mapping project?", "schema": "CREATE TABLE marine_protected_areas (name TEXT, location TEXT, avg_depth REAL); INSERT INTO marine_protected_areas (name, location, avg_depth) VALUES ('Galapagos Marine Reserve', 'Ecuador', 200.0), ('Great Barrier Reef', 'Australia', 35.0); CREATE TABLE ocean_floor_mapping (location TEXT, depth REAL); INSERT INTO ocean_floor_mapping (location, depth) VALUES ('Mariana Trench', 10994.0), ('Sunda Trench', 8047.0);", "sql": "SELECT name, location FROM marine_protected_areas WHERE location IN (SELECT location FROM ocean_floor_mapping);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 111, "num_statements": 1} {"question": "Show all records from 'smart_grid_view'", "schema": "CREATE TABLE smart_grid (id INT PRIMARY KEY, city VARCHAR(50), power_sources VARCHAR(50), renewable_energy_percentage INT); CREATE VIEW smart_grid_view AS SELECT * FROM smart_grid;", "sql": "SELECT * FROM smart_grid_view;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 30, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the percentage which has females of 2", "schema": "CREATE TABLE table_name_17 (percentage___percentage_ VARCHAR, females VARCHAR)", "sql": "SELECT percentage___percentage_ FROM table_name_17 WHERE females = '2';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 71, "num_statements": 1} {"question": "What was the recycling rate in percentage for the region 'Seattle' in 2020?", "schema": "CREATE TABLE recycling_rates (region VARCHAR(50), year INT, recycling_rate FLOAT); INSERT INTO recycling_rates (region, year, recycling_rate) VALUES ('Seattle', 2020, 35.67);", "sql": "SELECT recycling_rate FROM recycling_rates WHERE region = 'Seattle' AND year = 2020;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 84, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many gp-gs have 121.70 as an effic and an avg/g less than 218.7?", "schema": "CREATE TABLE table_name_41 (gp_gs VARCHAR, effic VARCHAR, avg_g VARCHAR)", "sql": "SELECT COUNT(gp_gs) FROM table_name_41 WHERE effic = '121.70' AND avg_g < 218.7;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 80, "num_statements": 1} {"question": "What is the average price of non-sustainable materials for each supplier, showing only suppliers with more than 3 products using their materials?", "schema": "CREATE TABLE supplier (id INT PRIMARY KEY, name VARCHAR(100), country VARCHAR(50), sustainable BOOLEAN); CREATE TABLE material (id INT PRIMARY KEY, name VARCHAR(100), supplier_id INT, price DECIMAL(5,2)); CREATE TABLE product (id INT PRIMARY KEY, name VARCHAR(100), manufacturer_id INT, price DECIMAL(5,2), sustainable BOOLEAN); CREATE TABLE manufacturer (id INT PRIMARY KEY, name VARCHAR(100), country VARCHAR(50), sustainable BOOLEAN); CREATE VIEW sustainable_materials AS SELECT material.id, material.name, material.price, supplier.name as supplier_name FROM material INNER JOIN supplier ON material.supplier_id = supplier.id WHERE supplier.sustainable = TRUE;", "sql": "SELECT supplier_name, AVG(material.price) as average_price FROM material INNER JOIN product ON material.id = product.material_id INNER JOIN supplier ON material.supplier_id = supplier.id WHERE supplier.sustainable = FALSE GROUP BY supplier.name HAVING COUNT(*) > 3;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 265, "num_statements": 1} {"question": "What is the age distribution of visitors who attended the \"Fauvism\" exhibition?", "schema": "CREATE TABLE visitors (visitor_id INT, age INT, visited_fauvism BOOLEAN); INSERT INTO visitors (visitor_id, age, visited_fauvism) VALUES (123, 45, TRUE), (456, 32, FALSE), (789, 51, TRUE), (111, 64, TRUE), (222, 21, FALSE);", "sql": "SELECT CASE WHEN age < 30 THEN '18-29' WHEN age < 50 THEN '30-49' ELSE '50+' END AS age_range, COUNT(*) AS visitor_count FROM visitors WHERE visited_fauvism = TRUE GROUP BY age_range;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 183, "num_statements": 1} {"question": "What is the total cargo weight handled by carriers from Spain?", "schema": "CREATE TABLE Carrier (CarrierID INT, Name VARCHAR(255), Country VARCHAR(255), HomePort VARCHAR(255)); INSERT INTO Carrier (CarrierID, Name, Country, HomePort) VALUES (2, 'MSC Mediterranean Shipping Company', 'Switzerland', 'Genoa'); INSERT INTO Carrier (CarrierID, Name, Country, HomePort) VALUES (3, 'Maersk', 'Denmark', 'Copenhagen'); INSERT INTO Carrier (CarrierID, Name, Country, HomePort) VALUES (4, 'COSCO Shipping', 'China', 'Shanghai'); INSERT INTO Carrier (CarrierID, Name, Country, HomePort) VALUES (5, 'Hapag-Lloyd', 'Germany', 'Hamburg'); INSERT INTO Carrier (CarrierID, Name, Country, HomePort) VALUES (6, 'Evergreen Marine', 'Taiwan', 'Taipei');", "sql": "SELECT SUM(Cargo.Weight) FROM Cargo JOIN Vessel ON Cargo.VesselID = Vessel.VesselID JOIN Carrier ON Vessel.CarrierID = Carrier.CarrierID WHERE Carrier.Country = 'Spain';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 169, "num_statements": 1} {"question": "What is the average dissolved oxygen level for each species in Tank1?", "schema": "CREATE TABLE Tank1 (species VARCHAR(50), dissolved_oxygen FLOAT); INSERT INTO Tank1 (species, dissolved_oxygen) VALUES ('Salmon', 6.5), ('Trout', 7.2), ('Tilapia', 5.8);", "sql": "SELECT species, AVG(dissolved_oxygen) FROM Tank1 GROUP BY species;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many gb's have an iso number of cn-65?", "schema": "CREATE TABLE table_254234_1 (gb VARCHAR, iso_№ VARCHAR)", "sql": "SELECT COUNT(gb) FROM table_254234_1 WHERE iso_№ = 'CN-65';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 59, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which band serves darwin for commercial purpose?", "schema": "CREATE TABLE table_name_61 (band VARCHAR, area_served VARCHAR, purpose VARCHAR)", "sql": "SELECT band FROM table_name_61 WHERE area_served = 'darwin' AND purpose = 'commercial';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 87, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What's the diameter of the 2006 equestrian Reverse?", "schema": "CREATE TABLE table_name_49 (diameter VARCHAR, year VARCHAR, reverse VARCHAR)", "sql": "SELECT diameter FROM table_name_49 WHERE year = 2006 AND reverse = 'equestrian';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 80, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the name of the club with 22 points?", "schema": "CREATE TABLE table_name_2 (club VARCHAR, points VARCHAR)", "sql": "SELECT club FROM table_name_2 WHERE points = '22';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 50, "num_statements": 1} {"question": "Find the menu items that have a lower than average revenue for their respective cuisine category.", "schema": "CREATE TABLE menu_items (menu_item_id INT, restaurant_id INT, name VARCHAR(255), revenue DECIMAL(10, 2), cuisine VARCHAR(255));", "sql": "SELECT m.name, AVG(o.revenue) OVER (PARTITION BY m.cuisine) AS avg_revenue FROM menu_items m JOIN orders o ON m.menu_item_id = o.menu_item_id WHERE m.revenue < AVG(o.revenue) OVER (PARTITION BY m.cuisine);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "query_window_function", "is_postgresql_specific": true, "sql_length": 205, "num_statements": 1} {"question": "Which decentralized applications belong to the 'Finance' category?", "schema": "CREATE TABLE dapps (id INT, name VARCHAR(255), category VARCHAR(50)); INSERT INTO dapps (id, name, category) VALUES (1, 'DApp1', 'Finance'); INSERT INTO dapps (id, name, category) VALUES (2, 'DApp2', 'Gaming'); INSERT INTO dapps (id, name, category) VALUES (3, 'DApp3', 'Social'); INSERT INTO dapps (id, name, category) VALUES (4, 'DApp4', 'Exchange');", "sql": "SELECT name FROM dapps WHERE category = 'Finance';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 50, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: List the all the distinct names of the products with the characteristic name 'warm'.", "schema": "CREATE TABLE CHARACTERISTICS (characteristic_id VARCHAR, characteristic_name VARCHAR); CREATE TABLE products (product_name VARCHAR, product_id VARCHAR); CREATE TABLE product_characteristics (product_id VARCHAR, characteristic_id VARCHAR)", "sql": "SELECT DISTINCT t1.product_name FROM products AS t1 JOIN product_characteristics AS t2 ON t1.product_id = t2.product_id JOIN CHARACTERISTICS AS t3 ON t2.characteristic_id = t3.characteristic_id WHERE t3.characteristic_name = 'warm';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 232, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the purse when Sherri Steinhauer was champion at Woburn Golf and Country Club?", "schema": "CREATE TABLE table_name_41 (purse___ VARCHAR, champion VARCHAR, venue VARCHAR)", "sql": "SELECT COUNT(purse___) AS $__ FROM table_name_41 WHERE champion = 'sherri steinhauer' AND venue = 'woburn golf and country club';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 129, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the total amount of circuts dated 22 april?", "schema": "CREATE TABLE table_1140105_6 (circuit VARCHAR, date VARCHAR)", "sql": "SELECT COUNT(circuit) FROM table_1140105_6 WHERE date = '22 April';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 67, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: For the tournament played on Oct 17, 1982, what was the winning score?", "schema": "CREATE TABLE table_name_69 (winning_score VARCHAR, date VARCHAR)", "sql": "SELECT winning_score FROM table_name_69 WHERE date = 'oct 17, 1982';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 68, "num_statements": 1} {"question": "What percentage of cruelty-free products are also vegan?", "schema": "CREATE TABLE products (product_id INT, cruelty_free BOOLEAN, vegan BOOLEAN); INSERT INTO products VALUES (1, true, true), (2, false, false), (3, false, true), (4, true, false), (5, true, true), (6, false, true), (7, true, false), (8, false, false), (9, true, true), (10, false, false);", "sql": "SELECT (COUNT(p.cruelty_free AND p.vegan) * 100.0 / (SELECT COUNT(*) FROM products)) AS vegan_cruelty_free_percentage FROM products p;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 134, "num_statements": 1} {"question": "What is the average sale value per equipment type?", "schema": "CREATE TABLE Military_Equipment_Sales(id INT, sale_date DATE, country VARCHAR(50), equipment_type VARCHAR(50), sale_value FLOAT); INSERT INTO Military_Equipment_Sales(id, sale_date, country, equipment_type, sale_value) VALUES (1, '2020-01-01', 'USA', 'Naval', 70000000);", "sql": "SELECT equipment_type, AVG(sale_value) FROM Military_Equipment_Sales GROUP BY equipment_type;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 93, "num_statements": 1} {"question": "Delete the artifact with ArtifactID 2 from the database.", "schema": "CREATE TABLE ExcavationSites (SiteID INT, SiteName TEXT, Country TEXT); INSERT INTO ExcavationSites (SiteID, SiteName, Country) VALUES (1, 'MayanRuins', 'Guatemala'), (3, 'Pompeii', 'Italy'); CREATE TABLE Artifacts (ArtifactID INT, SiteID INT, ArtifactName TEXT, ArtifactType TEXT, Quantity INT); INSERT INTO Artifacts (ArtifactID, SiteID, ArtifactName, ArtifactType, Quantity) VALUES (1, 1, 'Jade Mask', 'Mask', 1), (2, 1, 'Obsidian Knife', 'Knife', 2), (4, 3, 'Roman Coin', 'Coin', 20);", "sql": "DELETE FROM Artifacts WHERE ArtifactID = 2;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_delete", "is_postgresql_specific": false, "sql_length": 43, "num_statements": 1} {"question": "What is the total budget allocated to waste management in Seoul in 2019?", "schema": "CREATE TABLE Waste (City VARCHAR(20), Year INT, Amount INT); INSERT INTO Waste (City, Year, Amount) VALUES ('Seoul', 2019, 6000);", "sql": "SELECT SUM(Amount) FROM Waste WHERE City = 'Seoul' AND Year = 2019;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 67, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What position is played by the player from the Chicago Cubs?", "schema": "CREATE TABLE table_name_8 (position VARCHAR, team VARCHAR)", "sql": "SELECT position FROM table_name_8 WHERE team = 'chicago cubs';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 62, "num_statements": 1} {"question": "What is the total number of animals in each status category in 'species_status' table?", "schema": "CREATE TABLE species_status (id INT, species_name VARCHAR(50), status VARCHAR(50)); INSERT INTO species_status VALUES (1, 'Tiger', 'Endangered'), (2, 'Elephant', 'Vulnerable');", "sql": "SELECT status, COUNT(*) FROM species_status GROUP BY status;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 60, "num_statements": 1} {"question": "Get the average hourly wage for construction workers in the 10 most populous states for the year 2020", "schema": "CREATE TABLE labor_statistics (state VARCHAR(2), year INT, total_workers INT, avg_hourly_wage DECIMAL(5,2), total_payroll DECIMAL(10,2)); INSERT INTO labor_statistics (state, year, total_workers, avg_hourly_wage, total_payroll) VALUES ('CA', 2020, 500000, 30.50, 762500000), ('TX', 2020, 450000, 29.80, 654750000), ('NY', 2020, 400000, 31.20, 624000000), ('FL', 2020, 350000, 27.90, 531500000), ('PA', 2020, 300000, 28.60, 498000000), ('IL', 2020, 250000, 27.30, 432500000), ('OH', 2020, 225000, 26.80, 403500000), ('MI', 2020, 200000, 28.10, 382000000), ('NJ', 2020, 175000, 30.10, 345750000), ('NC', 2020, 150000, 25.40, 280500000);", "sql": "SELECT state, AVG(avg_hourly_wage) FROM labor_statistics WHERE year = 2020 GROUP BY state ORDER BY total_workers DESC LIMIT 10;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 127, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What club team is Per Gustafsson play for?", "schema": "CREATE TABLE table_1013129_11 (college_junior_club_team VARCHAR, player VARCHAR)", "sql": "SELECT college_junior_club_team FROM table_1013129_11 WHERE player = 'Per Gustafsson';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 86, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What ride opened after 2011?", "schema": "CREATE TABLE table_name_91 (current_name VARCHAR, year_first_opened INTEGER)", "sql": "SELECT current_name FROM table_name_91 WHERE year_first_opened > 2011;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 70, "num_statements": 1} {"question": "Calculate the daily revenue for a taxi company in a specific city", "schema": "CREATE TABLE taxi_trip (trip_id INT, trip_start_time TIMESTAMP, trip_end_time TIMESTAMP, fare DECIMAL(10, 2), city VARCHAR(50));", "sql": "SELECT DATE(trip_start_time) AS trip_date, SUM(fare) AS daily_revenue FROM taxi_trip WHERE city = 'New York' GROUP BY trip_date;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 128, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Show codes and fates of missions, and names of ships involved.", "schema": "CREATE TABLE mission (Code VARCHAR, Fate VARCHAR, Ship_ID VARCHAR); CREATE TABLE ship (Name VARCHAR, Ship_ID VARCHAR)", "sql": "SELECT T1.Code, T1.Fate, T2.Name FROM mission AS T1 JOIN ship AS T2 ON T1.Ship_ID = T2.Ship_ID;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 95, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'rowsecurity' (example 846).", "schema": null, "sql": "CREATE ROLE regress_rls_frank;", "explanation": "DDL from PostgreSQL core regression test for Rowsecurity.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 30, "num_statements": 1} {"question": "What is the average rating of hip-hop songs released in 2015?", "schema": "CREATE TABLE Hip_Hop_Songs (title TEXT, year INTEGER, rating FLOAT); INSERT INTO Hip_Hop_Songs (title, year, rating) VALUES ('Song1', 2013, 7.5), ('Song2', 2014, 8.0), ('Song3', 2015, 8.5), ('Song4', 2016, 9.0), ('Song5', 2017, 9.2), ('Song6', 2018, 9.5);", "sql": "SELECT AVG(rating) FROM Hip_Hop_Songs WHERE year = 2015;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 56, "num_statements": 1} {"question": "What is the total number of shelters constructed in \"North America\" in 2019?", "schema": "CREATE TABLE shelters (id INT, project_id INT, location VARCHAR(255), construction_date DATE); INSERT INTO shelters (id, project_id, location, construction_date) VALUES (1, 10001, 'USA', '2019-05-01'); INSERT INTO shelters (id, project_id, location, construction_date) VALUES (2, 10002, 'Canada', '2019-02-01');", "sql": "SELECT COUNT(*) FROM shelters WHERE location = 'North America' AND YEAR(construction_date) = 2019;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 98, "num_statements": 1} {"question": "What was the average age of attendees for each event in 2022?", "schema": "CREATE TABLE event_attendance (event_id INT, attendee_age INT, program_id INT, event_date DATE); INSERT INTO event_attendance (event_id, attendee_age, program_id, event_date) VALUES (1, 34, 101, '2022-05-12'); INSERT INTO event_attendance (event_id, attendee_age, program_id, event_date) VALUES (2, 45, 102, '2022-06-20');", "sql": "SELECT event_id, AVG(attendee_age) as avg_age FROM event_attendance WHERE YEAR(event_date) = 2022 GROUP BY event_id;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 116, "num_statements": 1} {"question": "List all rural infrastructure projects in Africa, along with their start and end dates, and the number of beneficiaries.", "schema": "CREATE SCHEMA if not exists rural_dev; use rural_dev; CREATE TABLE if not exists rural_infrastructure_projects (id INT, project_name VARCHAR(255), country VARCHAR(255), start_date DATE, end_date DATE, num_beneficiaries INT, PRIMARY KEY (id));", "sql": "SELECT project_name, start_date, end_date, num_beneficiaries FROM rural_dev.rural_infrastructure_projects WHERE country LIKE 'Africa%' OR country LIKE 'Algeria' OR country LIKE 'Egypt';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 185, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which nominations was the film 27 Stolen Kisses nominated for?", "schema": "CREATE TABLE table_10236830_6 (nomination VARCHAR, film_name VARCHAR)", "sql": "SELECT nomination FROM table_10236830_6 WHERE film_name = '27 Stolen Kisses';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 77, "num_statements": 1} {"question": "How many community education programs were conducted in '2021' and '2022' in the 'education_programs' table?", "schema": "CREATE TABLE education_programs (id INT, program_name VARCHAR(50), year INT, attendees INT); INSERT INTO education_programs (id, program_name, year, attendees) VALUES (1, 'Wildlife Conservation', 2021, 250);", "sql": "SELECT COUNT(*) FROM education_programs WHERE year IN (2021, 2022);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 67, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the total city area for vehari city with a serial number bigger than 36?", "schema": "CREATE TABLE table_name_1 (city_area_km_2__ VARCHAR, headquartered_city VARCHAR, serial_no VARCHAR)", "sql": "SELECT COUNT(city_area_km_2__) FROM table_name_1 WHERE headquartered_city = 'vehari city' AND serial_no > 36;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 109, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'numeric_big' (example 339).", "schema": null, "sql": "INSERT INTO num_exp_sub VALUES (7,9,'-818934540126709222.34685027005508022756223282084742813020271603840941647350440860843570182437301045468670059279379903480024743452620396345637401505220786389930600883087012615993343976556472498552535317826554614696684732913955544753638726438705858481670766245958647367500212800073774509075408148134050353551558174813940258910304990570172170811882520915334358633');", "explanation": "DML from PostgreSQL core regression test for Numeric Big.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 401, "num_statements": 1} {"question": "What is the total number of military equipment serviced by defense contractors based in Texas?", "schema": "CREATE TABLE military_equipment (id INT, name VARCHAR(50), company VARCHAR(50), service_frequency INT, company_location VARCHAR(50)); INSERT INTO military_equipment (id, name, company, service_frequency, company_location) VALUES (1, 'M1 Abrams', 'XYZ', 12, 'Texas'); INSERT INTO military_equipment (id, name, company, service_frequency, company_location) VALUES (2, 'F-35', 'ABC', 24, 'New York');", "sql": "SELECT SUM(service_frequency) FROM military_equipment WHERE company_location = 'Texas';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 87, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: how many equivalent daily inflation rate with time required for prices to double being 3.7 days", "schema": "CREATE TABLE table_13681_2 (equivalent_daily_inflation_rate VARCHAR, time_required_for_prices_to_double VARCHAR)", "sql": "SELECT COUNT(equivalent_daily_inflation_rate) FROM table_13681_2 WHERE time_required_for_prices_to_double = '3.7 days';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 119, "num_statements": 1} {"question": "PostgreSQL regression test 'pg_dependencies': Write the SELECT query (example 66).", "schema": null, "sql": "SELECT * FROM pg_input_error_info('[{\"attributes\" : [1,2,3,4,5,6,7], \"dependency\" : 0, \"degree\": \"1.2\"}]', 'pg_dependencies');", "explanation": "Regression test for Pg Dependencies in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT * FROM pg_input_error_info('[{\"attributes\" : [1,2,3,4,5,6,7], \"dependency\" : 0, \"degree\": \"1.2\"}]', 'pg_dependencies')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 126, "num_statements": 1} {"question": "Update artifact quantities based on their type?", "schema": "CREATE TABLE Artifacts (ArtifactID INT, ArtifactType VARCHAR(50), Quantity INT); INSERT INTO Artifacts (ArtifactID, ArtifactType, Quantity) VALUES (1, 'Pottery', 25), (2, 'Tools', 12), (3, 'Pottery', 30);", "sql": "UPDATE Artifacts SET Quantity = CASE ArtifactType WHEN 'Pottery' THEN Quantity * 1.1 WHEN 'Tools' THEN Quantity * 1.2 END;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 122, "num_statements": 1} {"question": "What is the total carbon emissions (in metric tons) for each industry, ranked from highest to lowest?", "schema": "CREATE TABLE carbon_emissions (industry VARCHAR(50), emissions FLOAT); INSERT INTO carbon_emissions (industry, emissions) VALUES ('Industry A', 50000, 'Industry B', 60000), ('Industry C', 40000, 'Industry D', 70000);", "sql": "SELECT industry, emissions, ROW_NUMBER() OVER (ORDER BY emissions DESC) as rank FROM carbon_emissions;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_window_function", "is_postgresql_specific": true, "sql_length": 102, "num_statements": 1} {"question": "What is the earliest year an artwork was created by each artist?", "schema": "CREATE TABLE artists (id INT, name TEXT); INSERT INTO artists (id, name) VALUES (1, 'Artist 1'), (2, 'Artist 2'); CREATE TABLE artworks (id INT, title TEXT, year_created INT, artist_id INT); INSERT INTO artworks (id, title, year_created, artist_id) VALUES (1, 'Artwork 1', 2000, 1), (2, 'Artwork 2', 1990, 1), (3, 'Artwork 3', 2010, 2);", "sql": "SELECT ar.name, MIN(a.year_created) FROM artists ar INNER JOIN artworks a ON ar.id = a.artist_id GROUP BY ar.name;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 114, "num_statements": 1} {"question": "What is the count of employees who have completed diversity and inclusion training?", "schema": "CREATE TABLE Training (EmployeeID INT, TrainingName VARCHAR(50)); INSERT INTO Training (EmployeeID, TrainingName) VALUES (1, 'Diversity and Inclusion Training'), (2, 'Cybersecurity Training'), (3, 'Diversity and Inclusion Training');", "sql": "SELECT COUNT(DISTINCT EmployeeID) FROM Training WHERE TrainingName = 'Diversity and Inclusion Training';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 104, "num_statements": 1} {"question": "PostgreSQL regression test 'jsonb_jsonpath': Write the SELECT query (example 597).", "schema": null, "sql": "select jsonb_path_query('\"2023-08-15 12:34:56.789\"', '$.timestamp(2.0)');", "explanation": "Regression test for Jsonb Jsonpath in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select jsonb_path_query('\"2023-08-15 12:34:56.789\"', '$.timestamp(2.0)')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "advanced", "category": "query_select", "is_postgresql_specific": false, "sql_length": 73, "num_statements": 1} {"question": "What is the total number of community policing events held in the last 30 days, categorized by district and event type?", "schema": "CREATE TABLE Districts (DistrictID INT, Name VARCHAR(50)); CREATE TABLE CommunityEvents (EventID INT, DistrictID INT, EventType VARCHAR(50), EventDate DATE);", "sql": "SELECT D.Name, C.EventType, COUNT(*) as EventCount FROM Districts D INNER JOIN CommunityEvents C ON D.DistrictID = C.DistrictID WHERE C.EventDate >= DATEADD(day, -30, GETDATE()) GROUP BY D.Name, C.EventType;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 207, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'postgres_fdw' (example 85).", "schema": null, "sql": "SELECT t1.\"C 1\", t2.c1, t3.c1 FROM \"S 1\".\"T 1\" t1 left join ft1 t2 full join ft2 t3 on (t2.c1 = t3.c1) on (t3.c1 = t1.\"C 1\") OFFSET 100 LIMIT 10;", "explanation": "Example query from the 'postgres_fdw' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 145, "num_statements": 1} {"question": "PostgreSQL regression test 'text': Write the SELECT query (example 34).", "schema": null, "sql": "select format('INSERT INTO %I VALUES(%L,%L)', 'mytab', 10, 'Hello');", "explanation": "Regression test for Text in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select format('INSERT INTO %I VALUES(%L,%L)', 'mytab', 10, 'Hello')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 68, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: what's the cancelable with bubbles being yes", "schema": "CREATE TABLE table_1507852_5 (cancelable VARCHAR, bubbles VARCHAR)", "sql": "SELECT cancelable FROM table_1507852_5 WHERE bubbles = 'Yes';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 61, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: what amount played tried for 60?", "schema": "CREATE TABLE table_12807904_5 (played VARCHAR, tries_for VARCHAR)", "sql": "SELECT COUNT(played) FROM table_12807904_5 WHERE tries_for = '60';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'float4' (example 23).", "schema": null, "sql": "INSERT INTO FLOAT4_TBL(f1) VALUES ('5 . 0');", "explanation": "DML from PostgreSQL core regression test for Float4.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 44, "num_statements": 1} {"question": "Increase the 'salary' of members in the 'manufacturing' union by 3% who have been part of the union for over 3 years.", "schema": "CREATE TABLE unions (id INT, name TEXT, industry TEXT); CREATE TABLE members (id INT, union_id INT, joining_date DATE, salary FLOAT); CREATE TABLE union_memberships (member_id INT, union_id INT);", "sql": "UPDATE members SET salary = salary * 1.03 WHERE union_id IN (SELECT id FROM unions WHERE industry = 'manufacturing') AND joining_date <= DATE_SUB(CURRENT_DATE, INTERVAL 3 YEAR);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 177, "num_statements": 1} {"question": "What traditional arts events are planned for Kyoto in the next 6 months?", "schema": "CREATE TABLE TraditionalArtsEvents (id INT, city VARCHAR(50), country VARCHAR(50), event_name VARCHAR(100), event_date DATE); INSERT INTO TraditionalArtsEvents (id, city, country, event_name, event_date) VALUES (1, 'Kyoto', 'Japan', 'Kabuki', '2023-04-01'), (2, 'Kyoto', 'Japan', 'Ikebana', '2023-05-15');", "sql": "SELECT event_name FROM TraditionalArtsEvents WHERE city = 'Kyoto' AND event_date BETWEEN CURDATE() AND DATE_ADD(CURDATE(), INTERVAL 6 MONTH);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 141, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which city has a First season of current spell in Segunda División smaller than 2013?", "schema": "CREATE TABLE table_name_69 (city VARCHAR, first_season_of_current_spell_in_segunda_división INTEGER)", "sql": "SELECT city FROM table_name_69 WHERE first_season_of_current_spell_in_segunda_división < 2013;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 94, "num_statements": 1} {"question": "What is the average rating of hotels in 'Paris' that have a pool?", "schema": "CREATE TABLE hotels (hotel_id INT, name VARCHAR(50), city VARCHAR(50), rating DECIMAL(2,1), pool BOOLEAN); INSERT INTO hotels (hotel_id, name, city, rating, pool) VALUES (1, 'Hotel Ritz', 'Paris', 4.5, true), (2, 'Hotel de Crillon', 'Paris', 4.7, false);", "sql": "SELECT AVG(rating) FROM hotels WHERE city = 'Paris' AND pool = true;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 68, "num_statements": 1} {"question": "PostgreSQL regression test 'jsonb_jsonpath': Write the SELECT query (example 223).", "schema": null, "sql": "select jsonb '[]' @@ '$[*]';", "explanation": "Regression test for Jsonb Jsonpath in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select jsonb '[]' @@ '$[*]') AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": true, "sql_length": 28, "num_statements": 1} {"question": "pgTAP test for Index (assertion 44).", "schema": null, "sql": "SELECT * FROM check_test(\n has_index( 'sometab', 'idx_bar', ARRAY['name', 'id'], 'whatever' ),\n false,\n 'has_index() invalid no schema',\n 'whatever',\n ' have: idx_bar ON sometab(numb, name)\n want: idx_bar ON sometab(name, id)'\n);", "explanation": "SQL assertion from pgTAP test suite for Index.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 258, "num_statements": 1} {"question": "What is the total fare collected for a specific train line in Berlin?", "schema": "CREATE TABLE train_lines (line_id INT, city VARCHAR(50)); INSERT INTO train_lines (line_id, city) VALUES (1, 'Berlin'), (2, 'Berlin'); CREATE TABLE fares_collected (line_id INT, fare DECIMAL(5,2)); INSERT INTO fares_collected (line_id, fare) VALUES (1, 500.00), (1, 750.00), (2, 300.00), (2, 400.00);", "sql": "SELECT SUM(fare) FROM fares_collected INNER JOIN train_lines ON fares_collected.line_id = train_lines.line_id WHERE city = 'Berlin' AND train_lines.line_id = 1;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 160, "num_statements": 1} {"question": "Show the total waste generation for each type", "schema": "CREATE TABLE waste_generation (id INT PRIMARY KEY, waste_type_id INT, generation_rate FLOAT); INSERT INTO waste_generation (id, waste_type_id, generation_rate) VALUES (1, 1, 50.5), (2, 2, 40.3); CREATE TABLE waste_types (id INT PRIMARY KEY, waste_type VARCHAR(255)); INSERT INTO waste_types (id, waste_type) VALUES (1, 'Plastic'), (2, 'Paper');", "sql": "SELECT waste_types.waste_type, SUM(waste_generation.generation_rate) FROM waste_types INNER JOIN waste_generation ON waste_types.id = waste_generation.waste_type_id GROUP BY waste_types.waste_type;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 197, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which bodyweight has a Total (kg) of 145.0?", "schema": "CREATE TABLE table_name_18 (bodyweight VARCHAR, total__kg_ VARCHAR)", "sql": "SELECT bodyweight FROM table_name_18 WHERE total__kg_ = '145.0';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 64, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What were the LOA (metres) for the yacht where the skipper was Jez Fanstone?", "schema": "CREATE TABLE table_25595107_1 (loa__metres_ VARCHAR, skipper VARCHAR)", "sql": "SELECT loa__metres_ FROM table_25595107_1 WHERE skipper = 'Jez Fanstone';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 73, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who is the captain of the Gloucestershire Gladiators?", "schema": "CREATE TABLE table_18461635_1 (captain VARCHAR, team VARCHAR)", "sql": "SELECT captain FROM table_18461635_1 WHERE team = 'Gloucestershire Gladiators';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 79, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who was the outgoing manager who departed due to fc energie cottbus purchased rights?", "schema": "CREATE TABLE table_name_80 (outgoing_manager VARCHAR, manner_of_departure VARCHAR)", "sql": "SELECT outgoing_manager FROM table_name_80 WHERE manner_of_departure = 'fc energie cottbus purchased rights';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 109, "num_statements": 1} {"question": "List all socially responsible lending initiatives by microfinance institutions", "schema": "CREATE TABLE microfinance_institutions (institution_id INT, institution_name VARCHAR(50));", "sql": "CREATE TABLE lending_initiatives (initiative_id INT, initiative_name VARCHAR(50), institution_id INT); SELECT institution_name, initiative_name FROM microfinance_institutions;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_table", "is_postgresql_specific": false, "sql_length": 175, "num_statements": 2} {"question": "Generate PostgreSQL SQL for: Name the german voice actor for rafael torres", "schema": "CREATE TABLE table_14960574_6 (german_voice_actor VARCHAR, spanish_voice_actor VARCHAR)", "sql": "SELECT german_voice_actor FROM table_14960574_6 WHERE spanish_voice_actor = 'Rafael Torres';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 92, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the total average for Rank entries where the Lane listed is smaller than 4 and the Nationality listed is San Marino?", "schema": "CREATE TABLE table_name_7 (rank INTEGER, lane VARCHAR, nationality VARCHAR)", "sql": "SELECT AVG(rank) FROM table_name_7 WHERE lane < 4 AND nationality = 'san marino';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 81, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What points awarded are higher than 6 but smaller than 9", "schema": "CREATE TABLE table_name_51 (points_awarded__platinum_ INTEGER, points_awarded__gold_ VARCHAR, points_awarded__silver_ VARCHAR)", "sql": "SELECT MAX(points_awarded__platinum_) FROM table_name_51 WHERE points_awarded__gold_ = 9 AND points_awarded__silver_ > 6;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 121, "num_statements": 1} {"question": "What is the average speed of vessels that docked in the Port of Oakland in the past month?", "schema": "CREATE TABLE Vessel_Performance(Vessel_ID INT, Docking_Port VARCHAR(50), Docking_Date DATE, Average_Speed DECIMAL(5,2)); INSERT INTO Vessel_Performance VALUES (1, 'Port of Oakland', '2022-03-15', 15.5), (2, 'Port of Los Angeles', '2022-03-18', 18.3), (3, 'Port of Oakland', '2022-03-22', 14.9);", "sql": "SELECT AVG(Average_Speed) FROM Vessel_Performance WHERE Docking_Port = 'Port of Oakland' AND Docking_Date >= DATEADD(MONTH, -1, GETDATE());", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "plpgsql", "is_postgresql_specific": false, "sql_length": 139, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who lost that has a score of w 4-3?", "schema": "CREATE TABLE table_name_25 (loss VARCHAR, score VARCHAR)", "sql": "SELECT loss FROM table_name_25 WHERE score = 'w 4-3';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 53, "num_statements": 1} {"question": "What is the success rate of the public defender's office in the city of Chicago?", "schema": "CREATE TABLE cases (id INT, city VARCHAR(255), office VARCHAR(255), result VARCHAR(255)); INSERT INTO cases (id, city, office, result) VALUES (1, 'Chicago', 'Public Defender', 'Won'), (2, 'Chicago', 'Public Defender', 'Lost'), (3, 'Chicago', 'Prosecutor', 'Won');", "sql": "SELECT (SUM(CASE WHEN result = 'Won' THEN 1 ELSE 0 END) / COUNT(*)) * 100 AS success_rate FROM cases WHERE city = 'Chicago' AND office = 'Public Defender';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 155, "num_statements": 1} {"question": "What is the average energy consumption for buildings owned by 'GreenCorp' and 'EcoInnovations'?", "schema": "CREATE TABLE GreenBuildings (id INT, name TEXT, owner TEXT, energy_consumption FLOAT); INSERT INTO GreenBuildings (id, name, owner, energy_consumption) VALUES (1, 'EcoTower', 'ACME Inc', 1500.0), (2, 'GreenSpire', 'GreenCorp', 1200.0), (3, 'GreenVista', 'ACME Inc', 1300.0), (4, 'GreenPlaza', 'EcoInnovations', 1000.0);", "sql": "SELECT AVG(energy_consumption) FROM GreenBuildings WHERE owner IN ('GreenCorp', 'EcoInnovations');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 98, "num_statements": 1} {"question": "Calculate the average waste generation rate per day for each manufacturing plant in the month of February 2022.", "schema": "CREATE TABLE manufacturing_plants (id INT, name VARCHAR(50));CREATE TABLE waste_generation (plant_id INT, date DATE, amount INT); INSERT INTO manufacturing_plants (id, name) VALUES (1, 'Plant A'), (2, 'Plant B'); INSERT INTO waste_generation (plant_id, date, amount) VALUES (1, '2022-02-01', 100), (1, '2022-02-03', 150), (1, '2022-02-05', 50), (2, '2022-02-02', 200), (2, '2022-02-04', 300), (2, '2022-02-06', 100);", "sql": "SELECT m.name, AVG(w.amount / 31.0) AS avg_daily_amount FROM manufacturing_plants m INNER JOIN waste_generation w ON m.id = w.plant_id WHERE w.date >= '2022-02-01' AND w.date <= '2022-02-28' GROUP BY m.name;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 207, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which Champion has a Venue of old waverly golf club?", "schema": "CREATE TABLE table_name_35 (champion VARCHAR, venue VARCHAR)", "sql": "SELECT champion FROM table_name_35 WHERE venue = 'old waverly golf club';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 73, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'inet' (example 2).", "schema": null, "sql": "CREATE TABLE INET_TBL (c cidr, i inet);", "explanation": "DDL from PostgreSQL core regression test for Inet.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_table", "is_postgresql_specific": false, "sql_length": 39, "num_statements": 1} {"question": "Update the community representation for subscriber 5 to 'Native Hawaiian or Other Pacific Islander'.", "schema": "CREATE TABLE mobile_subscribers (subscriber_id INT, data_usage FLOAT, community_representation VARCHAR(30)); INSERT INTO mobile_subscribers (subscriber_id, data_usage, community_representation) VALUES (1, 30.5, 'Latinx'), (2, 40.3, 'Black/African American'), (3, 50.2, 'Native American'), (4, 60.1, 'Asian'), (5, 35.6, 'Pacific Islander');", "sql": "UPDATE mobile_subscribers SET community_representation = 'Native Hawaiian or Other Pacific Islander' WHERE subscriber_id = 5;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 125, "num_statements": 1} {"question": "PostgreSQL regression test 'arrays': Write the SELECT query (example 402).", "schema": null, "sql": "select unnest(array[1,2,3,null,4,null,null,5,6]::text[]);", "explanation": "Regression test for Arrays in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select unnest(array[1,2,3,null,4,null,null,5,6]::text[])) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": true, "sql_length": 57, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which show was nominated for the ITA Milestone Award at the Indian Television Academy Awards?", "schema": "CREATE TABLE table_name_85 (show VARCHAR, award_ceremony VARCHAR, category VARCHAR)", "sql": "SELECT show FROM table_name_85 WHERE award_ceremony = 'indian television academy awards' AND category = 'ita milestone award';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 126, "num_statements": 1} {"question": "Calculate the average distance of freight routes in Germany.", "schema": "CREATE TABLE routes (id INT, start_location VARCHAR(50), end_location VARCHAR(50), distance INT, country VARCHAR(50)); INSERT INTO routes VALUES (1, 'Location A', 'Location B', 100, 'Germany'), (2, 'Location A', 'Location C', 200, 'France'), (3, 'Location B', 'Location C', 150, 'Germany');", "sql": "SELECT AVG(distance) as avg_distance FROM routes WHERE country = 'Germany';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 75, "num_statements": 1} {"question": "Show a SQL definition from the pg_partman project (pg_partman--5.4.0--5.4.1, item 41).", "schema": null, "sql": "CREATE OR REPLACE PROCEDURE @extschema@.run_analyze(p_skip_locked boolean DEFAULT false, p_quiet boolean DEFAULT false, p_parent_table text DEFAULT NULL)\n LANGUAGE plpgsql\n AS $$\nDECLARE\n\nv_adv_lock boolean;\nv_parent_schema text;\nv_parent_tablename text;\nv_row record;\nv_sql text;\n\nBEGIN\n\nv_adv_lock := pg_catalog.pg_try_advisory_lock(hashtext('pg_partman run_analyze'));\nIF v_adv_lock = false THEN\n RAISE NOTICE 'Partman analyze already running or another session has not released its advisory lock.';\n RETURN;\nEND IF;\n\nFOR v_row IN SELECT parent_table FROM @extschema@.part_config\nLOOP\n\n IF p_parent_table IS NOT NULL THEN\n IF p_parent_table != v_row.parent_table THEN\n CONTINUE;\n END IF;\n END IF;\n\n SELECT n.nspname, c.relname\n INTO v_parent_schema, v_parent_tablename\n FROM pg_catalog.pg_class c\n JOIN pg_catalog.pg_namespace n ON c.relnamespace = n.oid\n WHERE n.nspname = pg_catalog.split_part(v_row.parent_table, '.', 1)::name\n AND c.relname = pg_catalog.split_part(v_row.parent_table, '.', 2)::name;\n\n v_sql := 'ANALYZE ';\n IF p_skip_locked THEN\n v_sql := v_sql || 'SKIP LOCKED ';\n END IF;\n v_sql := pg_catalog.format('%s %I.%I', v_sql, v_parent_schema, v_parent_tablename);\n\n IF p_quiet = 'false' THEN\n RAISE NOTICE 'Analyzed partitioned table: %.%', v_parent_schema, v_parent_tablename;\n END IF;\n EXECUTE v_sql;\n COMMIT;\n\nEND LOOP;\n\nPERFORM pg_catalog.pg_advisory_unlock(hashtext('pg_partman run_analyze'));\nEND\n$$;", "explanation": "SQL definition from the open-source pg_partman PostgreSQL project.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "plpgsql_function", "is_postgresql_specific": true, "sql_length": 1585, "num_statements": 24} {"question": "Determine the daily revenue for the past week", "schema": "CREATE TABLE sales (sale_id INT, sale_date DATE, dish_id INT, quantity INT, price DECIMAL(5,2)); INSERT INTO sales (sale_id, sale_date, dish_id, quantity, price) VALUES (1, '2022-01-01', 1, 2, 12.99), (2, '2022-01-01', 2, 1, 15.99), (3, '2022-01-02', 3, 3, 9.99);", "sql": "SELECT SUM(quantity * price) as daily_revenue FROM sales WHERE sale_date BETWEEN DATE_SUB(CURRENT_DATE, INTERVAL 7 DAY) AND CURRENT_DATE GROUP BY sale_date;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 156, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who is the artist with catalog number ZK 34354?", "schema": "CREATE TABLE table_name_38 (artist VARCHAR, catalog__number VARCHAR)", "sql": "SELECT artist FROM table_name_38 WHERE catalog__number = 'zk 34354';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 68, "num_statements": 1} {"question": "What is the total number of military bases in the Asia-Pacific region?", "schema": "CREATE TABLE military_bases (id INT, base_name VARCHAR(255), region VARCHAR(255)); INSERT INTO military_bases (id, base_name, region) VALUES (1, 'Base A', 'Asia-Pacific'), (2, 'Base B', 'Europe'), (3, 'Base C', 'Asia-Pacific');", "sql": "SELECT COUNT(*) FROM military_bases WHERE region = 'Asia-Pacific';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the total when the league cup is less than 1, and the fa cup is less than 1?", "schema": "CREATE TABLE table_name_45 (total INTEGER, league_cup VARCHAR, fa_cup VARCHAR)", "sql": "SELECT SUM(total) FROM table_name_45 WHERE league_cup < 1 AND fa_cup < 1;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 73, "num_statements": 1} {"question": "pgTAP test for Valueset (assertion 288).", "schema": null, "sql": "-- Handle failure due to column count mismatch.\nSELECT * FROM check_test(\n set_hasnt( 'VALUES (1), (2)', 'VALUES (''foo'', 1), (''bar'', 2)' ),\n false,\n 'set_hasnt((int), (text,int))',\n '',\n ' Columns differ between queries:\n have: (integer)\n want: (text,integer)'\n);", "explanation": "SQL assertion from pgTAP test suite for Valueset.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 299, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is Player, when Total is \"294\", and when Year(s) Won is \"1955\"?", "schema": "CREATE TABLE table_name_61 (player VARCHAR, total VARCHAR, year_s__won VARCHAR)", "sql": "SELECT player FROM table_name_61 WHERE total = 294 AND year_s__won = '1955';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 76, "num_statements": 1} {"question": "How many applicants were there for each job opening, and which openings had more than 50 applicants?", "schema": "CREATE TABLE JobOpenings (OpeningID INT, Position VARCHAR(50), Department VARCHAR(50), TotalApplicants INT); INSERT INTO JobOpenings (OpeningID, Position, Department, TotalApplicants) VALUES (1, 'Developer', 'IT', 75), (2, 'Manager', 'HR', 30), (3, 'Designer', 'Marketing', 60), (4, 'Analyst', 'Finance', 45);", "sql": "SELECT OpeningID, Position, Department, TotalApplicants FROM JobOpenings WHERE TotalApplicants > 50;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 100, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the average unit price of all the tracks?", "schema": "CREATE TABLE TRACK (UnitPrice INTEGER)", "sql": "SELECT AVG(UnitPrice) FROM TRACK;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 33, "num_statements": 1} {"question": "List all donors, their contact information, and the total amount they have donated to each program, along with program details.", "schema": "CREATE TABLE donors (id INT, name TEXT, email TEXT); CREATE TABLE donations (id INT, donor_id INT, program_id INT, amount DECIMAL); CREATE TABLE programs (id INT, name TEXT, location TEXT, budget INT);", "sql": "SELECT donors.name as donor_name, programs.name as program_name, SUM(donations.amount) as total_donation FROM donors INNER JOIN donations ON donors.id = donations.donor_id INNER JOIN programs ON donations.program_id = programs.id GROUP BY donors.id, programs.id;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 262, "num_statements": 1} {"question": "What is the total budget allocated for each department in 2024?", "schema": "CREATE TABLE Budget (BudgetID int, DepartmentID int, Amount decimal, StartDate date, EndDate date); INSERT INTO Budget (BudgetID, DepartmentID, Amount, StartDate, EndDate) VALUES (1, 1, 50000, '2024-01-01', '2024-12-31'); INSERT INTO Budget (BudgetID, DepartmentID, Amount, StartDate, EndDate) VALUES (2, 2, 70000, '2024-01-01', '2024-12-31');", "sql": "SELECT DepartmentID, SUM(Amount) as TotalBudget FROM Budget WHERE YEAR(StartDate) = 2024 AND YEAR(EndDate) = 2024 GROUP BY DepartmentID;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 136, "num_statements": 1} {"question": "Which artists have their works exhibited in the 'Modern Art Museum'?", "schema": "CREATE TABLE Artists (ArtistID int, Name varchar(50), Nationality varchar(50)); INSERT INTO Artists (ArtistID, Name, Nationality) VALUES (1, 'Pablo Picasso', 'Spanish'), (2, 'Vincent van Gogh', 'Dutch'); CREATE TABLE Exhibitions (ExhibitionID int, Title varchar(50), Artists varchar(50), Museum varchar(50)); INSERT INTO Exhibitions (ExhibitionID, Title, Artists, Museum) VALUES (1, 'Modern Art Masterpieces', 'Pablo Picasso, Vincent van Gogh', 'Modern Art Museum'), (2, 'Impressionist Icons', 'Claude Monet', 'Metropolitan Museum');", "sql": "SELECT Artists.Name FROM Artists INNER JOIN Exhibitions ON Artists.ArtistID = Cast(Split_Part(Exhibitions.Artists, ',', 1) AS int) WHERE Exhibitions.Museum = 'Modern Art Museum';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 178, "num_statements": 1} {"question": "PostgreSQL Advanced: show example 5.", "schema": null, "sql": "BEGIN; UPDATE accounts SET balance = balance - 100.00 WHERE name = 'Alice'; -- etc etc COMMIT;", "explanation": "Example from PostgreSQL documentation on Advanced.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "plpgsql", "is_postgresql_specific": false, "sql_length": 94, "num_statements": 3} {"question": "Generate PostgreSQL SQL for: Who wrote the episode that originally aired on March 1, 1998?", "schema": "CREATE TABLE table_name_42 (written_by VARCHAR, original_airdate VARCHAR)", "sql": "SELECT written_by FROM table_name_42 WHERE original_airdate = 'march 1, 1998';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 78, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the total pick number of Virginia Tech?", "schema": "CREATE TABLE table_name_68 (pick VARCHAR, school VARCHAR)", "sql": "SELECT COUNT(pick) FROM table_name_68 WHERE school = 'virginia tech';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 69, "num_statements": 1} {"question": "Find the total number of safety tests passed by US-based automakers in the \"safety_testing\" table.", "schema": "CREATE TABLE safety_testing (id INT, automaker VARCHAR(50), country VARCHAR(50), tests_passed INT);", "sql": "SELECT SUM(tests_passed) FROM safety_testing WHERE country = 'USA';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 67, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the lowest rank for a nation with 29 total medals, over 5 silvers, and under 16 bronze?", "schema": "CREATE TABLE table_name_71 (rank INTEGER, bronze VARCHAR, silver VARCHAR, total VARCHAR)", "sql": "SELECT MIN(rank) FROM table_name_71 WHERE silver > 5 AND total = 29 AND bronze < 16;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 84, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many people attended Junction Oval?", "schema": "CREATE TABLE table_name_17 (crowd INTEGER, venue VARCHAR)", "sql": "SELECT SUM(crowd) FROM table_name_17 WHERE venue = 'junction oval';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 67, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the score of the game that had a loss of Wegman (2-6)?", "schema": "CREATE TABLE table_name_7 (score VARCHAR, loss VARCHAR)", "sql": "SELECT score FROM table_name_7 WHERE loss = 'wegman (2-6)';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 59, "num_statements": 1} {"question": "What is the total number of non-fungible tokens (NFTs) minted on the Flow network?", "schema": "CREATE TABLE flow_nfts (nft_id INT, mint_timestamp TIMESTAMP);", "sql": "SELECT COUNT(nft_id) FROM flow_nfts;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 36, "num_statements": 1} {"question": "What is the minimum and maximum funding amount for startups founded by people from underrepresented racial or ethnic backgrounds in the sustainable energy sector?", "schema": "CREATE TABLE startups(id INT, name TEXT, industry TEXT, foundation_date DATE, founder_race TEXT, funding FLOAT); INSERT INTO startups(id, name, industry, foundation_date, founder_race, funding) VALUES (1, 'GreenPower', 'Sustainable Energy', '2018-01-01', 'Hispanic', 2000000);", "sql": "SELECT MIN(funding), MAX(funding) FROM startups WHERE industry = 'Sustainable Energy' AND founder_race IN ('African American', 'Hispanic', 'Native American', 'Pacific Islander');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 178, "num_statements": 1} {"question": "Delete records in the 'FuelingStations' table where 'station_type' is 'Hydrogen'", "schema": "CREATE TABLE FuelingStations (station_id INT, station_type VARCHAR(10), PRIMARY KEY (station_id));", "sql": "DELETE FROM FuelingStations WHERE station_type = 'Hydrogen';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_delete", "is_postgresql_specific": false, "sql_length": 60, "num_statements": 1} {"question": "Identify the autonomous vehicles with the lowest price per mile in each country", "schema": "CREATE TABLE autonomous_vehicles (vehicle_id INT, vehicle_name VARCHAR(255), price_per_mile DECIMAL(5,2), country VARCHAR(255));", "sql": "SELECT vehicle_name, price_per_mile, country FROM (SELECT vehicle_name, price_per_mile, country, ROW_NUMBER() OVER (PARTITION BY country ORDER BY price_per_mile ASC) as rn FROM autonomous_vehicles) t WHERE rn = 1;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "query_window_function", "is_postgresql_specific": true, "sql_length": 213, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: List all the names of schools with an endowment amount smaller than or equal to 10.", "schema": "CREATE TABLE school (school_name VARCHAR, school_id VARCHAR); CREATE TABLE endowment (school_id VARCHAR, amount INTEGER)", "sql": "SELECT T2.school_name FROM endowment AS T1 JOIN school AS T2 ON T1.school_id = T2.school_id GROUP BY T1.school_id HAVING SUM(T1.amount) <= 10;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 142, "num_statements": 1} {"question": "Which countries have the least number of impact investing organizations?", "schema": "CREATE TABLE impact_investing_orgs (name TEXT, country TEXT); INSERT INTO impact_investing_orgs (name, country) VALUES ('Acme Impact', 'USA'), ('GreenTech Initiatives', 'Canada'), ('EcoVentures', 'USA'), ('Global Philanthropic', 'UK'), ('Sustainable Development Foundation', 'Brazil'), ('Green Initiatives', 'India');", "sql": "SELECT country, COUNT(*) as org_count FROM impact_investing_orgs GROUP BY country ORDER BY org_count ASC;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 105, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the most lanes used in races with more than 1 heat and a winning mark of 7.25?", "schema": "CREATE TABLE table_name_60 (lane INTEGER, heat VARCHAR, mark VARCHAR)", "sql": "SELECT MAX(lane) FROM table_name_60 WHERE heat > 1 AND mark = '7.25';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 69, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the most bronze when the total is 14, and there are more than 6 gold?", "schema": "CREATE TABLE table_name_99 (bronze INTEGER, total VARCHAR, gold VARCHAR)", "sql": "SELECT MAX(bronze) FROM table_name_99 WHERE total = 14 AND gold > 6;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 68, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the height of Anthony Crater?", "schema": "CREATE TABLE table_29418619_1 (height VARCHAR, name VARCHAR)", "sql": "SELECT height FROM table_29418619_1 WHERE name = 'Anthony Crater';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "PostgreSQL regression test 'jsonb_jsonpath': Write the SELECT query (example 92).", "schema": null, "sql": "select jsonb_path_query('[1,2,3]', '$[last ? (@.type() == \"string\")]');", "explanation": "Regression test for Jsonb Jsonpath in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select jsonb_path_query('[1,2,3]', '$[last ? (@.type() == \"string\")]')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "advanced", "category": "query_select", "is_postgresql_specific": false, "sql_length": 71, "num_statements": 1} {"question": "What is the average prize pool per game for esports events in North America?", "schema": "CREATE TABLE EsportsEvents (EventID INT, EventName VARCHAR(50), Game VARCHAR(50), Location VARCHAR(50), PrizePool DECIMAL(10,2)); INSERT INTO EsportsEvents (EventID, EventName, Game, Location, PrizePool) VALUES (1, 'Fortnite World Cup', 'Fortnite', 'New York, USA', 30000000.00); INSERT INTO EsportsEvents (EventID, EventName, Game, Location, PrizePool) VALUES (2, 'PUBG Global Invitational', 'PUBG', 'Berlin, Germany', 2000000.00); INSERT INTO EsportsEvents (EventID, EventName, Game, Location, PrizePool) VALUES (3, 'The International', 'Dota 2', 'Seattle, USA', 25000000.00); INSERT INTO EsportsEvents (EventID, EventName, Game, Location, PrizePool) VALUES (4, 'League of Legends World Championship', 'League of Legends', 'Paris, France', 6600000.00);", "sql": "SELECT Game, AVG(PrizePool) AS AvgPrizePool FROM EsportsEvents WHERE Location LIKE '%USA%' GROUP BY Game;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 105, "num_statements": 1} {"question": "PostgreSQL regression test 'timestamptz': Write the SELECT query (example 298).", "schema": null, "sql": "SELECT '2011-03-27 01:59:59'::timestamp AT TIME ZONE 'MSK';", "explanation": "Regression test for Timestamptz in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT '2011-03-27 01:59:59'::timestamp AT TIME ZONE 'MSK') AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 59, "num_statements": 1} {"question": "PostgreSQL regression test 'sqljson': Write the SELECT query (example 103).", "schema": null, "sql": "SELECT JSON_OBJECT(1: 1, '2': NULL, '3': 1, 4: NULL, '5': 'a' ABSENT ON NULL WITH UNIQUE RETURNING jsonb);", "explanation": "Regression test for Sqljson in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT JSON_OBJECT(1: 1, '2': NULL, '3': 1, 4: NULL, '5': 'a' ABSENT ON NULL WITH UNIQUE RETURNING jsonb)) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": true, "sql_length": 106, "num_statements": 1} {"question": "Delete all records from the marine_conservation_orgs table", "schema": "CREATE TABLE marine_conservation_orgs (org_name TEXT, country TEXT, year_founded INTEGER);", "sql": "DELETE FROM marine_conservation_orgs;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_delete", "is_postgresql_specific": false, "sql_length": 37, "num_statements": 1} {"question": "PostgreSQL regression test 'stats_import': Write the SELECT query (example 71).", "schema": null, "sql": "SELECT *\nFROM pg_stats\nWHERE schemaname = 'stats_import'\nAND tablename = 'test'\nAND inherited = false\nAND attname = 'id';", "explanation": "Regression test for Stats Import in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT *\nFROM pg_stats\nWHERE schemaname = 'stats_import'\nAND tablename = 'test'\nAND inherited = false\nAND attname = 'id') AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 121, "num_statements": 1} {"question": "Find the average safety score of AI models created by African researchers since 2020.", "schema": "CREATE TABLE ModelScores (model_id INT, score FLOAT, dev_region VARCHAR(255), model_year INT); INSERT INTO ModelScores (model_id, score, dev_region, model_year) VALUES (1, 8.5, 'Africa', 2020), (2, 9.2, 'Asia', 2021), (3, 8.8, 'Europe', 2022);", "sql": "SELECT AVG(score) FROM ModelScores WHERE dev_region = 'Africa' AND model_year >= 2020;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 86, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which To par is the highest one that has a Total smaller than 148?", "schema": "CREATE TABLE table_name_89 (to_par INTEGER, total INTEGER)", "sql": "SELECT MAX(to_par) FROM table_name_89 WHERE total < 148;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 56, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What Winner had a Prize of zł2,153,999?", "schema": "CREATE TABLE table_name_22 (winner VARCHAR, prize VARCHAR)", "sql": "SELECT winner FROM table_name_22 WHERE prize = 'zł2,153,999';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 61, "num_statements": 1} {"question": "PostgreSQL regression test 'oid8': Write the SELECT query (example 31).", "schema": null, "sql": "SELECT o.* FROM OID8_TBL o WHERE o.f1 <= '1234';", "explanation": "Regression test for Oid8 in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT o.* FROM OID8_TBL o WHERE o.f1 <= '1234') AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 48, "num_statements": 1} {"question": "PostgreSQL regression test 'tstypes': Write the SELECT query (example 164).", "schema": null, "sql": "SELECT ts_delete('base:7 hidden:6 rebel:1 spaceship:2,33A,34B,35C,36D strike:3'::tsvector, 'base');", "explanation": "Regression test for Tstypes in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT ts_delete('base:7 hidden:6 rebel:1 spaceship:2,33A,34B,35C,36D strike:3'::tsvector, 'base')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": true, "sql_length": 99, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the value of the item \"Points\" when the value of the item \"Points against\" is 272?", "schema": "CREATE TABLE table_name_62 (points_for VARCHAR, points_against VARCHAR)", "sql": "SELECT points_for FROM table_name_62 WHERE points_against = '272';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "Delete all records from table cargo_handling with port as 'Seattle'", "schema": "CREATE TABLE cargo_handling (id INT PRIMARY KEY, cargo_id INT, port VARCHAR(20)); INSERT INTO cargo_handling (id, cargo_id, port) VALUES (1, 101, 'New York'), (2, 102, 'Seattle'), (3, 103, 'Buenos Aires');", "sql": "DELETE FROM cargo_handling WHERE port = 'Seattle';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_delete", "is_postgresql_specific": false, "sql_length": 50, "num_statements": 1} {"question": "PostgreSQL regression test 'arrays': Write the SELECT query (example 266).", "schema": null, "sql": "select '[:1]={1}'::int[];", "explanation": "Regression test for Arrays in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select '[:1]={1}'::int[]) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 25, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What's the part 3 of the verb in class 5?", "schema": "CREATE TABLE table_1745843_6 (part_3 VARCHAR, class VARCHAR)", "sql": "SELECT part_3 FROM table_1745843_6 WHERE class = '5';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 53, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What year was the play-by-play for Don Earle?", "schema": "CREATE TABLE table_name_84 (year VARCHAR, play_by_play VARCHAR)", "sql": "SELECT year FROM table_name_84 WHERE play_by_play = 'don earle';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 64, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'index_including' (example 46).", "schema": null, "sql": "CREATE TABLE tbl (c1 int,c2 int, c3 int, c4 box,\n\t\t\t\tUNIQUE(c1,c2) INCLUDE(c3,c4));", "explanation": "DDL from PostgreSQL core regression test for Index Including.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_table", "is_postgresql_specific": false, "sql_length": 83, "num_statements": 1} {"question": "List all military equipment types and their corresponding sales revenue for the last quarter", "schema": "CREATE TABLE equipment_sales (equipment_type VARCHAR(255), sale_date DATE, revenue INT); INSERT INTO equipment_sales (equipment_type, sale_date, revenue) VALUES ('Tank', '2021-04-01', 5000000), ('Tank', '2021-07-01', 6000000), ('Jet', '2021-05-01', 8000000), ('Jet', '2021-08-01', 9000000);", "sql": "SELECT equipment_type, SUM(revenue) as total_revenue FROM equipment_sales WHERE sale_date BETWEEN DATE_SUB(CURDATE(), INTERVAL 3 MONTH) AND DATE_SUB(CURDATE(), INTERVAL 1 MONTH) GROUP BY equipment_type;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 202, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the Date of Week 5?", "schema": "CREATE TABLE table_name_3 (date VARCHAR, week VARCHAR)", "sql": "SELECT date FROM table_name_3 WHERE week = 5;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 45, "num_statements": 1} {"question": "What is the total salary expense for the company broken down by department?", "schema": "CREATE TABLE Employees (EmployeeID int, Department varchar(20), Salary numeric(10,2)); INSERT INTO Employees (EmployeeID, Department, Salary) VALUES (1, 'IT', 75000.00), (2, 'IT', 70000.00), (3, 'HR', 60000.00), (4, 'Finance', 80000.00);", "sql": "SELECT Department, SUM(Salary) FROM Employees GROUP BY Department;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "What is the average donation amount for 'international_donors' in the 'global_giving' table?", "schema": "CREATE TABLE global_giving (donor_type VARCHAR(20), avg_donation DECIMAL(10,2)); INSERT INTO global_giving (donor_type, avg_donation) VALUES ('international_donors', 250.00), ('local_donors', 100.00);", "sql": "SELECT AVG(avg_donation) FROM global_giving WHERE donor_type = 'international_donors';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 86, "num_statements": 1} {"question": "What is the maximum cargo weight handled by the port of Rotterdam in a single day, grouped by month?", "schema": "CREATE TABLE cargo_handling (cargo_handling_id INT, port VARCHAR(255), cargo_weight INT, handling_date DATE);INSERT INTO cargo_handling (cargo_handling_id, port, cargo_weight, handling_date) VALUES (1, 'Rotterdam', 50000, '2022-01-01'), (2, 'Rotterdam', 55000, '2022-01-02');", "sql": "SELECT EXTRACT(MONTH FROM handling_date) AS month, MAX(cargo_weight) FROM cargo_handling WHERE port = 'Rotterdam' GROUP BY month;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 129, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'rules' (example 163).", "schema": null, "sql": "insert into rtest_t9 values (19, 'Record should go to rtest_t5 and t7');", "explanation": "DML from PostgreSQL core regression test for Rules.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 72, "num_statements": 1} {"question": "What is the total calorie count of vegan dishes served at 'Plant Power'?", "schema": "CREATE TABLE Restaurants (name text); INSERT INTO Restaurants (name) VALUES ('Plant Power'); CREATE TABLE Menu (name text, restaurant text, food text, calories integer, diet text); INSERT INTO Menu (name, restaurant, food, calories, diet) VALUES ('Plant Power', 'Vegan Lasagna', 600, 'Vegan'), ('Plant Power', 'Chickpea Salad', 450, 'Vegan');", "sql": "SELECT SUM(calories) FROM Menu WHERE restaurant = 'Plant Power' AND diet = 'Vegan';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 83, "num_statements": 1} {"question": "What is the average network investment in the 'Europe' region over the last year?", "schema": "CREATE TABLE network_investments (id INT, region VARCHAR(20), investment_date DATE, amount DECIMAL(10,2)); INSERT INTO network_investments (id, region, investment_date, amount) VALUES (1, 'Europe', '2022-01-01', 50000.00), (2, 'Asia', '2022-02-01', 75000.00), (3, 'Europe', '2022-03-01', 60000.00), (4, 'Africa', '2022-04-01', 45000.00);", "sql": "SELECT AVG(amount) FROM network_investments WHERE region = 'Europe' AND investment_date BETWEEN DATE_SUB('2022-04-01', INTERVAL 1 YEAR) AND '2022-04-01';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 153, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the style for Giuseppe Bausilio?", "schema": "CREATE TABLE table_name_31 (style VARCHAR, name VARCHAR)", "sql": "SELECT style FROM table_name_31 WHERE name = 'giuseppe bausilio';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 65, "num_statements": 1} {"question": "Update the donation amount for donor 'Karen Smith' to $6000.", "schema": "CREATE TABLE donations_update_2 (donor_id INT, donor_name VARCHAR(255), donation_amount INT); INSERT INTO donations_update_2 (donor_id, donor_name, donation_amount) VALUES (1, 'Karen Smith', 5000), (2, 'Brandon Johnson', 7000), (3, 'Sophia Rodriguez', 8000);", "sql": "UPDATE donations_update_2 SET donation_amount = 6000 WHERE donor_name = 'Karen Smith';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 86, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What are the entry names of catalog with the attribute possessed by most entries.", "schema": "CREATE TABLE Catalog_Contents_Additional_Attributes (catalog_entry_id VARCHAR, attribute_value VARCHAR); CREATE TABLE Catalog_Contents (catalog_entry_name VARCHAR, catalog_entry_id VARCHAR); CREATE TABLE Catalog_Contents_Additional_Attributes (attribute_value VARCHAR)", "sql": "SELECT t1.catalog_entry_name FROM Catalog_Contents AS t1 JOIN Catalog_Contents_Additional_Attributes AS t2 ON t1.catalog_entry_id = t2.catalog_entry_id WHERE t2.attribute_value = (SELECT attribute_value FROM Catalog_Contents_Additional_Attributes GROUP BY attribute_value ORDER BY COUNT(*) DESC LIMIT 1);", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 304, "num_statements": 1} {"question": "Insert a new record into the 'habitat_preservation' table with these values: 1, 'Asia', 'Habitat Protection Alliance', '2023-01-01', '2023-12-31'", "schema": "CREATE TABLE habitat_preservation (id INT PRIMARY KEY, region VARCHAR(20), organization VARCHAR(30), start_date DATE, end_date DATE);", "sql": "INSERT INTO habitat_preservation (id, region, organization, start_date, end_date) VALUES (1, 'Asia', 'Habitat Protection Alliance', '2023-01-01', '2023-12-31');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 160, "num_statements": 1} {"question": "Write the PL/pgSQL object from PostgreSQL regression test 'rowsecurity' (example 23).", "schema": null, "sql": "-- setup of malicious function\nCREATE OR REPLACE FUNCTION f_leak(text) RETURNS bool\n COST 0.0000001 LANGUAGE plpgsql\n AS 'BEGIN RAISE NOTICE ''f_leak => %'', $1; RETURN true; END';", "explanation": "PL/pgSQL object from PostgreSQL core test for Rowsecurity.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "plpgsql_function", "is_postgresql_specific": true, "sql_length": 186, "num_statements": 3} {"question": "How many community health workers are culturally competent in each region?", "schema": "CREATE TABLE CulturalCompetency (CHW_ID INT, Region VARCHAR(50), Competency_Level VARCHAR(50)); INSERT INTO CulturalCompetency (CHW_ID, Region, Competency_Level) VALUES (1, 'North', 'Expert'), (2, 'South', 'Intermediate'), (3, 'East', 'Expert'), (4, 'West', 'Beginner');", "sql": "SELECT c.Region, COUNT(*) as Competent_CHWs FROM CulturalCompetency c WHERE Competency_Level = 'Expert' GROUP BY c.Region;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 122, "num_statements": 1} {"question": "What was the total number of donations made in 'Texas' in the year 2020?", "schema": "CREATE TABLE Donations (donation_id INT, region VARCHAR(20), amount DECIMAL(10,2), donation_year INT); INSERT INTO Donations (donation_id, region, amount, donation_year) VALUES (1, 'Texas', 1000.00, 2020), (2, 'Texas', 2000.00, 2020);", "sql": "SELECT COUNT(*) FROM Donations WHERE region = 'Texas' AND donation_year = 2020;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 79, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'dict_int' (example 28).", "schema": null, "sql": "select ts_lexize('intdict', '925860');", "explanation": "Example query from the 'dict_int' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 38, "num_statements": 1} {"question": "How many marine species are there in the 'Coral Reef' subcategory?", "schema": "CREATE TABLE marine_species_categories (species_id INTEGER, species_name VARCHAR(255), category VARCHAR(50), subcategory VARCHAR(50));", "sql": "SELECT COUNT(species_id) FROM marine_species_categories WHERE subcategory = 'Coral Reef';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 89, "num_statements": 1} {"question": "List the teams that have not made any ticket sales", "schema": "CREATE TABLE sports_teams (team_id INT, team_name VARCHAR(50)); INSERT INTO sports_teams (team_id, team_name) VALUES (1, 'TeamA'), (2, 'TeamB'), (3, 'TeamC'); CREATE TABLE ticket_sales (ticket_id INT, team_id INT, price DECIMAL(5,2)); INSERT INTO ticket_sales (ticket_id, team_id, price) VALUES (1, 1, 75.50), (2, 1, 85.20), (3, 2, 65.00), (4, 2, 75.00);", "sql": "SELECT s.team_name FROM sports_teams s LEFT JOIN ticket_sales t ON s.team_id = t.team_id WHERE t.ticket_id IS NULL;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 115, "num_statements": 1} {"question": "What is the total number of students who have utilized assistive technology?", "schema": "CREATE TABLE Assistive_Technology (student_id INT, accommodation VARCHAR(255)); INSERT INTO Assistive_Technology VALUES (1, 'Text-to-Speech');", "sql": "SELECT COUNT(DISTINCT student_id) FROM Assistive_Technology;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 60, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many games did the team play when they were 1-3?", "schema": "CREATE TABLE table_13619053_4 (score VARCHAR, record VARCHAR)", "sql": "SELECT COUNT(score) FROM table_13619053_4 WHERE record = '1-3';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 63, "num_statements": 1} {"question": "Which members have a gym membership and a yoga membership?", "schema": "CREATE TABLE gym_members(member_id INT, name VARCHAR(50), start_date DATE, end_date DATE); INSERT INTO gym_members(member_id, name, start_date, end_date) VALUES (1, 'John Doe', '2021-01-01', '2022-12-31'); INSERT INTO gym_members(member_id, name, start_date, end_date) VALUES (2, 'Jane Smith', '2021-05-15', '2023-05-14'); CREATE TABLE yoga_members(member_id INT, name VARCHAR(50), start_date DATE, end_date DATE); INSERT INTO yoga_members(member_id, name, start_date, end_date) VALUES (1, 'John Doe', '2021-03-01', '2022-02-28'); INSERT INTO yoga_members(member_id, name, start_date, end_date) VALUES (3, 'Alice Johnson', '2021-06-01', '2023-05-31');", "sql": "SELECT gm.member_id, gm.name FROM gym_members gm INNER JOIN yoga_members ym ON gm.member_id = ym.member_id;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 107, "num_statements": 1} {"question": "pgTAP test for Resultset (assertion 136).", "schema": null, "sql": "INSERT INTO names (name) VALUES ('Eric');", "explanation": "SQL assertion from pgTAP test suite for Resultset.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 41, "num_statements": 1} {"question": "List the total sales for each brand in the \"makeup\" category for the last quarter.", "schema": "CREATE TABLE sales_quarterly (id INT, brand VARCHAR(100), category VARCHAR(100), sales_date DATE, revenue FLOAT);", "sql": "SELECT brand, SUM(revenue) as total_sales FROM sales_quarterly WHERE category = 'makeup' AND sales_date >= DATE_TRUNC('quarter', CURRENT_DATE - INTERVAL '3 months') GROUP BY brand;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 180, "num_statements": 1} {"question": "What is the total biomass of fish in US farms that are managed by farmers with more than 5 years of experience?", "schema": "CREATE TABLE us_farms (farmer_id INT, years_of_experience INT, biomass FLOAT); INSERT INTO us_farms (farmer_id, years_of_experience, biomass) VALUES (1, 7, 100.5), (2, 3, 50.2), (3, 6, 120.7);", "sql": "SELECT SUM(biomass) FROM us_farms WHERE years_of_experience > 5;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 64, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the greatest lost where played is less than 9?", "schema": "CREATE TABLE table_name_51 (lost INTEGER, played INTEGER)", "sql": "SELECT MAX(lost) FROM table_name_51 WHERE played < 9;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 53, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What Chassis did Memo Gidley use?", "schema": "CREATE TABLE table_name_37 (chassis VARCHAR, drivers VARCHAR)", "sql": "SELECT chassis FROM table_name_37 WHERE drivers = 'memo gidley';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 64, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many integrated allied-related are there?", "schema": "CREATE TABLE table_11944282_1 (integrated VARCHAR, allied_related VARCHAR)", "sql": "SELECT integrated FROM table_11944282_1 WHERE allied_related = 'Many';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 70, "num_statements": 1} {"question": "How many crime incidents occurred in the last week, broken down by day of the week?", "schema": "CREATE TABLE incidents (iid INT, incident_time TIMESTAMP);", "sql": "SELECT DATE_FORMAT(i.incident_time, '%W') AS day_of_week, COUNT(i.iid) FROM incidents i WHERE i.incident_time >= DATE_SUB(CURRENT_TIMESTAMP(), INTERVAL 1 WEEK) GROUP BY day_of_week;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 181, "num_statements": 1} {"question": "Which sustainable material has the highest total revenue in the ethical fashion industry?", "schema": "CREATE TABLE orders (order_id INT, material VARCHAR(50), revenue DECIMAL(10,2)); INSERT INTO orders (order_id, material, revenue) VALUES (1, 'organic cotton', 100.00), (2, 'recycled polyester', 150.00), (3, 'organic cotton', 200.00);", "sql": "SELECT material, SUM(revenue) AS total_revenue FROM orders GROUP BY material ORDER BY total_revenue DESC LIMIT 1;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 113, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was Kerry's lowest number of votes where Other received 0.8% and Kerry 70.4%?", "schema": "CREATE TABLE table_name_65 (kerry__number INTEGER, others__percentage VARCHAR, kerry__percentage VARCHAR)", "sql": "SELECT MIN(kerry__number) FROM table_name_65 WHERE others__percentage = '0.8%' AND kerry__percentage = '70.4%';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 111, "num_statements": 1} {"question": "What is the total number of companies founded by people from underrepresented communities in the education sector?", "schema": "CREATE TABLE Companies (id INT, name TEXT, industry TEXT, founders TEXT, diversity TEXT); INSERT INTO Companies (id, name, industry, founders, diversity) VALUES (1, 'EduForward', 'Education', 'Diverse Team', 'Underrepresented'); INSERT INTO Companies (id, name, industry, founders, diversity) VALUES (2, 'TechBoost', 'Technology', 'Non-Diverse Team', 'Not Underrepresented');", "sql": "SELECT COUNT(*) FROM Companies WHERE industry = 'Education' AND diversity = 'Underrepresented';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 95, "num_statements": 1} {"question": "Which destinations had a YoY visitor growth rate above 50% in 2020 compared to 2019?", "schema": "CREATE TABLE visitor_data (destination VARCHAR(50), year INT, visitors INT);", "sql": "SELECT destination, ((visitors_2020 / NULLIF(visitors_2019, 0) - 1) * 100) as yoy_growth_rate FROM visitor_data WHERE year IN (2019, 2020) GROUP BY destination HAVING MAX(yoy_growth_rate) > 50;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 193, "num_statements": 1} {"question": "PostgreSQL regression test 'sqljson_queryfuncs': Write the SELECT query (example 283).", "schema": null, "sql": "SELECT JSON_QUERY(jsonb 'null', '$Xyz' PASSING 1 AS \"Xyz\");", "explanation": "Regression test for Sqljson Queryfuncs in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT JSON_QUERY(jsonb 'null', '$Xyz' PASSING 1 AS \"Xyz\")) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": true, "sql_length": 59, "num_statements": 1} {"question": "pgTAP test for Hastap (assertion 175).", "schema": null, "sql": "/****************************************************************************/\n-- Test has_operator().\n\nSELECT * FROM check_test(\n has_operator( 'integer', 'pg_catalog', '<=', 'int', 'bool', 'desc' ),\n true,\n 'has_operator( left, schema, name, right, result, desc )',\n 'desc',\n ''\n);", "explanation": "SQL assertion from pgTAP test suite for Hastap.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": true, "sql_length": 288, "num_statements": 1} {"question": "How many tour operators in Canada offer eco-friendly tours?", "schema": "CREATE TABLE tour_operators (id INT, name TEXT, country TEXT, eco_friendly BOOLEAN); INSERT INTO tour_operators (id, name, country, eco_friendly) VALUES (1, 'Eco Tours Canada', 'Canada', true), (2, 'Nature Adventures', 'Canada', false), (3, 'Green Travels', 'Canada', true);", "sql": "SELECT COUNT(*) FROM tour_operators WHERE country = 'Canada' AND eco_friendly = true;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 85, "num_statements": 1} {"question": "PostgreSQL regression test 'xml': Write the SELECT query (example 182).", "schema": null, "sql": "SELECT COUNT(id) FROM xmltest WHERE xpath_exists('/myns:menu/myns:beers',data,ARRAY[ARRAY['myns','http://myns.com']]);", "explanation": "Regression test for Xml in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT COUNT(id) FROM xmltest WHERE xpath_exists('/myns:menu/myns:beers',data,ARRAY[ARRAY['myns','http://myns.com']])) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": true, "sql_length": 118, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the D 48 √ with a D 49 √ with r 9?", "schema": "CREATE TABLE table_name_27 (d_48_√ VARCHAR, d_49_√ VARCHAR)", "sql": "SELECT d_48_√ FROM table_name_27 WHERE d_49_√ = 'r 9';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 54, "num_statements": 1} {"question": "What is the number of patients who received group therapy for depression in clinics located in Texas?", "schema": "CREATE TABLE clinics (clinic_id INT, clinic_name VARCHAR(50), city VARCHAR(50), state VARCHAR(50)); INSERT INTO clinics (clinic_id, clinic_name, city, state) VALUES (1, 'ClinicC', 'Houston', 'TX'), (2, 'ClinicD', 'Austin', 'TX'); CREATE TABLE patients (patient_id INT, patient_name VARCHAR(50), age INT, clinic_id INT, condition_id INT); INSERT INTO patients (patient_id, patient_name, age, clinic_id, condition_id) VALUES (1, 'James Doe', 35, 1, 1), (2, 'Jasmine Smith', 28, 1, 2), (3, 'Alice Johnson', 42, 2, 3); CREATE TABLE conditions (condition_id INT, condition_name VARCHAR(50)); INSERT INTO conditions (condition_id, condition_name) VALUES (1, 'Depression'), (2, 'Anxiety Disorder'), (3, 'Bipolar Disorder'); CREATE TABLE therapies (therapy_id INT, therapy_name VARCHAR(50), patient_id INT, therapy_type VARCHAR(50)); INSERT INTO therapies (therapy_id, therapy_name, patient_id, therapy_type) VALUES (1, 'Group Therapy', 1, 'Group Therapy'), (2, 'Individual Therapy', 2, 'Individual Therapy');", "sql": "SELECT COUNT(*) FROM patients p JOIN clinics c ON p.clinic_id = c.clinic_id JOIN therapies t ON p.patient_id = t.patient_id JOIN conditions cond ON p.condition_id = cond.condition_id WHERE c.state = 'TX' AND cond.condition_name = 'Depression' AND t.therapy_type = 'Group Therapy';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 280, "num_statements": 1} {"question": "Update the energy rating of a property in the 'green_buildings' table with id 3 to 80.", "schema": "CREATE TABLE green_buildings (id INT, square_footage FLOAT, energy_rating INT);", "sql": "UPDATE green_buildings SET energy_rating = 80 WHERE id = 3;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 59, "num_statements": 1} {"question": "Identify the number of unique esports events and their types in the last 12 months, excluding 'Exhibition' events.", "schema": "CREATE TABLE EsportsEvents (EventID INT, EventName TEXT, EventType TEXT, EventDate DATE); INSERT INTO EsportsEvents (EventID, EventName, EventType, EventDate) VALUES (1, 'ELC', 'League', '2022-01-01'), (2, 'DAC', 'Championship', '2022-02-15'), (3, 'GCS', 'Cup', '2021-12-10'), (4, 'WCS', 'Series', '2022-04-20'), (5, 'EPL', 'League', '2022-05-05'), (6, 'IEM', 'Cup', '2022-06-12'), (7, 'EXPO', 'Exhibition', '2022-07-01');", "sql": "SELECT COUNT(DISTINCT EventID), EventType FROM EsportsEvents WHERE EventDate >= DATEADD(month, -12, GETDATE()) AND EventType != 'Exhibition' GROUP BY EventType;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 160, "num_statements": 1} {"question": "PostgreSQL Textsearch: show example 64.", "schema": null, "sql": "SELECT * FROM ts_debug('english', 'Paris'); alias | description | token | dictionaries | dictionary | lexemes -----------+-----------------+-------+----------------+--------------+--------- asciiword | Word, all ASCII | Paris | {english_stem} | english_stem | {pari} CREATE TEXT SEARCH DICTIONARY my_synonym ( TEMPLATE = synonym, SYNONYMS = my_synonyms ); ALTER TEXT SEARCH CONFIGURATION english ALTER MAPPING FOR asciiword WITH my_synonym, english_stem; SELECT * FROM ts_debug('english', 'Paris'); alias | description | token | dictionaries | dictionary | lexemes -----------+-----------------+-------+---------------------------+------------+--------- asciiword | Word, all ASCII | Paris | {my_synonym,english_stem} | my_synonym | {paris};", "explanation": "Example from PostgreSQL documentation on Textsearch.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 741, "num_statements": 5} {"question": "Calculate the total transaction amount for each customer in the \"Customers\" table, excluding transactions with amounts less than $100.", "schema": "CREATE TABLE Customers (CustomerID INT, TransactionDate DATE, TransactionAmount DECIMAL(10,2));", "sql": "SELECT CustomerID, SUM(TransactionAmount) as TotalTransactionAmount FROM Customers WHERE TransactionAmount >= 100 GROUP BY CustomerID;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 134, "num_statements": 1} {"question": "How many unique countries do players who have played Dota 2 come from?", "schema": "CREATE TABLE players (id INT, name VARCHAR(50), age INT, game VARCHAR(50), country VARCHAR(50)); INSERT INTO players (id, name, age, game, country) VALUES (1, 'Jane Doe', 22, 'Dota 2', 'USA');", "sql": "SELECT COUNT(DISTINCT country) AS num_countries FROM players WHERE game = 'Dota 2';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 83, "num_statements": 1} {"question": "How many satellites were launched by each country in the satellite_launches table?", "schema": "CREATE TABLE satellite_launches (country VARCHAR(30), launch_year INT, satellites INT); INSERT INTO satellite_launches VALUES ('USA', 1958, 1), ('USSR', 1957, 1), ('USA', 1959, 3), ('USSR', 1960, 4), ('USA', 1961, 4), ('USSR', 1962, 3);", "sql": "SELECT country, COUNT(satellites) OVER (PARTITION BY country) FROM satellite_launches;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "query_window_function", "is_postgresql_specific": true, "sql_length": 86, "num_statements": 1} {"question": "What is the maximum temperature recorded by any spacecraft during a space mission?", "schema": "CREATE TABLE Spacecraft (SpacecraftID INT, MaximumTemperature FLOAT);", "sql": "SELECT MAX(MaximumTemperature) FROM Spacecraft;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 47, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the result of the game on October 6, 1974?", "schema": "CREATE TABLE table_14954150_1 (result VARCHAR, date VARCHAR)", "sql": "SELECT result FROM table_14954150_1 WHERE date = 'October 6, 1974';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 67, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What College has a Player that is jermaine romans?", "schema": "CREATE TABLE table_name_79 (college VARCHAR, player VARCHAR)", "sql": "SELECT college FROM table_name_79 WHERE player = 'jermaine romans';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 67, "num_statements": 1} {"question": "What is the total number of refugee families supported by each organization in the last 3 months?", "schema": "CREATE TABLE NGOs (NGOID int, NGOName varchar(50)); INSERT INTO NGOs (NGOID, NGOName) VALUES (1, 'World Vision'), (2, 'Catholic Relief Services'); CREATE TABLE RefugeeSupport (SupportID int, NGOID int, FamilyID int, SupportDate date); INSERT INTO RefugeeSupport (SupportID, NGOID, FamilyID, SupportDate) VALUES (1, 1, 1, '2022-05-01'), (2, 1, 2, '2022-06-01'), (3, 2, 1, '2022-07-01');", "sql": "SELECT NGOName, COUNT(DISTINCT FamilyID) as SupportedFamilies FROM NGOs INNER JOIN RefugeeSupport ON NGOs.NGOID = RefugeeSupport.NGOID WHERE SupportDate >= DATE_SUB(CURRENT_DATE, INTERVAL 3 MONTH) GROUP BY NGOName;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 214, "num_statements": 1} {"question": "What are the names and locations of rural infrastructure projects with a budget over 300000?", "schema": "CREATE TABLE rural_projects (id INT, name TEXT, location TEXT, budget FLOAT); INSERT INTO rural_projects (id, name, location, budget) VALUES (1, 'Water Supply', 'Kenya', 500000.00), (2, 'Electricity Grid', 'Tanzania', 200000.00), (3, 'Road Construction', 'Uganda', 400000.00);", "sql": "SELECT name, location FROM rural_projects WHERE budget > 300000;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 64, "num_statements": 1} {"question": "What is the total number of inclusive housing units in Portland and San Francisco?", "schema": "CREATE TABLE housing (id INT, units INT, city VARCHAR(20), inclusive BOOLEAN); INSERT INTO housing (id, units, city, inclusive) VALUES (1, 50, 'Portland', TRUE), (2, 75, 'San Francisco', TRUE), (3, 100, 'NYC', FALSE);", "sql": "SELECT SUM(units) FROM housing WHERE inclusive = TRUE AND city IN ('Portland', 'San Francisco');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 96, "num_statements": 1} {"question": "List the donors who have donated more than $10,000 in total, and their corresponding donation dates.", "schema": "CREATE TABLE Donors (DonorID int, DonorName varchar(50), Country varchar(50), AmountDonated float); INSERT INTO Donors (DonorID, DonorName, Country, AmountDonated) VALUES (1, 'John Doe', 'USA', 15000.00), (2, 'Jane Smith', 'Canada', 20000.00);", "sql": "SELECT DonorName, DonationDate FROM Donors D JOIN Donations DON ON D.DonorID = DON.DonorID WHERE D.DonorID IN (SELECT DonorID FROM Donors WHERE AmountDonated > 10000.00);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 170, "num_statements": 1} {"question": "List all the unique investment products and their associated asset classes.", "schema": "CREATE TABLE investment_products (id INT, name VARCHAR(50), asset_class VARCHAR(50)); INSERT INTO investment_products (id, name, asset_class) VALUES (1, 'Stock A', 'Equities'), (2, 'Bond B', 'Fixed Income'), (3, 'Mutual Fund C', 'Equities'), (4, 'ETF D', 'Commodities');", "sql": "SELECT DISTINCT name, asset_class FROM investment_products;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 59, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the position with an overall less than 128 for Brigham Young college?", "schema": "CREATE TABLE table_name_87 (position VARCHAR, overall VARCHAR, college VARCHAR)", "sql": "SELECT position FROM table_name_87 WHERE overall < 128 AND college = 'brigham young';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 85, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was Melbourne's score as the home team?", "schema": "CREATE TABLE table_name_16 (home_team VARCHAR)", "sql": "SELECT home_team AS score FROM table_name_16 WHERE home_team = 'melbourne';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 75, "num_statements": 1} {"question": "Show the number of new donors per month in 2022.", "schema": "CREATE TABLE donors (id INT, donation_date DATE); INSERT INTO donors (id, donation_date) VALUES (1, '2022-01-15'), (2, '2022-02-10'), (3, '2022-02-25'), (4, '2022-03-05');", "sql": "SELECT DATE_FORMAT(donation_date, '%Y-%m') as month, COUNT(DISTINCT id) as new_donors FROM donors WHERE YEAR(donation_date) = 2022 GROUP BY month;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 146, "num_statements": 1} {"question": "Determine the number of unique IP addresses associated with each threat category in the last 90 days.", "schema": "CREATE TABLE ThreatIPs (Id INT, Threat VARCHAR(255), IP VARCHAR(255), Timestamp DATETIME); INSERT INTO ThreatIPs (Id, Threat, IP, Timestamp) VALUES (1, 'Ransomware', '192.168.1.1', '2022-01-01 10:00:00'), (2, 'Phishing', '192.168.1.2', '2022-01-02 12:00:00'), (3, 'Ransomware', '192.168.1.3', '2022-01-03 14:00:00');", "sql": "SELECT Threat, COUNT(DISTINCT IP) as IPCount FROM ThreatIPs WHERE Timestamp >= DATE_SUB(CURRENT_TIMESTAMP, INTERVAL 90 DAY) GROUP BY Threat;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 140, "num_statements": 1} {"question": "What was the average funding received per community development initiative in Ghana from 2016 to 2018?", "schema": "CREATE TABLE community_funding (initiative_id INT, country TEXT, funding INT, year INT); INSERT INTO community_funding (initiative_id, country, funding, year) VALUES (1, 'Ghana', 50000, 2016), (2, 'Ghana', 55000, 2017), (3, 'Ghana', 60000, 2018), (4, 'Ghana', 65000, 2019);", "sql": "SELECT AVG(funding) FROM community_funding WHERE country = 'Ghana' AND year BETWEEN 2016 AND 2018;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 98, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the 20 questions when centerfold model is cherie witter", "schema": "CREATE TABLE table_1566848_6 (centerfold_model VARCHAR)", "sql": "SELECT 20 AS _questions FROM table_1566848_6 WHERE centerfold_model = 'Cherie Witter';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 86, "num_statements": 1} {"question": "How many units of each product type were sold in Michigan dispensaries in the past month?", "schema": "CREATE TABLE ProductTypes (ProductTypeID INT, ProductType VARCHAR(100)); CREATE TABLE DispensaryTransactions (TransactionID INT, ProductTypeID INT, QuantitySold INT, TransactionDate DATE, DispensaryID INT);", "sql": "SELECT PT.ProductType, SUM(DT.QuantitySold) as TotalQuantitySold FROM ProductTypes PT JOIN DispensaryTransactions DT ON PT.ProductTypeID = DT.ProductTypeID WHERE DT.TransactionDate >= DATEADD(month, -1, GETDATE()) AND DT.DispensaryID IN (SELECT DispensaryID FROM Dispensaries WHERE State = 'Michigan') GROUP BY PT.ProductType;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 326, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which college have both players with position midfielder and players with position defender?", "schema": "CREATE TABLE match_season (College VARCHAR, POSITION VARCHAR)", "sql": "SELECT College FROM match_season WHERE POSITION = 'Midfielder' INTERSECT SELECT College FROM match_season WHERE POSITION = 'Defender';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 134, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the total number of games on January 20?", "schema": "CREATE TABLE table_name_75 (game VARCHAR, january VARCHAR)", "sql": "SELECT COUNT(game) FROM table_name_75 WHERE january = 20;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 57, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What source of wealth has a value of £5726m?", "schema": "CREATE TABLE table_name_51 (source_of_wealth VARCHAR, value VARCHAR)", "sql": "SELECT source_of_wealth FROM table_name_51 WHERE value = '£5726m';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "What was the total weight of minerals extracted by age group in the given dataset?", "schema": "CREATE TABLE labor (employee_id INT, age_group VARCHAR(50), material VARCHAR(50), weight FLOAT); INSERT INTO labor (employee_id, age_group, material, weight) VALUES (1, '18-24', 'Iron', 500), (2, '25-34', 'Iron', 450), (3, '35-44', 'Iron', 400), (4, '45-54', 'Iron', 350), (5, '55-64', 'Iron', 300);", "sql": "SELECT age_group, SUM(weight) as total_weight FROM labor GROUP BY age_group;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 76, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How much did the home team Hawthorn score?", "schema": "CREATE TABLE table_name_5 (home_team VARCHAR)", "sql": "SELECT home_team AS score FROM table_name_5 WHERE home_team = 'hawthorn';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 73, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was round 7's lowest overall?", "schema": "CREATE TABLE table_name_72 (overall INTEGER, round VARCHAR)", "sql": "SELECT MIN(overall) FROM table_name_72 WHERE round = 7;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 55, "num_statements": 1} {"question": "Get the names of schools with accessibility ratings below 3.", "schema": "CREATE TABLE schools (id INT, name VARCHAR(255), accessibility_rating INT);", "sql": "SELECT name FROM schools WHERE accessibility_rating < 3;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 56, "num_statements": 1} {"question": "pgTAP test for Resultset (assertion 465).", "schema": null, "sql": "SELECT * FROM check_test(\n isnt_empty( 'SELECT 1' ),\n true,\n 'isnt_empty(sql)',\n '',\n ''\n);", "explanation": "SQL assertion from pgTAP test suite for Resultset.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 106, "num_statements": 1} {"question": "What is the average horsepower of electric vehicles in the 2022 Detroit Auto Show?", "schema": "CREATE TABLE AutoShow (Id INT, VehicleType VARCHAR(50), Event VARCHAR(100), Horsepower FLOAT); INSERT INTO AutoShow (Id, VehicleType, Event, Horsepower) VALUES (1, 'Electric', '2022 Detroit Auto Show', 350), (2, 'Hybrid', '2022 Detroit Auto Show', 250), (3, 'Gasoline', '2022 Detroit Auto Show', 200), (4, 'Electric', '2022 Shanghai Auto Show', 400), (5, 'Hybrid', '2022 Shanghai Auto Show', 300);", "sql": "SELECT AVG(Horsepower) FROM AutoShow WHERE VehicleType = 'Electric' AND Event LIKE '%2022 Detroit Auto Show%';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 110, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the score of the game with 96 points?", "schema": "CREATE TABLE table_name_85 (score VARCHAR, points VARCHAR)", "sql": "SELECT score FROM table_name_85 WHERE points = 96;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 50, "num_statements": 1} {"question": "Which crop has the highest yield in 'crop_comparison' table?", "schema": "CREATE TABLE crop_comparison (farmer VARCHAR(50), crop VARCHAR(50), yield INT); INSERT INTO crop_comparison (farmer, crop, yield) VALUES ('FarmerA', 'corn', 100), ('FarmerA', 'wheat', 80), ('FarmerB', 'corn', 110), ('FarmerB', 'wheat', 90), ('FarmerC', 'corn', 95), ('FarmerC', 'wheat', 75);", "sql": "SELECT crop, MAX(yield) as highest_yield FROM crop_comparison GROUP BY crop;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 76, "num_statements": 1} {"question": "Get all aircraft with the engine 'CFM56'", "schema": "CREATE TABLE aircraft (id INT PRIMARY KEY, model VARCHAR(50), engine VARCHAR(50)); INSERT INTO aircraft (id, model, engine) VALUES (101, '747', 'CFM56'), (102, 'A320', 'IAE V2500'), (103, 'A350', 'Rolls-Royce Trent XWB'), (104, '787', 'GE GEnx'), (105, '737', 'CFM56');", "sql": "SELECT * FROM aircraft WHERE engine = 'CFM56';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 46, "num_statements": 1} {"question": "List all electric vehicle adoption statistics for France, including the year and the number of electric vehicles sold.", "schema": "CREATE TABLE Adoption (Year INT, Country VARCHAR(255), EVsSold INT); INSERT INTO Adoption (Year, Country, EVsSold) VALUES (2018, 'France', 120000), (2019, 'France', 150000), (2020, 'France', 200000);", "sql": "SELECT Year, EVsSold FROM Adoption WHERE Country = 'France';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 60, "num_statements": 1} {"question": "How many marine species are threatened with extinction?", "schema": "CREATE TABLE marine_species_status (species TEXT, status TEXT);", "sql": "SELECT COUNT(*) FROM marine_species_status WHERE status = 'Threatened';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 71, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the unformatted capacity per side if the size is 8in?", "schema": "CREATE TABLE table_name_42 (unformatted_capacity_per_side VARCHAR, size VARCHAR)", "sql": "SELECT unformatted_capacity_per_side FROM table_name_42 WHERE size = '8in';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 75, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the placing when the weight in kg was 58.0?", "schema": "CREATE TABLE table_24915874_1 (placing VARCHAR, weight__kg_ VARCHAR)", "sql": "SELECT placing FROM table_24915874_1 WHERE weight__kg_ = '58.0';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 64, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who has a DCSF number of 3373?", "schema": "CREATE TABLE table_name_19 (name VARCHAR, dcsf_number VARCHAR)", "sql": "SELECT name FROM table_name_19 WHERE dcsf_number = 3373;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 56, "num_statements": 1} {"question": "What is the maximum price of organic cosmetics sourced from the United States?", "schema": "CREATE TABLE products (product_id INT, name TEXT, is_organic BOOLEAN, price DECIMAL, source_country TEXT); INSERT INTO products (product_id, name, is_organic, price, source_country) VALUES (1, 'Lipstick', TRUE, 25.99, 'USA'); INSERT INTO products (product_id, name, is_organic, price, source_country) VALUES (2, 'Eye Shadow', FALSE, 23.49, 'Mexico');", "sql": "SELECT MAX(price) FROM products WHERE is_organic = TRUE AND source_country = 'USA';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 83, "num_statements": 1} {"question": "pgTAP test for Unique (assertion 15).", "schema": null, "sql": "SELECT * FROM check_test(\n col_is_unique( 'public', 'sometab', 'name'::name ),\n true,\n 'col_is_unique( schema, table, column )',\n 'Column sometab(name) should have a unique constraint',\n ''\n);", "explanation": "SQL assertion from pgTAP test suite for Unique.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 207, "num_statements": 1} {"question": "Delete all records of non-vegan products in the 'haircare' category.", "schema": "CREATE TABLE products (id INT, name VARCHAR(255), category VARCHAR(255), price DECIMAL(5,2), vegan BOOLEAN); INSERT INTO products (id, name, category, price, vegan) VALUES (1, 'Shampoo', 'haircare', 12.99, false), (2, 'Conditioner', 'haircare', 14.99, true), (3, 'Hair Spray', 'haircare', 7.99, false);", "sql": "DELETE FROM products WHERE category = 'haircare' AND vegan = false;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_delete", "is_postgresql_specific": false, "sql_length": 67, "num_statements": 1} {"question": "PostgreSQL regression test 'stats': Write the SELECT query (example 268).", "schema": null, "sql": "SELECT stats_reset > :'wal_reset_ts'::timestamptz FROM pg_stat_wal;", "explanation": "Regression test for Stats in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT stats_reset > :'wal_reset_ts'::timestamptz FROM pg_stat_wal) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 67, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many platinum points were awarded when 70 silver points were awarded?", "schema": "CREATE TABLE table_11254821_2 (points_awarded__platinum_ VARCHAR, points_awarded__silver_ VARCHAR)", "sql": "SELECT points_awarded__platinum_ FROM table_11254821_2 WHERE points_awarded__silver_ = 70;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 90, "num_statements": 1} {"question": "What is the total number of sustainable tourism awards?", "schema": "CREATE TABLE Destinations (destination_id INT, destination_name TEXT, country TEXT, awards INT); INSERT INTO Destinations (destination_id, destination_name, country, awards) VALUES (1, 'City A', 'Germany', 3), (2, 'City B', 'Switzerland', 5), (3, 'City C', 'Norway', 2);", "sql": "SELECT SUM(awards) AS total_awards FROM Destinations;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 53, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the crowd when the away team was hawthorn?", "schema": "CREATE TABLE table_name_72 (crowd INTEGER, away_team VARCHAR)", "sql": "SELECT MIN(crowd) FROM table_name_72 WHERE away_team = 'hawthorn';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "Delete the record for the donor from Canada.", "schema": "CREATE TABLE donors (id INT, name TEXT, country TEXT, amount_donated DECIMAL(10,2)); INSERT INTO donors (id, name, country, amount_donated) VALUES (1, 'Alice', 'United States', 5000.00), (2, 'Bob', 'Canada', 6000.00);", "sql": "DELETE FROM donors WHERE country = 'Canada';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_delete", "is_postgresql_specific": false, "sql_length": 44, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which players made exactly 8 cuts?", "schema": "CREATE TABLE table_20590020_2 (player VARCHAR, cuts_made VARCHAR)", "sql": "SELECT player FROM table_20590020_2 WHERE cuts_made = 8;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 56, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'create_table_like' (example 36).", "schema": null, "sql": "CREATE TABLE test_like_4d (LIKE test_like_4 INCLUDING DEFAULTS INCLUDING GENERATED);", "explanation": "DDL from PostgreSQL core regression test for Create Table Like.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_table", "is_postgresql_specific": false, "sql_length": 84, "num_statements": 1} {"question": "How many military bases are present in Germany?", "schema": "CREATE TABLE MilitaryBases (ID INT, Country VARCHAR(20), Quantity INT); INSERT INTO MilitaryBases (ID, Country, Quantity) VALUES (1, 'Germany', 31);", "sql": "SELECT Quantity FROM MilitaryBases WHERE Country = 'Germany';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 61, "num_statements": 1} {"question": "Show the names and costs of all military technologies in the 'communication' category.", "schema": "CREATE TABLE military_tech (tech_name TEXT, category TEXT, cost INTEGER); INSERT INTO military_tech (tech_name, category, cost) VALUES ('UAV', 'Surveillance', 10000), ('Satellite', 'Communication', 15000), ('AI System', 'intelligence operations', 20000);", "sql": "SELECT tech_name, cost FROM military_tech WHERE category = 'Communication';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 75, "num_statements": 1} {"question": "Insert a new rural healthcare worker into the \"healthcare_workers\" table.", "schema": "CREATE TABLE healthcare_workers (id INT, name TEXT, age INT, position TEXT, hospital_id INT); CREATE TABLE rural_hospitals (id INT, name TEXT, location TEXT, state TEXT);", "sql": "INSERT INTO healthcare_workers (id, name, age, position, hospital_id) VALUES (3, 'Maria Garcia', 35, 'Nurse', 3);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 113, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the lowest number conceded for the team that had less than 8 wins, scored 21, and had less than 23 points?", "schema": "CREATE TABLE table_name_77 (conceded INTEGER, points VARCHAR, wins VARCHAR, scored VARCHAR)", "sql": "SELECT MIN(conceded) FROM table_name_77 WHERE wins < 8 AND scored = 21 AND points < 23;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 87, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the highest Rank for Jason Kreis, with less than 305 matches?", "schema": "CREATE TABLE table_name_67 (rank INTEGER, name VARCHAR, matches VARCHAR)", "sql": "SELECT MAX(rank) FROM table_name_67 WHERE name = 'jason kreis' AND matches < 305;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 81, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: For short patent type of protections, what type of PCT route is available?", "schema": "CREATE TABLE table_2279413_1 (pct_route_available VARCHAR, type_of_protection VARCHAR)", "sql": "SELECT pct_route_available FROM table_2279413_1 WHERE type_of_protection = 'short patent';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 90, "num_statements": 1} {"question": "Update the price of concert records for the artist 'Adele' to 150 in the 'concerts' table.", "schema": "CREATE TABLE concerts (id INT, artist VARCHAR(255), city VARCHAR(255), tickets_sold INT, price DECIMAL(10,2));", "sql": "UPDATE concerts SET price = 150 WHERE artist = 'Adele';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 55, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the division for the division semifinals playoffs?", "schema": "CREATE TABLE table_1087659_2 (division VARCHAR, playoffs VARCHAR)", "sql": "SELECT division FROM table_1087659_2 WHERE playoffs = 'division Semifinals';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 76, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the total bodyweight of everyone that has a Snatch of 153.0?", "schema": "CREATE TABLE table_name_98 (bodyweight INTEGER, snatch VARCHAR)", "sql": "SELECT SUM(bodyweight) FROM table_name_98 WHERE snatch = '153.0';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 65, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What's the number of the horse whose trainer is Kim Bailey?", "schema": "CREATE TABLE table_20095300_1 (number INTEGER, trainer VARCHAR)", "sql": "SELECT MAX(number) FROM table_20095300_1 WHERE trainer = 'Kim Bailey';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 70, "num_statements": 1} {"question": "PostgreSQL regression test 'compression_lz4': Write the SELECT query (example 42).", "schema": null, "sql": "SELECT pg_column_compression(f1) FROM cmpart2;", "explanation": "Regression test for Compression Lz4 in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT pg_column_compression(f1) FROM cmpart2) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 46, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the name of series episode 11-02's segment c?", "schema": "CREATE TABLE table_15187735_11 (segment_c VARCHAR, series_ep VARCHAR)", "sql": "SELECT segment_c FROM table_15187735_11 WHERE series_ep = '11-02';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the Award name with a Team name that is cougar robotics team?", "schema": "CREATE TABLE table_name_53 (award_name VARCHAR, team_name VARCHAR)", "sql": "SELECT award_name FROM table_name_53 WHERE team_name = 'cougar robotics team';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 78, "num_statements": 1} {"question": "How many sharks are there in the 'sharks' table?", "schema": "CREATE TABLE sharks (id INT, name VARCHAR(255), location VARCHAR(255), status VARCHAR(255));", "sql": "SELECT COUNT(*) FROM sharks WHERE status = 'alive';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 51, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What are the first names of all the students?", "schema": "CREATE TABLE student (fname VARCHAR)", "sql": "SELECT DISTINCT fname FROM student;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 35, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Tell me the total number of Bronze for futsal", "schema": "CREATE TABLE table_name_67 (bronze VARCHAR, sport VARCHAR)", "sql": "SELECT COUNT(bronze) FROM table_name_67 WHERE sport = 'futsal';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 63, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'jsonb_plperl' (example 33).", "schema": null, "sql": "SELECT roundtrip('[[\"string1\", \"string2\"]]', 'ARRAY');", "explanation": "Example query from the 'jsonb_plperl' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 54, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which To par is of thomas aiken?", "schema": "CREATE TABLE table_name_77 (to_par VARCHAR, player VARCHAR)", "sql": "SELECT to_par FROM table_name_77 WHERE player = 'thomas aiken';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 63, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'btree_gist' (example 6).", "schema": null, "sql": "SELECT count(*) FROM float4tmp WHERE a >= -179.0;", "explanation": "Example query from the 'btree_gist' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 49, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the lowest finish that has a start greater than 27, with a year after 1985?", "schema": "CREATE TABLE table_name_62 (finish INTEGER, start VARCHAR, year VARCHAR)", "sql": "SELECT MIN(finish) FROM table_name_62 WHERE start > 27 AND year > 1985;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 71, "num_statements": 1} {"question": "What is the average explainability score for AI algorithms, by algorithm type, in the Middle Eastern region?", "schema": "CREATE TABLE ai_algorithms (algorithm_id INT, algorithm_name VARCHAR(50), algorithm_type VARCHAR(50), region VARCHAR(50), explainability_score FLOAT); INSERT INTO ai_algorithms (algorithm_id, algorithm_name, algorithm_type, region, explainability_score) VALUES (1, 'AlgoO', 'Deep Learning', 'Middle East', 0.75), (2, 'AlgoP', 'Computer Vision', 'Middle East', 0.82), (3, 'AlgoQ', 'Deep Learning', 'Middle East', 0.80), (4, 'AlgoR', 'Rule-based', 'Middle East', 0.85);", "sql": "SELECT algorithm_type, region, AVG(explainability_score) AS avg_explainability_score FROM ai_algorithms WHERE region = 'Middle East' GROUP BY algorithm_type, region;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 165, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many gold medals for bellbrook HS with less than 1 silver?", "schema": "CREATE TABLE table_name_73 (gold_medals VARCHAR, ensemble VARCHAR, silver_medals VARCHAR)", "sql": "SELECT COUNT(gold_medals) FROM table_name_73 WHERE ensemble = 'bellbrook hs' AND silver_medals < 1;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 99, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What's the lowest rank of the United States with a time less than 24.63?", "schema": "CREATE TABLE table_name_74 (rank INTEGER, nationality VARCHAR, time VARCHAR)", "sql": "SELECT MIN(rank) FROM table_name_74 WHERE nationality = 'united states' AND time < 24.63;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 89, "num_statements": 1} {"question": "What is the total number of books, by genre, published by presses in India and Nigeria?", "schema": "CREATE TABLE books (id INT, title VARCHAR(255), genre VARCHAR(255), press_location VARCHAR(255)); INSERT INTO books (id, title, genre, press_location) VALUES (1, 'Book1', 'Fiction', 'India'), (2, 'Book2', 'Non-fiction', 'Nigeria'); CREATE TABLE presses (id INT, name VARCHAR(255), location VARCHAR(255)); INSERT INTO presses (id, name, location) VALUES (1, 'Press1', 'India'), (2, 'Press2', 'Nigeria');", "sql": "SELECT genre, COUNT(*) as total FROM books JOIN presses ON books.press_location = presses.location WHERE presses.location IN ('India', 'Nigeria') GROUP BY genre;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 161, "num_statements": 1} {"question": "What is the total amount donated to each organization type?", "schema": "CREATE TABLE Donations (DonationID int, Amount decimal, OrganizationType text); INSERT INTO Donations (DonationID, Amount, OrganizationType) VALUES (1, 5000, 'Environment'); INSERT INTO Donations (DonationID, Amount, OrganizationType) VALUES (2, 7000, 'Health'); INSERT INTO Donations (DonationID, Amount, OrganizationType) VALUES (3, 3000, 'Education'); INSERT INTO Donations (DonationID, Amount, OrganizationType) VALUES (4, 2000, 'Environment'); CREATE TABLE OrganizationTypes (OrganizationTypeID int, OrganizationType text); INSERT INTO OrganizationTypes (OrganizationTypeID, OrganizationType) VALUES (1, 'Environment'); INSERT INTO OrganizationTypes (OrganizationTypeID, OrganizationType) VALUES (2, 'Health'); INSERT INTO OrganizationTypes (OrganizationTypeID, OrganizationType) VALUES (3, 'Education');", "sql": "SELECT OrganizationType, SUM(Amount) as TotalDonation FROM Donations JOIN OrganizationTypes ON Donations.OrganizationType = OrganizationTypes.OrganizationType GROUP BY OrganizationType;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 185, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the score with a goal of 16?", "schema": "CREATE TABLE table_name_93 (score VARCHAR, goal VARCHAR)", "sql": "SELECT score FROM table_name_93 WHERE goal = 16;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 48, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What did Steve Stricker score?", "schema": "CREATE TABLE table_name_54 (score VARCHAR, player VARCHAR)", "sql": "SELECT score FROM table_name_54 WHERE player = 'steve stricker';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 64, "num_statements": 1} {"question": "Find the number of players who achieved level 10 or above in 'PuzzleGame' in 'SA' region.", "schema": "CREATE TABLE PuzzleGame (playerID INT, region VARCHAR(5), level INT); INSERT INTO PuzzleGame (playerID, region, level) VALUES (1, 'SA', 12), (2, 'SA', 15), (3, 'SA', 8), (4, 'EU', 20);", "sql": "SELECT COUNT(*) FROM PuzzleGame WHERE region = 'SA' AND level >= 10;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 68, "num_statements": 1} {"question": "Identify the community development initiatives in India with the highest expenditure in 2021.", "schema": "CREATE TABLE community_initiatives (id INT, country VARCHAR(50), initiative VARCHAR(50), year INT, expenditure DECIMAL(10,2)); INSERT INTO community_initiatives (id, country, initiative, year, expenditure) VALUES (1, 'India', 'Healthcare Center', 2021, 200000.00), (2, 'India', 'Education Building', 2021, 250000.00);", "sql": "SELECT initiative, SUM(expenditure) AS total_expenditure FROM community_initiatives WHERE country = 'India' AND year = 2021 GROUP BY initiative ORDER BY total_expenditure DESC LIMIT 1;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 184, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which Start has a 0 Conv, 0 Pens and 8 Tries?", "schema": "CREATE TABLE table_name_54 (start VARCHAR, tries VARCHAR, conv VARCHAR, pens VARCHAR)", "sql": "SELECT start FROM table_name_54 WHERE conv = '0' AND pens = '0' AND tries = '8';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 80, "num_statements": 1} {"question": "What is the total weight of cruelty-free ingredients for each product?", "schema": "CREATE TABLE product (product_id INT, product_name TEXT); CREATE TABLE ingredient (ingredient_id INT, product_id INT, weight FLOAT, cruelty_free BOOLEAN); INSERT INTO product VALUES (1, 'Lipstick'), (2, 'Moisturizer'); INSERT INTO ingredient VALUES (1, 1, 50.0, true), (2, 1, 25.0, false), (3, 2, 30.0, true);", "sql": "SELECT p.product_name, SUM(i.weight) FROM product p JOIN ingredient i ON p.product_id = i.product_id WHERE i.cruelty_free = true GROUP BY p.product_name;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 153, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Average total for jim furyk?", "schema": "CREATE TABLE table_name_33 (total INTEGER, player VARCHAR)", "sql": "SELECT AVG(total) FROM table_name_33 WHERE player = 'jim furyk';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 64, "num_statements": 1} {"question": "Get the details of the defense contract with the second highest value", "schema": "CREATE TABLE second_highest_contracts (id INT, contract_type VARCHAR(255), contract_value INT); INSERT INTO second_highest_contracts (id, contract_type, contract_value) VALUES (1, 'Service', 5000000), (2, 'Supply', 7000000), (3, 'Research', 6000000);", "sql": "SELECT * FROM second_highest_contracts WHERE contract_value = (SELECT MAX(contract_value) FROM second_highest_contracts WHERE contract_value < (SELECT MAX(contract_value) FROM second_highest_contracts));", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 203, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'eager_aggregate' (example 86).", "schema": null, "sql": "CREATE TABLE eager_agg_tab_ml_p3 PARTITION OF eager_agg_tab_ml FOR VALUES FROM (20) TO (30) PARTITION BY RANGE(x);", "explanation": "DDL from PostgreSQL core regression test for Eager Aggregate.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "ddl_table", "is_postgresql_specific": true, "sql_length": 114, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the Date with a Score with 86–108?", "schema": "CREATE TABLE table_name_7 (date VARCHAR, score VARCHAR)", "sql": "SELECT date FROM table_name_7 WHERE score = '86–108';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 53, "num_statements": 1} {"question": "What is the average depth of the 'Indian Ocean' in the ocean_floor_mapping table?", "schema": "CREATE TABLE ocean_floor_mapping (location TEXT, depth INTEGER); INSERT INTO ocean_floor_mapping (location, depth) VALUES ('Challenger Deep', 10994), ('Mariana Trench', 10972), ('Tonga Trench', 10823), ('Indian Ocean', 4665);", "sql": "SELECT AVG(depth) FROM ocean_floor_mapping WHERE location = 'Indian Ocean';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 75, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the rank of the player who got 58179 in yardage?", "schema": "CREATE TABLE table_18686317_1 (rank VARCHAR, yardage VARCHAR)", "sql": "SELECT rank FROM table_18686317_1 WHERE yardage = 58179;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 56, "num_statements": 1} {"question": "List all factories in regions with a low percentage of sustainable textile sourcing, ordered alphabetically by factory name.", "schema": "CREATE TABLE Factories (FactoryID int, FactoryName varchar(50), Region varchar(50)); INSERT INTO Factories (FactoryID, FactoryName, Region) VALUES (1, 'EcoFactory', 'Asia'); INSERT INTO Factories (FactoryID, FactoryName, Region) VALUES (2, 'GreenManufacturing', 'Europe'); CREATE TABLE Sourcing (FactoryID int, SustainableSourcePercentage decimal(5,2)); INSERT INTO Sourcing (FactoryID, SustainableSourcePercentage) VALUES (1, 0.60); INSERT INTO Sourcing (FactoryID, SustainableSourcePercentage) VALUES (2, 0.70);", "sql": "SELECT f.FactoryName FROM Factories f INNER JOIN Sourcing s ON f.FactoryID = s.FactoryID WHERE s.SustainableSourcePercentage < 0.75 GROUP BY f.FactoryName ORDER BY f.FactoryName ASC;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 182, "num_statements": 1} {"question": "What are the names of the 2 most recent marine conservation projects?", "schema": "CREATE TABLE marine_conservation_projects (project_name TEXT, start_date DATE); INSERT INTO marine_conservation_projects (project_name, start_date) VALUES ('Coral Reef Restoration', '2022-03-01'), ('Seagrass Protection', '2021-09-15'), ('Mangrove Forest Expansion', '2020-05-01');", "sql": "SELECT project_name FROM marine_conservation_projects ORDER BY start_date DESC LIMIT 2;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 87, "num_statements": 1} {"question": "What is the average phosphorus concentration (in µg/L) for each species in 2024, ordered by the average value?", "schema": "CREATE TABLE species_phosphorus (species VARCHAR(255), year INT, avg_phosphorus FLOAT); INSERT INTO species_phosphorus (species, year, avg_phosphorus) VALUES ('Salmon', 2024, 12.0), ('Tilapia', 2024, 7.5), ('Catfish', 2024, 6.0), ('Trout', 2024, 10.5), ('Shrimp', 2024, 14.0), ('Lobster', 2024, 15.0);", "sql": "SELECT species, AVG(avg_phosphorus) as avg_phosphorus_ug_l FROM species_phosphorus WHERE year = 2024 GROUP BY species ORDER BY avg_phosphorus_ug_l;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 147, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the represent province for the contestant whose hometown is Woerden?", "schema": "CREATE TABLE table_27946889_2 (represent_province VARCHAR, hometown VARCHAR)", "sql": "SELECT represent_province FROM table_27946889_2 WHERE hometown = 'Woerden';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 75, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who had a second qualification time of 1:01.777?", "schema": "CREATE TABLE table_name_9 (name VARCHAR, qual_2 VARCHAR)", "sql": "SELECT name FROM table_name_9 WHERE qual_2 = '1:01.777';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 56, "num_statements": 1} {"question": "pgTAP test for Hastap (assertion 290).", "schema": null, "sql": "SELECT * FROM check_test(\n hasnt_relation( '__SDFSDFD__', 'lol' ),\n true,\n 'hasnt_relation(non-existent schema, tab)',\n 'lol',\n ''\n);", "explanation": "SQL assertion from pgTAP test suite for Hastap.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 148, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the power at Baguio where the frequency is 102.3mhz?", "schema": "CREATE TABLE table_name_59 (power__kw_ VARCHAR, frequency VARCHAR, location VARCHAR)", "sql": "SELECT power__kw_ FROM table_name_59 WHERE frequency = '102.3mhz' AND location = 'baguio';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 90, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: When 380 g/mi (236 g/km) is the dirty electric grid rocky mountains (denver) what is the u.s national average electric mix?", "schema": "CREATE TABLE table_24620684_2 (us_national_average_electric_mix VARCHAR, dirty_electric_grid_rocky_mountains__denver_ VARCHAR)", "sql": "SELECT us_national_average_electric_mix FROM table_24620684_2 WHERE dirty_electric_grid_rocky_mountains__denver_ = '380 g/mi (236 g/km)';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 137, "num_statements": 1} {"question": "Identify artworks that have been exhibited in both London and Tokyo.", "schema": "CREATE TABLE Exhibitions (ExhibitionID INT PRIMARY KEY, Title VARCHAR(100), City VARCHAR(100), StartDate DATE, EndDate DATE, ArtWorkID INT, FOREIGN KEY (ArtWorkID) REFERENCES ArtWorks(ArtWorkID)); INSERT INTO Exhibitions (ExhibitionID, Title, City, StartDate, EndDate, ArtWorkID) VALUES (1, 'Artistic Revolutions', 'London', '2020-01-01', '2020-03-31', 1); INSERT INTO Exhibitions (ExhibitionID, Title, City, StartDate, EndDate, ArtWorkID) VALUES (2, 'Artistic Revolutions', 'Tokyo', '2020-04-01', '2020-06-30', 1); CREATE TABLE ArtWorks (ArtWorkID INT PRIMARY KEY, Title VARCHAR(100)); INSERT INTO ArtWorks (ArtWorkID, Title) VALUES (1, 'The Persistence of Memory');", "sql": "SELECT ArtWorks.Title FROM ArtWorks INNER JOIN Exhibitions ON ArtWorks.ArtWorkID = Exhibitions.ArtWorkID WHERE Exhibitions.City IN ('London', 'Tokyo') GROUP BY ArtWorks.Title HAVING COUNT(DISTINCT Exhibitions.City) = 2;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 219, "num_statements": 1} {"question": "What is the number of eco-certified accommodations in Tokyo?", "schema": "CREATE TABLE accommodations (accommodation_id INT, name TEXT, city TEXT, eco_certified INT); INSERT INTO accommodations (accommodation_id, name, city, eco_certified) VALUES (1, 'Eco Lodge A', 'Tokyo', 1), (2, 'Hotel B', 'Tokyo', 0), (3, 'Inn C', 'Tokyo', 1);", "sql": "SELECT COUNT(*) FROM accommodations WHERE city = 'Tokyo' AND eco_certified = 1;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 79, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Find the name of the courses that do not have any prerequisite?", "schema": "CREATE TABLE prereq (title VARCHAR, course_id VARCHAR); CREATE TABLE course (title VARCHAR, course_id VARCHAR)", "sql": "SELECT title FROM course WHERE NOT course_id IN (SELECT course_id FROM prereq);", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 79, "num_statements": 1} {"question": "What is the average flight safety score for flights operated by SpaceAirlines in the last 2 years?", "schema": "CREATE TABLE flights (flight_id INT, airline VARCHAR(255), flight_date DATE, safety_score INT); INSERT INTO flights (flight_id, airline, flight_date, safety_score) VALUES (1, 'SpaceAirlines', '2020-02-03', 95), (2, 'SpaceAirlines', '2020-06-15', 92), (3, 'SpaceAirlines', '2019-11-18', 97), (4, 'SpaceAirlines', '2021-03-25', 93), (5, 'SpaceAirlines', '2018-09-01', 96);", "sql": "SELECT AVG(safety_score) FROM flights WHERE airline = 'SpaceAirlines' AND flight_date >= DATEADD(year, -2, CURRENT_DATE);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 121, "num_statements": 1} {"question": "What is the total number of employees from underrepresented communities in the 'mining_operations' table, grouped by their departments?", "schema": "CREATE TABLE mining_operations (id INT, name VARCHAR(50), job_title VARCHAR(50), department VARCHAR(50), community VARCHAR(50)); INSERT INTO mining_operations (id, name, job_title, department, community) VALUES (1, 'John Doe', 'Mining Engineer', 'Operations', 'Underrepresented'); INSERT INTO mining_operations (id, name, job_title, department, community) VALUES (2, 'Jane Smith', 'Geologist', 'Exploration', 'Underrepresented'); INSERT INTO mining_operations (id, name, job_title, department, community) VALUES (3, 'Maria Garcia', 'Mining Engineer', 'Operations', 'Not Underrepresented');", "sql": "SELECT department, COUNT(*) as total_employees FROM mining_operations WHERE community = 'Underrepresented' GROUP BY department;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 127, "num_statements": 1} {"question": "How many movies were produced in Italy each year?", "schema": "CREATE TABLE movies (id INT, title VARCHAR(255), rating FLOAT, release_year INT, country VARCHAR(50)); INSERT INTO movies (id, title, rating, release_year, country) VALUES (1, 'Movie1', 7.5, 2010, 'Italy'), (2, 'Movie2', 8.2, 2012, 'Italy'), (3, 'Movie3', 6.8, 2015, 'Italy'), (4, 'Movie4', 8.1, 2011, 'Italy'), (5, 'Movie5', 7.2, 2013, 'Italy');", "sql": "SELECT release_year, COUNT(*) FROM movies WHERE country = 'Italy' GROUP BY release_year;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 88, "num_statements": 1} {"question": "What is the average weight of ingredients for all products sold in the US region?", "schema": "CREATE TABLE product (product_id INT, product_name TEXT); CREATE TABLE ingredient (ingredient_id INT, product_id INT, weight FLOAT, country TEXT); CREATE TABLE purchase (purchase_id INT, product_id INT, purchase_date DATE, region TEXT); INSERT INTO product VALUES (1, 'Lipstick'), (2, 'Moisturizer'); INSERT INTO ingredient VALUES (1, 1, 50.0, 'CA'), (2, 1, 25.0, 'US'), (3, 2, 30.0, 'CA'); INSERT INTO purchase VALUES (1, 1, '2022-01-10', 'US'), (2, 2, '2022-01-15', 'CA');", "sql": "SELECT AVG(weight) FROM ingredient i JOIN product p ON i.product_id = p.product_id JOIN purchase pu ON p.product_id = pu.product_id WHERE pu.region = 'US';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 155, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the name of the zhou jinsong's hometown?", "schema": "CREATE TABLE table_28180840_15 (hometown VARCHAR, name_name_of_act VARCHAR)", "sql": "SELECT hometown FROM table_28180840_15 WHERE name_name_of_act = 'Zhou Jinsong';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 79, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What date was the game at Dowdy-Ficklen stadium • Greenville, NC, with 27,321 in attendance?", "schema": "CREATE TABLE table_name_96 (date VARCHAR, site VARCHAR, attendance VARCHAR)", "sql": "SELECT date FROM table_name_96 WHERE site = 'dowdy-ficklen stadium • greenville, nc' AND attendance = '27,321';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 111, "num_statements": 1} {"question": "pgTAP test for Pgtap--Unpackaged--0.91.0 (assertion 380).", "schema": null, "sql": "ALTER EXTENSION pgtap ADD FUNCTION hasnt_cast ( NAME, NAME, NAME );", "explanation": "SQL assertion from pgTAP test suite for Pgtap--Unpackaged--0.91.0.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 67, "num_statements": 1} {"question": "What was the highest score in League Y?", "schema": "CREATE TABLE League_Y (match_id INT, score INT); INSERT INTO League_Y (match_id, score) VALUES (1, 100), (2, 90), (3, 110);", "sql": "SELECT MAX(score) FROM League_Y;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 32, "num_statements": 1} {"question": "What is the total amount of Shariah-compliant loans issued by financial institutions in the United States, broken down by state, for the year 2020?", "schema": "CREATE TABLE financial_institutions (institution_id INT, institution_name VARCHAR(255), state VARCHAR(255)); INSERT INTO financial_institutions (institution_id, institution_name, state) VALUES (1, 'Institution A', 'New York'), (2, 'Institution B', 'California'); CREATE TABLE shariah_compliant_loans (loan_id INT, institution_id INT, loan_amount DECIMAL(10, 2), loan_date DATE);", "sql": "SELECT f.state, SUM(s.loan_amount) as total_loan_amount FROM shariah_compliant_loans s JOIN financial_institutions f ON s.institution_id = f.institution_id WHERE YEAR(loan_date) = 2020 GROUP BY f.state;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 202, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: When was the last final lost of the club with a last win in 1959?", "schema": "CREATE TABLE table_name_70 (last_final_lost VARCHAR, last_win VARCHAR)", "sql": "SELECT last_final_lost FROM table_name_70 WHERE last_win = '1959';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'create_index' (example 219).", "schema": null, "sql": "INSERT INTO covering_index_heap VALUES(1,2,'BBB');", "explanation": "DML from PostgreSQL core regression test for Create Index.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 50, "num_statements": 1} {"question": "PostgreSQL regression test 'money': Write the SELECT query (example 9).", "schema": null, "sql": "SELECT 2 * m FROM money_data;", "explanation": "Regression test for Money in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT 2 * m FROM money_data) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 29, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Tell me the year built for number built of 10", "schema": "CREATE TABLE table_name_14 (year_built__converted VARCHAR, _ VARCHAR, no_built__converted VARCHAR)", "sql": "SELECT year_built__converted * _ FROM table_name_14 WHERE no_built__converted * _ = '10';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 89, "num_statements": 1} {"question": "How many investments have been made with a risk assessment score greater than 80?", "schema": "CREATE TABLE investments (id INT, sector VARCHAR(255), risk_assessment_score INT); INSERT INTO investments (id, sector, risk_assessment_score) VALUES (1, 'Technology', 80), (2, 'Healthcare', 70), (3, 'Social Impact Investing', 90);", "sql": "SELECT COUNT(*) FROM investments WHERE risk_assessment_score > 80;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the attendance when oadby town is away?", "schema": "CREATE TABLE table_name_37 (attendance VARCHAR, away_team VARCHAR)", "sql": "SELECT attendance FROM table_name_37 WHERE away_team = 'oadby town';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 68, "num_statements": 1} {"question": "Calculate the average age of visitors who participated in offline events in Australia.", "schema": "CREATE TABLE EventParticipants (event_id INT, country VARCHAR(20), participant_age INT, event_type VARCHAR(10)); INSERT INTO EventParticipants (event_id, country, participant_age, event_type) VALUES (1, 'Australia', 25, 'Offline'), (2, 'Australia', 30, 'Online'), (3, 'New Zealand', 35, 'Offline');", "sql": "SELECT AVG(participant_age) FROM EventParticipants WHERE country = 'Australia' AND event_type = 'Offline';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 106, "num_statements": 1} {"question": "What is the average environmental factor score for projects in the education sector with an investment amount greater than $700,000?", "schema": "CREATE TABLE impact_investments (id INT, project VARCHAR(50), location VARCHAR(50), investment_amount DECIMAL(10,2), impact_score INT, primary_sector VARCHAR(50)); INSERT INTO impact_investments (id, project, location, investment_amount, impact_score, primary_sector) VALUES (1, 'School Construction', 'Nigeria', 750000.00, 85, 'Education');", "sql": "SELECT AVG(e.environmental_factor) as avg_env_factor FROM esg_factors e JOIN impact_investments i ON e.investment_id = i.id WHERE i.primary_sector = 'Education' AND i.investment_amount > 700000;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 194, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the Milan – San Remo which has a Liège–Bastogne–Liège of danilo di luca ( ita )?", "schema": "CREATE TABLE table_name_81 (milan___san_remo VARCHAR, liège_bastogne_liège VARCHAR)", "sql": "SELECT milan___san_remo FROM table_name_81 WHERE liège_bastogne_liège = 'danilo di luca ( ita )';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 97, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'merge' (example 115).", "schema": null, "sql": "INSERT INTO source VALUES (4, 40);", "explanation": "DML from PostgreSQL core regression test for Merge.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 34, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the Company Name, when the Accreditation Level is Joyn Hot Fixes, and when the Accreditation Status is Approved (Awarded 17.05.13)?", "schema": "CREATE TABLE table_name_13 (company_name VARCHAR, accreditation_level VARCHAR, accreditation_status VARCHAR)", "sql": "SELECT company_name FROM table_name_13 WHERE accreditation_level = 'joyn hot fixes' AND accreditation_status = 'approved (awarded 17.05.13)';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 141, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the English title of 餓狼伝説バトルアーカイブズ2?", "schema": "CREATE TABLE table_name_95 (english_title VARCHAR, japanese_title VARCHAR)", "sql": "SELECT english_title FROM table_name_95 WHERE japanese_title = '餓狼伝説バトルアーカイブズ2';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 80, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the highest Total when Georgia is the nation with less than 11 rank?", "schema": "CREATE TABLE table_name_46 (total INTEGER, nation VARCHAR, rank VARCHAR)", "sql": "SELECT MAX(total) FROM table_name_46 WHERE nation = 'georgia' AND rank < 11;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 76, "num_statements": 1} {"question": "PostgreSQL regression test 'misc_functions': Write the SELECT query (example 76).", "schema": null, "sql": "select count(*) = 1 as dot_found\n from pg_ls_dir('.', false, false) as ls where ls = '.';", "explanation": "Regression test for Misc Functions in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select count(*) = 1 as dot_found\n from pg_ls_dir('.', false, false) as ls where ls = '.') AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 90, "num_statements": 1} {"question": "How many news articles were published about climate change in the last year, for publications in the United States?", "schema": "CREATE TABLE news_articles (id INT, title VARCHAR(100), publication_date DATE, topic VARCHAR(50), publication_country VARCHAR(50)); INSERT INTO news_articles (id, title, publication_date, topic, publication_country) VALUES (1, 'Climate Change: A Growing Crisis', '2022-02-12', 'Climate Change', 'United States'), (2, 'Political Turmoil in Europe', '2022-02-13', 'Politics', 'United Kingdom');", "sql": "SELECT COUNT(*) FROM news_articles WHERE topic = 'Climate Change' AND publication_country = 'United States' AND publication_date >= DATEADD(year, -1, GETDATE());", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 161, "num_statements": 1} {"question": "What is the minimum height of players in the basketball team 'Los Angeles Lakers'?", "schema": "CREATE TABLE players (player_name TEXT, team TEXT, height FLOAT); INSERT INTO players (player_name, team, height) VALUES ('Charlie Davis', 'Los Angeles Lakers', 200.66); INSERT INTO players (player_name, team, height) VALUES ('Diana Williams', 'Los Angeles Lakers', 192.02);", "sql": "SELECT MIN(height) FROM players WHERE team = 'Los Angeles Lakers';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "How many occurrences of pests were there for each pest type on farms in the West region, owned by farmers over 50 years old?", "schema": "CREATE TABLE pests (id INT, date DATE, name VARCHAR(50), count INT, farm_id INT, FOREIGN KEY (farm_id) REFERENCES farmers(id)); INSERT INTO pests (id, date, name, count, farm_id) VALUES (1, '2022-01-01', 'Aphids', 10, 1), (2, '2022-01-02', 'Spider mites', 8, 2), (3, '2022-01-03', 'Thrips', 12, 5), (4, '2022-01-04', 'Whiteflies', 7, 6); INSERT INTO farmers (id, name, region, age) VALUES (1, 'James', 'West', 53), (2, 'Sophia', 'South', 40), (3, 'Mason', 'West', 57), (4, 'Lily', 'East', 55), (5, 'Olivia', 'West', 60), (6, 'Benjamin', 'East', 48);", "sql": "SELECT p.name, COUNT(p.count) as total_occurrences FROM pests p JOIN farmers f ON p.farm_id = f.id WHERE f.region = 'West' AND f.age > 50 GROUP BY p.name;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 154, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which tier is for football at Tianhe Stadium?", "schema": "CREATE TABLE table_name_62 (tier VARCHAR, sport VARCHAR, stadium VARCHAR)", "sql": "SELECT tier FROM table_name_62 WHERE sport = 'football' AND stadium = 'tianhe stadium';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 87, "num_statements": 1} {"question": "What is the average ticket price for TeamA's home games?", "schema": "CREATE TABLE tickets (id INT, team VARCHAR(50), location VARCHAR(50), price DECIMAL(5, 2)); INSERT INTO tickets (id, team, location, price) VALUES (1, 'TeamA', 'Home', 100.00), (2, 'TeamA', 'Away', 75.00), (3, 'TeamB', 'Home', 120.00), (4, 'TeamB', 'Away', 80.00);", "sql": "SELECT AVG(price) FROM tickets WHERE team = 'TeamA' AND location = 'Home';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 74, "num_statements": 1} {"question": "What is the average time between train breakdowns for trains in the 'South' district?", "schema": "CREATE TABLE TrainBreakdowns (BreakdownID INT, BreakdownDate DATE, District VARCHAR(20)); INSERT INTO TrainBreakdowns (BreakdownID, BreakdownDate, District) VALUES (1, '2022-01-02', 'North'), (2, '2022-01-05', 'South'), (3, '2022-01-07', 'East'), (4, '2022-01-10', 'South'), (5, '2022-01-12', 'West'); CREATE TABLE TrainEvents (EventID INT, EventDate DATE, EventType VARCHAR(20), TrainID INT); INSERT INTO TrainEvents (EventID, EventDate, EventType, TrainID) VALUES (1, '2022-01-01', 'Start of Service', 1001), (2, '2022-01-02', 'Breakdown', 1001), (3, '2022-01-05', 'Breakdown', 1002), (4, '2022-01-06', 'End of Service', 1002), (5, '2022-01-07', 'Start of Service', 1003), (6, '2022-01-07', 'Breakdown', 1003);", "sql": "SELECT AVG(DATEDIFF(day, EventDate, LEAD(EventDate) OVER (PARTITION BY TrainID ORDER BY EventDate))) FROM TrainEvents WHERE EventType = 'Breakdown' AND District = 'South';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "query_window_function", "is_postgresql_specific": true, "sql_length": 171, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'citext' (example 472).", "schema": null, "sql": "SELECT citext_pattern_ge('A'::citext, 'b'::citext) AS false;", "explanation": "Example query from the 'citext' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 60, "num_statements": 1} {"question": "Which policies were implemented in 'Delhi' or 'Mumbai' between 2018 and 2021?", "schema": "CREATE TABLE City (id INT, name VARCHAR(50)); INSERT INTO City (id, name) VALUES (1, 'New York'); INSERT INTO City (id, name) VALUES (2, 'Los Angeles'); INSERT INTO City (id, name) VALUES (3, 'Delhi'); INSERT INTO City (id, name) VALUES (4, 'Mumbai'); INSERT INTO City (id, name) VALUES (5, 'Tokyo'); CREATE TABLE Policy (id INT, name VARCHAR(50), city_id INT, category VARCHAR(50), budget DECIMAL(10,2), start_date DATE, end_date DATE); INSERT INTO Policy (id, name, city_id, category, budget, start_date, end_date) VALUES (1, 'Education', 3, 'Education', 1200000, '2021-01-01', '2023-12-31'); INSERT INTO Policy (id, name, city_id, category, budget, start_date, end_date) VALUES (2, 'Healthcare', 3, 'Healthcare', 1500000, '2020-01-01', '2022-12-31'); INSERT INTO Policy (id, name, city_id, category, budget, start_date, end_date) VALUES (3, 'Transportation', 4, 'Transportation', 2000000, '2019-01-01', '2024-12-31'); INSERT INTO Policy (id, name, city_id, category, budget, start_date, end_date) VALUES (4, 'Education', 4, 'Education', 1800000, '2020-01-01', '2023-12-31');", "sql": "SELECT name, start_date FROM Policy JOIN City ON Policy.city_id = City.id WHERE City.name IN ('Delhi', 'Mumbai') AND YEAR(start_date) BETWEEN 2018 AND 2021;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 156, "num_statements": 1} {"question": "PostgreSQL regression test 'select_distinct_on': Write the SELECT query (example 17).", "schema": null, "sql": "SELECT DISTINCT ON (y, x) x, y FROM (SELECT * FROM distinct_on_tbl ORDER BY x) s;", "explanation": "Regression test for Select Distinct On in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT DISTINCT ON (y, x) x, y FROM (SELECT * FROM distinct_on_tbl ORDER BY x) s) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 81, "num_statements": 1} {"question": "What is the maximum property price in sustainable neighborhoods with co-ownership agreements?", "schema": "CREATE TABLE property (id INT, price INT, sustainability_rating INT, co_ownership BOOLEAN); INSERT INTO property (id, price, sustainability_rating, co_ownership) VALUES (1, 500000, 5, true), (2, 400000, 3, false);", "sql": "SELECT MAX(price) FROM property WHERE sustainability_rating >= 4 AND co_ownership = true;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 89, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the title of the episode with production code 2J5515?", "schema": "CREATE TABLE table_28582091_2 (title VARCHAR, production_code VARCHAR)", "sql": "SELECT title FROM table_28582091_2 WHERE production_code = '2J5515';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 68, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many places have yamaha as the machine, and 89.85mph as the speed?", "schema": "CREATE TABLE table_name_30 (place INTEGER, machine VARCHAR, speed VARCHAR)", "sql": "SELECT SUM(place) FROM table_name_30 WHERE machine = 'yamaha' AND speed = '89.85mph';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 85, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the best finish record for Sean O'hair?", "schema": "CREATE TABLE table_29504351_2 (best_finish VARCHAR, player VARCHAR)", "sql": "SELECT best_finish FROM table_29504351_2 WHERE player = 'Sean O'Hair';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 70, "num_statements": 1} {"question": "How many players from India play games that are available in the 'Action' category?", "schema": "CREATE TABLE Players (PlayerID INT, Age INT, Country VARCHAR(50)); INSERT INTO Players (PlayerID, Age, Country) VALUES (1, 25, 'India'), (2, 30, 'Canada'), (3, 22, 'Germany'), (4, 35, 'Japan'); CREATE TABLE GameLibrary (GameID INT, GameName VARCHAR(50), GameType VARCHAR(50), Category VARCHAR(50)); INSERT INTO GameLibrary (GameID, GameName, GameType, Category) VALUES (1, 'GameA', 'VR', 'Action'), (2, 'GameB', 'Non-VR', 'Strategy'), (3, 'GameC', 'VR', 'Action'); CREATE TABLE PlayerGameLibrary (PlayerID INT, GameID INT); INSERT INTO PlayerGameLibrary (PlayerID, GameID) VALUES (1, 1), (2, 2), (3, 1), (4, 3);", "sql": "SELECT COUNT(Players.PlayerID) FROM Players JOIN PlayerGameLibrary ON Players.PlayerID = PlayerGameLibrary.PlayerID JOIN GameLibrary ON PlayerGameLibrary.GameID = GameLibrary.GameID WHERE Players.Country = 'India' AND GameLibrary.Category = 'Action';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 250, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the Score, when the Year is after 2007, and when the Country is England?", "schema": "CREATE TABLE table_name_5 (score VARCHAR, year VARCHAR, country VARCHAR)", "sql": "SELECT score FROM table_name_5 WHERE year > 2007 AND country = 'england';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 73, "num_statements": 1} {"question": "What is the total mass of spacecraft manufactured by 'Cosmic Corp.' in 2022?", "schema": "CREATE TABLE spacecraft(id INT, name VARCHAR(50), manufacturer VARCHAR(50), mass FLOAT, manufacture_year INT); INSERT INTO spacecraft VALUES(1, 'Artemis 1', 'Cosmic Corp.', 25000., 2022), (2, 'Artemis 2', 'Cosmic Corp.', 26000., 2023);", "sql": "SELECT SUM(mass) FROM spacecraft WHERE manufacturer = 'Cosmic Corp.' AND manufacture_year = 2022;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 97, "num_statements": 1} {"question": "What is the total calorie count for meals served by each vendor?", "schema": "CREATE TABLE Meals (MealID INT, MealName VARCHAR(50), Vendor VARCHAR(50), Calories INT); INSERT INTO Meals (MealID, MealName, Vendor, Calories) VALUES (1, 'Spaghetti Bolognese', 'Pasta Palace', 650), (2, 'Chicken Tikka Masala', 'Curry House', 850);", "sql": "SELECT Vendor, SUM(Calories) as TotalCalories FROM Meals GROUP BY Vendor;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 73, "num_statements": 1} {"question": "What is the percentage of patients in Chicago who have not received the flu vaccine this year?", "schema": "CREATE TABLE patient (patient_id INT, age INT, gender VARCHAR(10), city VARCHAR(20)); INSERT INTO patient (patient_id, age, gender, city) VALUES (1, 5, 'Female', 'Chicago'); INSERT INTO patient (patient_id, age, gender, city) VALUES (2, 10, 'Male', 'Chicago');", "sql": "SELECT 100.0 * SUM(CASE WHEN flu_vaccine_date IS NULL THEN 1 ELSE 0 END) OVER (PARTITION BY city ORDER BY patient_id DESC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) / COUNT(*) OVER (PARTITION BY city ORDER BY patient_id DESC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS percentage FROM patient WHERE city = 'Chicago';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "query_window_function", "is_postgresql_specific": true, "sql_length": 348, "num_statements": 1} {"question": "What is the minimum sea level, grouped by month?", "schema": "CREATE TABLE sea_level (id INT, month INT, level FLOAT); INSERT INTO sea_level (id, month, level) VALUES (1, 1, 20); INSERT INTO sea_level (id, month, level) VALUES (2, 2, 19); INSERT INTO sea_level (id, month, level) VALUES (3, 3, 18);", "sql": "SELECT month, MIN(level) FROM sea_level GROUP BY month;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 55, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which District is Constituency number 22?", "schema": "CREATE TABLE table_name_43 (district VARCHAR, constituency_number VARCHAR)", "sql": "SELECT district FROM table_name_43 WHERE constituency_number = '22';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 68, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the surface made of if the year is 1979 and the championship is US Open?", "schema": "CREATE TABLE table_22597626_2 (surface VARCHAR, championship VARCHAR, year VARCHAR)", "sql": "SELECT surface FROM table_22597626_2 WHERE championship = 'US Open' AND year = 1979;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 84, "num_statements": 1} {"question": "How many languages are spoken in each country?", "schema": "CREATE TABLE Countries (CountryID INT, CountryName VARCHAR(50)); CREATE TABLE Languages (LanguageID INT, CountryID INT, Spoken VARCHAR(50)); INSERT INTO Countries VALUES (1, 'CountryA'), (2, 'CountryB'), (3, 'CountryC'); INSERT INTO Languages VALUES (1, 1, 'LanguageX'), (2, 1, 'LanguageY'), (3, 2, 'LanguageY'), (4, 3, 'LanguageZ'), (5, 3, 'LanguageW');", "sql": "SELECT C.CountryName, COUNT(L.LanguageID) AS LanguagesSpoken FROM Countries C JOIN Languages L ON C.CountryID = L.CountryID GROUP BY C.CountryName;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 147, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'case' (example 7).", "schema": null, "sql": "INSERT INTO CASE2_TBL VALUES (1, -1);", "explanation": "DML from PostgreSQL core regression test for Case.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 37, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Show the name of the party that has the most delegates.", "schema": "CREATE TABLE election (Party VARCHAR); CREATE TABLE party (Party VARCHAR, Party_ID VARCHAR)", "sql": "SELECT T2.Party FROM election AS T1 JOIN party AS T2 ON T1.Party = T2.Party_ID GROUP BY T1.Party ORDER BY COUNT(*) DESC LIMIT 1;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 128, "num_statements": 1} {"question": "Determine the difference in average transaction amounts between customers from 'North America' and 'Asia'.", "schema": "CREATE TABLE transaction_amounts (customer_id INT, region VARCHAR(20), transaction_amount NUMERIC(12,2)); INSERT INTO transaction_amounts (customer_id, region, transaction_amount) VALUES (1, 'Asia', 1000), (2, 'Europe', 1500), (3, 'Africa', 750), (4, 'North America', 2000), (5, 'South America', 1200), (6, 'Australia', 1750), (7, 'Asia', 1500), (8, 'Europe', 2000), (9, 'Africa', 1000), (10, 'North America', 2500);", "sql": "SELECT AVG(transaction_amount) as avg_asia FROM transaction_amounts WHERE region = 'Asia' INTERSECT SELECT AVG(transaction_amount) as avg_north_america FROM transaction_amounts WHERE region = 'North America';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 208, "num_statements": 1} {"question": "List the names of all startups that have received funding and have diverse founding teams (more than one founder with different genders).", "schema": "CREATE TABLE startups (id INT, name TEXT, founder1 TEXT, founder2 TEXT, funding FLOAT); INSERT INTO startups (id, name, founder1, founder2, funding) VALUES (1, 'Acme', 'John Doe', 'Jane Smith', 500000.00); INSERT INTO startups (id, name, founder1, founder2, funding) VALUES (2, 'Beta Corp', 'Jane Smith', 'John Doe', 750000.00); INSERT INTO startups (id, name, founder1, founder2, funding) VALUES (3, 'Gamma Inc', 'Alice', NULL, 300000.00);", "sql": "SELECT name FROM startups WHERE (founder1 != founder2) AND (founder1 IS NOT NULL AND founder2 IS NOT NULL);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 107, "num_statements": 1} {"question": "Write the SQL definition from PostgreSQL contrib extension 'contrib' (item 2).", "schema": null, "sql": "CREATE FUNCTION pgstatindex(IN relname text,\n OUT version INT,\n OUT tree_level INT,\n OUT index_size BIGINT,\n OUT root_block_no BIGINT,\n OUT internal_pages BIGINT,\n OUT leaf_pages BIGINT,\n OUT empty_pages BIGINT,\n OUT deleted_pages BIGINT,\n OUT avg_leaf_density FLOAT8,\n OUT leaf_fragmentation FLOAT8)\nAS 'MODULE_PATHNAME', 'pgstatindex'\nLANGUAGE C STRICT PARALLEL SAFE;", "explanation": "SQL definition from the 'contrib' PostgreSQL contrib extension.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 399, "num_statements": 1} {"question": "How many esports events were held in total in 2021?", "schema": "CREATE TABLE esports_events (id INT, year INT, location VARCHAR(20)); INSERT INTO esports_events (id, year, location) VALUES (1, 2022, 'USA'), (2, 2022, 'Germany'), (3, 2021, 'France');", "sql": "SELECT COUNT(*) FROM esports_events WHERE year = 2021;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 54, "num_statements": 1} {"question": "Which waste types are not included in the circular economy initiatives?", "schema": "CREATE TABLE waste_types (type TEXT, id INTEGER); INSERT INTO waste_types (type, id) VALUES ('Plastic', 1), ('Paper', 2), ('Glass', 3), ('Metal', 4); CREATE TABLE circular_economy_initiatives (waste_type_id INTEGER); INSERT INTO circular_economy_initiatives (waste_type_id) VALUES (1), (2), (3);", "sql": "SELECT wt.type FROM waste_types wt LEFT JOIN circular_economy_initiatives cei ON wt.id = cei.waste_type_id WHERE cei.waste_type_id IS NULL;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 139, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the attendance date of the game home team Shrewsbury Town played?", "schema": "CREATE TABLE table_name_48 (attendance VARCHAR, home_team VARCHAR)", "sql": "SELECT attendance FROM table_name_48 WHERE home_team = 'shrewsbury town';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 73, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the date of the game that had a loss of Rojas (9–8)?", "schema": "CREATE TABLE table_name_74 (date VARCHAR, loss VARCHAR)", "sql": "SELECT date FROM table_name_74 WHERE loss = 'rojas (9–8)';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 58, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'pgcrypto' (example 11).", "schema": null, "sql": "select pgp_sym_decrypt(dearmor('\n-----BEGIN PGP MESSAGE-----\nComment: dat1.aes.sha1.mdc.s2k3.z0\n\njA0EBwMCEq4Su3ZqNEJg0kQB4QG5jBTKF0i04xtH+avzmLhstBNRxvV3nsmB3cwl\nz+9ZaA/XdSx5ZiFnMym8P6r8uY9rLjjNptvvRHlxIReF+p9MNg==\n=VJKg\n-----END PGP MESSAGE-----\n'), 'foobar');", "explanation": "Example query from the 'pgcrypto' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "plpgsql", "is_postgresql_specific": false, "sql_length": 261, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'publication' (example 605).", "schema": null, "sql": "UPDATE pub_testpart2.parent2 set a = 1;", "explanation": "DML from PostgreSQL core regression test for Publication.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 39, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Is Jasper being producted?", "schema": "CREATE TABLE table_name_66 (in_production VARCHAR, codename VARCHAR)", "sql": "SELECT in_production FROM table_name_66 WHERE codename = 'jasper';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "Delete player records from the \"players\" table who have not played any game", "schema": "CREATE TABLE players (player_id INT, name VARCHAR(255), country VARCHAR(255), date_registered DATE); CREATE TABLE player_scores (player_id INT, game_name VARCHAR(255), score INT, date DATE);", "sql": "DELETE FROM players WHERE player_id NOT IN (SELECT player_id FROM player_scores);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "dml_delete", "is_postgresql_specific": false, "sql_length": 81, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the publication date when the fictional date is 2112", "schema": "CREATE TABLE table_name_25 (publication_date VARCHAR, fictional_date VARCHAR)", "sql": "SELECT publication_date FROM table_name_25 WHERE fictional_date = '2112';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 73, "num_statements": 1} {"question": "Which regulatory frameworks were adopted in 2022?", "schema": "CREATE TABLE regulatory_frameworks (framework_id INT PRIMARY KEY, country VARCHAR(255), name VARCHAR(255), framework TEXT, adoption_date TIMESTAMP); INSERT INTO regulatory_frameworks (framework_id, country, name, framework, adoption_date) VALUES (3, 'Japan', 'JFSA Guidelines', 'Blockchain regulations in Japan', '2022-04-01');", "sql": "SELECT country, name, framework FROM regulatory_frameworks WHERE adoption_date BETWEEN '2022-01-01' AND '2022-12-31';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 117, "num_statements": 1} {"question": "What are the unique IP addresses that have been associated with 'APT35' threat actor?", "schema": "CREATE TABLE actor_ip (id INT, threat_actor VARCHAR(255), ip_address VARCHAR(255)); INSERT INTO actor_ip (id, threat_actor, ip_address) VALUES (1, 'APT28', '192.168.1.1'), (2, 'APT33', '10.0.0.1'), (3, 'APT34', '192.168.1.2'), (4, 'APT29', '10.0.0.2'), (5, 'APT35', '192.168.1.3'), (6, 'APT28', '10.0.0.3');", "sql": "SELECT DISTINCT ip_address FROM actor_ip WHERE threat_actor = 'APT35';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 70, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'polymorphism' (example 125).", "schema": null, "sql": "CREATE AGGREGATE myaggn15b(BASETYPE = anyelement, SFUNC = tfnp,\n STYPE = anyarray, INITCOND = '{}');", "explanation": "DDL from PostgreSQL core regression test for Polymorphism.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "ddl_advanced", "is_postgresql_specific": true, "sql_length": 101, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many candidates won the election in the district whose incumbent is Bud Shuster?", "schema": "CREATE TABLE table_1341604_39 (candidates VARCHAR, incumbent VARCHAR)", "sql": "SELECT COUNT(candidates) FROM table_1341604_39 WHERE incumbent = 'Bud Shuster';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 79, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many times does the rebuilt data contain cannot handle non-empty timestamp argument! 1929 and scrapped data contain cannot handle non-empty timestamp argument! 1954?", "schema": "CREATE TABLE table_12113888_1 (number VARCHAR, rebuilt VARCHAR, scrapped_sold VARCHAR)", "sql": "SELECT COUNT(number) FROM table_12113888_1 WHERE rebuilt = 'Cannot handle non-empty timestamp argument! 1929' AND scrapped_sold = 'Cannot handle non-empty timestamp argument! 1954';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 181, "num_statements": 1} {"question": "What is the total salary expense for each department in 2021?", "schema": "CREATE TABLE departments (id INT, name VARCHAR(255));CREATE TABLE employees (id INT, department_id INT, salary INT, hire_date DATE);", "sql": "SELECT d.name, SUM(e.salary) AS salary_expense FROM departments d INNER JOIN employees e ON d.id = e.department_id WHERE e.hire_date >= '2021-01-01' AND e.hire_date < '2022-01-01' GROUP BY d.name;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 200, "num_statements": 1} {"question": "What is the maximum distance for routes that originate from 'City C'?", "schema": "CREATE TABLE Routes (id INT, origin_city VARCHAR(255), destination_city VARCHAR(255), distance INT, eta INT); INSERT INTO Routes (id, origin_city, destination_city, distance, eta) VALUES (1, 'City A', 'City C', 200, 2); INSERT INTO Routes (id, origin_city, destination_city, distance, eta) VALUES (2, 'City B', 'City D', 250, 3); INSERT INTO Routes (id, origin_city, destination_city, distance, eta) VALUES (3, 'City C', 'City E', 300, 4);", "sql": "SELECT origin_city, MAX(distance) FROM Routes WHERE origin_city = 'City C' GROUP BY origin_city;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 96, "num_statements": 1} {"question": "What is the total number of hospitals and total number of beds for each city?", "schema": "CREATE TABLE hospitals (id INT, name TEXT, city TEXT, state TEXT, beds INT); INSERT INTO hospitals (id, name, city, state, beds) VALUES (1, 'General Hospital', 'Miami', 'Florida', 500); INSERT INTO hospitals (id, name, city, state, beds) VALUES (2, 'Memorial Hospital', 'Boston', 'Massachusetts', 600);", "sql": "SELECT city, COUNT(*) as hospital_count, SUM(beds) as total_beds FROM hospitals GROUP BY city;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 94, "num_statements": 1} {"question": "pgTAP test for Aretap (assertion 171).", "schema": null, "sql": "SELECT * FROM check_test(\n columns_are( 'fou', ARRAY['id', 'name', 'numb', 'myInt'], 'whatever' ),\n true,\n 'columns_are(table, columns, desc)',\n 'whatever',\n ''\n);", "explanation": "SQL assertion from pgTAP test suite for Aretap.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 178, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Show the name and the nationality of the oldest host.", "schema": "CREATE TABLE HOST (Name VARCHAR, Nationality VARCHAR, Age VARCHAR)", "sql": "SELECT Name, Nationality FROM HOST ORDER BY Age DESC LIMIT 1;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 61, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the number of viewers in millions for the broadcast from 2010?", "schema": "CREATE TABLE table_24212608_1 (viewers__millions_ VARCHAR, broadcast_date VARCHAR)", "sql": "SELECT viewers__millions_ FROM table_24212608_1 WHERE broadcast_date = 2010;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 76, "num_statements": 1} {"question": "Update the hours_served column in the volunteers table to set the value to 35.00 for the record with id = 4.", "schema": "CREATE TABLE volunteers (id INT, name VARCHAR(50), hours_served FLOAT); INSERT INTO volunteers (id, name, hours_served) VALUES (4, 'Olivia Thompson', 25.00);", "sql": "UPDATE volunteers SET hours_served = 35.00 WHERE id = 4;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 56, "num_statements": 1} {"question": "How many technology accessibility patents were granted to women in tech in 2019?", "schema": "CREATE TABLE tech_accessibility_patents (id INT, year INT, community VARCHAR(255), type VARCHAR(255)); INSERT INTO tech_accessibility_patents (id, year, community, type) VALUES (1, 2019, 'Women in Tech', 'Technology Accessibility'); INSERT INTO tech_accessibility_patents (id, year, community, type) VALUES (2, 2020, 'Minority Tech Group', 'AI for Good');", "sql": "SELECT COUNT(*) FROM tech_accessibility_patents WHERE year = 2019 AND community = 'Women in Tech';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 98, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many matches were 44?", "schema": "CREATE TABLE table_24039597_26 (dismissals VARCHAR, matches VARCHAR)", "sql": "SELECT COUNT(dismissals) FROM table_24039597_26 WHERE matches = 44;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 67, "num_statements": 1} {"question": "What is the number of new users who signed up from each country, in the last month?", "schema": "CREATE TABLE users (user_id INT, user_name TEXT, user_country TEXT, user_signup_date DATE);", "sql": "SELECT user_country, COUNT(DISTINCT user_id) as new_users FROM users WHERE user_signup_date >= DATE_SUB(CURRENT_DATE, INTERVAL 1 MONTH) GROUP BY user_country;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 158, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the score when november 15 is the date?", "schema": "CREATE TABLE table_name_86 (score VARCHAR, date VARCHAR)", "sql": "SELECT score FROM table_name_86 WHERE date = 'november 15';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 59, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What Winning team has 22 July as a Date?", "schema": "CREATE TABLE table_name_79 (winning_team VARCHAR, date VARCHAR)", "sql": "SELECT winning_team FROM table_name_79 WHERE date = '22 july';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 62, "num_statements": 1} {"question": "Rank the water treatment plants in India by the amount of wastewater treated daily in descending order.", "schema": "CREATE TABLE india_wastewater_treatment (id INT, plant_name VARCHAR(50), daily_wastewater_treated FLOAT); INSERT INTO india_wastewater_treatment (id, plant_name, daily_wastewater_treated) VALUES (1, 'Bangalore Plant', 500), (2, 'Mumbai Plant', 600), (3, 'Delhi Plant', 400), (4, 'Chennai Plant', 450);", "sql": "SELECT plant_name, daily_wastewater_treated, RANK() OVER (ORDER BY daily_wastewater_treated DESC) as rank FROM india_wastewater_treatment;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_window_function", "is_postgresql_specific": true, "sql_length": 138, "num_statements": 1} {"question": "Find the 'total area' of 'coniferous' forests in '2021'.", "schema": "CREATE TABLE forests (id INT, biome VARCHAR(50), area FLOAT, year INT); INSERT INTO forests (id, biome, area, year) VALUES (1, 'coniferous', 5000.0, 2021);", "sql": "SELECT SUM(area) FROM forests WHERE biome = 'coniferous' AND year = 2021;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 73, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the round where winners from the previous round totals 8?", "schema": "CREATE TABLE table_18328569_1 (round VARCHAR, winners_from_previous_round VARCHAR)", "sql": "SELECT round FROM table_18328569_1 WHERE winners_from_previous_round = '8';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 75, "num_statements": 1} {"question": "Update security policies' category", "schema": "CREATE TABLE security_policies (id INT, policy_id VARCHAR(255), policy_name VARCHAR(255), category VARCHAR(255), last_updated DATETIME); INSERT INTO security_policies (id, policy_id, policy_name, category, last_updated) VALUES (1, 'POL-002', 'Incident Response', 'Detection', '2021-07-01 11:00:00');", "sql": "UPDATE security_policies SET category = 'Monitoring' WHERE policy_id = 'POL-002';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 81, "num_statements": 1} {"question": "List members who did yoga or zumba workouts and their total workouts.", "schema": "CREATE TABLE membership_data (member_id INT, join_date DATE); CREATE TABLE workout_data (workout_id INT, member_id INT, workout_type VARCHAR(20), workout_date DATE);", "sql": "SELECT m.member_id, m.join_date, COUNT(w.workout_id) as total_workouts FROM membership_data m JOIN workout_data w ON m.member_id = w.member_id WHERE w.workout_type IN ('yoga', 'zumba') GROUP BY m.member_id;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 206, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is Date, when Label is Jugoton?", "schema": "CREATE TABLE table_name_48 (date VARCHAR, label VARCHAR)", "sql": "SELECT date FROM table_name_48 WHERE label = 'jugoton';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 55, "num_statements": 1} {"question": "Show an example of PostgreSQL CREATE FUNCTION (example 1).", "schema": null, "sql": "CREATE FUNCTION foo(int) ... CREATE FUNCTION foo(int, OUT text) ...;", "explanation": "PostgreSQL CREATE FUNCTION command.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 68, "num_statements": 1} {"question": "What was the total revenue for lipsticks sold in the USA in Q1 2022?", "schema": "CREATE TABLE cosmetics_sales(product_type VARCHAR(255), country VARCHAR(255), sales_quantity INT, sales_revenue DECIMAL(10,2));", "sql": "SELECT SUM(sales_revenue) FROM cosmetics_sales WHERE product_type = 'lipstick' AND country = 'USA' AND sales_date BETWEEN '2022-01-01' AND '2022-03-31';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 152, "num_statements": 1} {"question": "Delete all records in the ProductIngredients table with organic ingredients.", "schema": "CREATE TABLE ProductIngredients (productID INT, ingredient VARCHAR(50), organic BOOLEAN); INSERT INTO ProductIngredients (productID, ingredient, organic) VALUES (1, 'Aloe Vera', true), (2, 'Chamomile', true), (3, 'Retinol', false), (4, 'Hyaluronic Acid', false);", "sql": "DELETE FROM ProductIngredients WHERE organic = true;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_delete", "is_postgresql_specific": false, "sql_length": 52, "num_statements": 1} {"question": "PostgreSQL regression test 'numeric': Write the SELECT query (example 665).", "schema": null, "sql": "SELECT to_char(val, '9999999999999999.999999999999999PR')\n\tFROM num_data;", "explanation": "Regression test for Numeric in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT to_char(val, '9999999999999999.999999999999999PR')\n\tFROM num_data) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 73, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the value for 2009 when the value for 2011 is 138?", "schema": "CREATE TABLE table_name_87 (Id VARCHAR)", "sql": "SELECT 2009 FROM table_name_87 WHERE 2011 = '138';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 50, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Find the id and weight of all pets whose age is older than 1.", "schema": "CREATE TABLE pets (petid VARCHAR, weight VARCHAR, pet_age INTEGER)", "sql": "SELECT petid, weight FROM pets WHERE pet_age > 1;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 49, "num_statements": 1} {"question": "What are the mines with labor turnover rates higher than 0.08?", "schema": "CREATE TABLE labor_force (mine_name VARCHAR(255), employee_count INT, turnover_rate FLOAT); INSERT INTO labor_force (mine_name, employee_count, turnover_rate) VALUES ('Green Valley', 250, 0.09); INSERT INTO labor_force (mine_name, employee_count, turnover_rate) VALUES ('Blue Hills', 300, 0.07);", "sql": "SELECT mine_name FROM labor_force WHERE turnover_rate > 0.08;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 61, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who were the runners-up in the game that was won by Cork City F.C. on 10/05/1998?", "schema": "CREATE TABLE table_name_48 (runners_up VARCHAR, winners VARCHAR, date VARCHAR)", "sql": "SELECT runners_up FROM table_name_48 WHERE winners = 'cork city f.c.' AND date = '10/05/1998';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 94, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: WHAT PLAYER HAS UPLAND HIGH SCHOOL?", "schema": "CREATE TABLE table_name_88 (player VARCHAR, hometown_school VARCHAR)", "sql": "SELECT player FROM table_name_88 WHERE hometown_school = 'upland high school';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 78, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who was the Europe/Africa group i's opponent?", "schema": "CREATE TABLE table_name_61 (opponent VARCHAR, zone VARCHAR)", "sql": "SELECT opponent FROM table_name_61 WHERE zone = 'europe/africa group i';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 72, "num_statements": 1} {"question": "What is the maximum response time for any emergency in Miami-Dade County in 2021?", "schema": "CREATE TABLE emergency_responses (id INT, incident_id INT, response_time INT, city VARCHAR(255), state VARCHAR(255), county VARCHAR(255)); INSERT INTO emergency_responses (id, incident_id, response_time, city, state, county) VALUES (1, 1, 15, 'Miami', 'Florida', 'Miami-Dade County'); INSERT INTO emergency_responses (id, incident_id, response_time, city, state, county) VALUES (2, 2, 8, 'Miami Beach', 'Florida', 'Miami-Dade County');", "sql": "SELECT MAX(response_time) FROM emergency_responses WHERE county = 'Miami-Dade County' AND reported_date >= '2021-01-01' AND reported_date < '2022-01-01';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 153, "num_statements": 1} {"question": "How many unique mineral types are there in the 'mineral_survey' table?", "schema": "CREATE TABLE mineral_survey (id INT, mine_name VARCHAR, mineral VARCHAR, percentage_composition DECIMAL); INSERT INTO mineral_survey (id, mine_name, mineral, percentage_composition) VALUES (1, 'Crystal Mine', 'Quartz', 45.00), (2, 'Gemstone Gulch', 'Emerald', 75.00), (3, 'Ore Mountain', 'Gold', 90.00), (4, 'Granite Grove', 'Granite', 100.00);", "sql": "SELECT COUNT(DISTINCT mineral) FROM mineral_survey;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 51, "num_statements": 1} {"question": "What is the total production volume for each region, and what is the percentage of the total production volume accounted for by each region?", "schema": "CREATE TABLE production_volume (volume_id INT, well_id INT, production_year INT, production_volume FLOAT, region VARCHAR(50)); INSERT INTO production_volume (volume_id, well_id, production_year, production_volume, region) VALUES (3, 3, 2022, 220.0, 'Asia-Pacific'); INSERT INTO production_volume (volume_id, well_id, production_year, production_volume, region) VALUES (4, 4, 2023, 180.5, 'Africa');", "sql": "SELECT region, SUM(production_volume) as total_volume, PERCENTAGE_RANK() OVER (ORDER BY SUM(production_volume) DESC) as percentage_of_total FROM production_volume GROUP BY region;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_window_function", "is_postgresql_specific": true, "sql_length": 179, "num_statements": 1} {"question": "PL/pgSQL test: Plpython Record (example 65).", "schema": null, "sql": "SELECT * FROM test_type_record_error2();", "explanation": "PL/pgSQL example from PostgreSQL source test for Plpython Record.", "validation_query": null, "source": "plpgsql_source", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 40, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many were in the Kargahar (vidhan sabha constituency)?", "schema": "CREATE TABLE table_29785324_5 (constituency_no INTEGER, vidhan_sabha_constituency VARCHAR)", "sql": "SELECT MIN(constituency_no) FROM table_29785324_5 WHERE vidhan_sabha_constituency = 'Kargahar (Vidhan Sabha constituency)';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 123, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who wrote the episodes that had a viewership of 7.14?", "schema": "CREATE TABLE table_24910737_1 (written_by VARCHAR, us_viewers__millions_ VARCHAR)", "sql": "SELECT written_by FROM table_24910737_1 WHERE us_viewers__millions_ = '7.14';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 77, "num_statements": 1} {"question": "Count the number of public transportation trips taken in New York City for the month of January in the year 2022.", "schema": "CREATE TABLE trips (trip_id INT, trip_date DATE, trip_type VARCHAR(50), city VARCHAR(50)); INSERT INTO trips (trip_id, trip_date, trip_type, city) VALUES (1, '2022-01-01', 'Public Transportation', 'New York City'), (2, '2022-01-05', 'Taxi', 'New York City'), (3, '2022-01-10', 'Public Transportation', 'New York City');", "sql": "SELECT COUNT(*) FROM trips WHERE trip_type = 'Public Transportation' AND EXTRACT(MONTH FROM trip_date) = 1 AND EXTRACT(YEAR FROM trip_date) = 2022 AND city = 'New York City';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 174, "num_statements": 1} {"question": "Update the country for volunteer 'Bob' to Canada.", "schema": "CREATE TABLE Volunteers (id INT, name VARCHAR(255), country VARCHAR(255)); INSERT INTO Volunteers (id, name, country) VALUES (1, 'Alice', 'United States'), (2, 'Bob', 'United States');", "sql": "UPDATE Volunteers SET country = 'Canada' WHERE name = 'Bob';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 60, "num_statements": 1} {"question": "What is the minimum score obtained in the game 'Quantum Quest'?", "schema": "CREATE TABLE Game_Scores (Player_ID INT, Player_Name VARCHAR(50), Game_Name VARCHAR(50), Score INT); INSERT INTO Game_Scores (Player_ID, Player_Name, Game_Name, Score) VALUES (1, 'Olga Petrova', 'Ukraine', 400), (2, 'Ricardo Gonzales', 'Argentina', 700), (3, 'Elif Yilmaz', 'Turkey', 500), (4, 'Jordan Bennett', 'Australia', 600), (5, 'Nina Jensen', 'Norway', 800);", "sql": "SELECT MIN(Score) FROM Game_Scores WHERE Game_Name = 'Quantum Quest';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 69, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who is the team captain of the team that Lübzer is the shirt sponsor for?", "schema": "CREATE TABLE table_name_88 (team VARCHAR, shirt_sponsor VARCHAR)", "sql": "SELECT team AS captain FROM table_name_88 WHERE shirt_sponsor = 'lübzer';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 73, "num_statements": 1} {"question": "How many students participated in each support program?", "schema": "CREATE TABLE Students(student_id INT, name TEXT);CREATE TABLE Programs(program_id INT, program_name TEXT);CREATE TABLE Student_Programs(student_id INT, program_id INT);", "sql": "SELECT p.program_name, COUNT(sp.student_id) FROM Programs p INNER JOIN Student_Programs sp ON p.program_id = sp.program_id GROUP BY p.program_name;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 147, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the name on the Socialist ticket when the Democratic ticket is george k. shuler?", "schema": "CREATE TABLE table_name_6 (socialist_ticket VARCHAR, democratic_ticket VARCHAR)", "sql": "SELECT socialist_ticket FROM table_name_6 WHERE democratic_ticket = 'george k. shuler';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 87, "num_statements": 1} {"question": "What are the names and locations of climate finance projects that received more funding than any project in Europe?", "schema": "CREATE TABLE climate_finance_projects ( id INT, name VARCHAR(255), location VARCHAR(255), funding FLOAT ); INSERT INTO climate_finance_projects (id, name, location, funding) VALUES (1, 'Project P', 'Europe', 6000000); INSERT INTO climate_finance_projects (id, name, location, funding) VALUES (2, 'Project Q', 'Europe', 8000000);", "sql": "SELECT name, location FROM climate_finance_projects WHERE funding > (SELECT MAX(funding) FROM climate_finance_projects WHERE location = 'Europe');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 146, "num_statements": 1} {"question": "What is the total number of certified sustainable tourism businesses in Italy?", "schema": "CREATE TABLE sustainable_tourism (id INT, name TEXT, country TEXT, is_certified BOOLEAN); INSERT INTO sustainable_tourism (id, name, country, is_certified) VALUES (1, 'Eco Hotel', 'Italy', true), (2, 'Green Tourism', 'Italy', true), (3, 'Sustainable Travel Italy', 'Italy', false);", "sql": "SELECT COUNT(*) FROM sustainable_tourism WHERE country = 'Italy' AND is_certified = true;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 89, "num_statements": 1} {"question": "How many members have a gym membership starting in January?", "schema": "CREATE TABLE gym_memberships(member_id INT, start_date DATE); INSERT INTO gym_memberships(member_id, start_date) VALUES (1, '2022-01-01'); INSERT INTO gym_memberships(member_id, start_date) VALUES (2, '2022-02-01'); INSERT INTO gym_memberships(member_id, start_date) VALUES (3, '2021-12-15'); INSERT INTO gym_memberships(member_id, start_date) VALUES (4, '2022-01-15');", "sql": "SELECT COUNT(member_id) FROM gym_memberships WHERE MONTH(start_date) = 1;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 73, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many opponents played 1 game with a result win?", "schema": "CREATE TABLE table_18207285_2 (opponents INTEGER, result VARCHAR, game VARCHAR)", "sql": "SELECT MAX(opponents) FROM table_18207285_2 WHERE result = 'Win' AND game = 1;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 78, "num_statements": 1} {"question": "What is the total number of sustainable tour packages sold by vendors from Oceania?", "schema": "CREATE TABLE Vendors (VendorID INT, VendorName VARCHAR(50), Country VARCHAR(50)); INSERT INTO Vendors (VendorID, VendorName, Country) VALUES (1, 'GreenVacations', 'Australia'), (2, 'EcoTours', 'New Zealand'), (3, 'SustainableJourneys', 'Fiji'), (4, 'BluePlanetTours', 'USA'); CREATE TABLE Packages (PackageID INT, VendorID INT, PackageType VARCHAR(20), Sales INT); INSERT INTO Packages (PackageID, VendorID, PackageType, Sales) VALUES (1, 1, 'Sustainable', 500), (2, 1, 'Virtual', 300), (3, 2, 'Sustainable', 700), (4, 2, 'Virtual', 600), (5, 3, 'Sustainable', 800), (6, 3, 'Virtual', 400), (7, 4, 'Sustainable', 50), (8, 4, 'Virtual', 60);", "sql": "SELECT V.Country, SUM(P.Sales) as TotalSales FROM Vendors V INNER JOIN Packages P ON V.VendorID = P.VendorID WHERE V.Country LIKE 'Oceania%' AND P.PackageType = 'Sustainable' GROUP BY V.Country;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 194, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the largest caps value for Glen Moss?", "schema": "CREATE TABLE table_name_50 (caps INTEGER, player VARCHAR)", "sql": "SELECT MAX(caps) FROM table_name_50 WHERE player = 'glen moss';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 63, "num_statements": 1} {"question": "Add a new soil moisture reading of 42 for sensor S102", "schema": "CREATE TABLE soil_moisture_sensors (sensor_id VARCHAR(10), moisture_level INT);", "sql": "INSERT INTO soil_moisture_sensors (sensor_id, moisture_level) VALUES ('S102', 42);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 82, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the name of the tournament played on Jun 17, 1973?", "schema": "CREATE TABLE table_name_3 (tournament VARCHAR, date VARCHAR)", "sql": "SELECT tournament FROM table_name_3 WHERE date = 'jun 17, 1973';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 64, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the venue of the 1990 Asian games?", "schema": "CREATE TABLE table_name_77 (venue VARCHAR, competition VARCHAR)", "sql": "SELECT venue FROM table_name_77 WHERE competition = '1990 asian games';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 71, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the 1st leg of Team 2 PSV Eindhoven?", "schema": "CREATE TABLE table_name_21 (team_2 VARCHAR)", "sql": "SELECT 1 AS st_leg FROM table_name_21 WHERE team_2 = 'psv eindhoven';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 69, "num_statements": 1} {"question": "What is the total plastic waste generation per capita by state in the USA in 2021?", "schema": "CREATE TABLE waste_generation (id INT, state VARCHAR(50), waste_amount FLOAT, waste_type VARCHAR(50), year INT); INSERT INTO waste_generation (id, state, waste_amount, waste_type, year) VALUES (1, 'California', 120000, 'plastic', 2021), (2, 'Texas', 100000, 'plastic', 2021), (3, 'New York', 90000, 'plastic', 2021);", "sql": "SELECT state, SUM(waste_amount) / (SELECT SUM(population) FROM populations WHERE populations.state = state AND populations.year = 2021) as per_capita FROM waste_generation WHERE waste_type = 'plastic' AND year = 2021 GROUP BY state ORDER BY per_capita DESC;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 257, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What date did season 12 premiere?", "schema": "CREATE TABLE table_2655016_4 (original_air_date VARCHAR, season__number VARCHAR)", "sql": "SELECT original_air_date FROM table_2655016_4 WHERE season__number = 12;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 72, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What are all the school years where class AAAA is in Gregory-Portland?", "schema": "CREATE TABLE table_14603212_5 (school_year VARCHAR, class_aAAA VARCHAR, Gregory VARCHAR, Portland VARCHAR)", "sql": "SELECT school_year FROM table_14603212_5 WHERE class_aAAA = Gregory - Portland;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 79, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the result on September 20?", "schema": "CREATE TABLE table_name_99 (result VARCHAR, date VARCHAR)", "sql": "SELECT result FROM table_name_99 WHERE date = 'september 20';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 61, "num_statements": 1} {"question": "What is the average sustainability score of manufacturers who produce products in the 'accessories' category?", "schema": "CREATE TABLE manufacturers (id INT PRIMARY KEY, name TEXT, location TEXT, sustainability_score INT); CREATE TABLE products (id INT PRIMARY KEY, name TEXT, category TEXT, price DECIMAL, manufacturer_id INT, FOREIGN KEY (manufacturer_id) REFERENCES manufacturers(id));", "sql": "SELECT AVG(m.sustainability_score) FROM manufacturers m JOIN products p ON m.id = p.manufacturer_id WHERE p.category = 'accessories';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 136, "num_statements": 1} {"question": "Who are the suppliers located in India?", "schema": "CREATE TABLE suppliers (id INT, name VARCHAR(255), location VARCHAR(255), sustainable_practices BOOLEAN); CREATE TABLE garments (id INT, name VARCHAR(255), category VARCHAR(255), price DECIMAL(5,2), quantity INT, supplier_id INT); CREATE TABLE manufacturing_costs (id INT, garment_id INT, labor_cost DECIMAL(5,2), material_cost DECIMAL(5,2), manufacturing_time INT); INSERT INTO suppliers (id, name, location, sustainable_practices) VALUES (3, 'Supplier C', 'India', true); INSERT INTO garments (id, name, category, price, quantity, supplier_id) VALUES (3, 'Garment Z', 'Bottoms', 45.99, 25, 3); INSERT INTO manufacturing_costs (id, garment_id, labor_cost, material_cost, manufacturing_time) VALUES (3, 3, 22.50, 21.25, 40);", "sql": "SELECT suppliers.name FROM suppliers WHERE suppliers.location = 'India';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 72, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many different municipal mayors were there in the municipality with an area of 42.66 km2?", "schema": "CREATE TABLE table_216776_2 (municipal_mayor VARCHAR, area__km²_ VARCHAR)", "sql": "SELECT COUNT(municipal_mayor) FROM table_216776_2 WHERE area__km²_ = '42.66';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 77, "num_statements": 1} {"question": "List the total number of claims and total claim amount for each product type.", "schema": "CREATE TABLE Claim (ClaimID INT, PolicyholderID INT, ClaimDate DATE, Product VARCHAR(10), ClaimAmount DECIMAL(10,2)); INSERT INTO Claim (ClaimID, PolicyholderID, ClaimDate, Product, ClaimAmount) VALUES (1, 1, '2020-01-01', 'Auto', 500), (2, 1, '2021-01-01', 'Auto', 1000), (3, 2, '2020-01-01', 'Home', 2000), (4, 3, '2018-01-01', 'Auto', 1500);", "sql": "SELECT Product, COUNT(*) AS TotalClaims, SUM(ClaimAmount) AS TotalClaimAmount FROM Claim GROUP BY Product;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 106, "num_statements": 1} {"question": "What is the most popular game genre among players in the EU region?", "schema": "CREATE TABLE PlayerGenre (PlayerID INT PRIMARY KEY, Region VARCHAR(10), GameType VARCHAR(20)); INSERT INTO PlayerGenre (PlayerID, Region, GameType) VALUES (1, 'EU', 'Strategy'); INSERT INTO PlayerGenre (PlayerID, Region, GameType) VALUES (2, 'NA', 'FPS'); INSERT INTO PlayerGenre (PlayerID, Region, GameType) VALUES (3, 'EU', 'RPG');", "sql": "SELECT GameType, COUNT(*) AS Count FROM PlayerGenre WHERE Region = 'EU' GROUP BY GameType ORDER BY Count DESC LIMIT 1;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 118, "num_statements": 1} {"question": "Delete all records in the 'military_equipment' table where 'equipment_type' is 'aircraft'", "schema": "CREATE TABLE military_equipment (equipment_id INT PRIMARY KEY, equipment_type VARCHAR(20), country VARCHAR(20), in_service BOOLEAN);", "sql": "DELETE FROM military_equipment WHERE equipment_type = 'aircraft';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_delete", "is_postgresql_specific": false, "sql_length": 65, "num_statements": 1} {"question": "What is the average duration of virtual tours in Colombia?", "schema": "CREATE TABLE virtual_tours_co (tour_id INT, tour_name VARCHAR(255), country VARCHAR(255), duration INT); INSERT INTO virtual_tours_co (tour_id, tour_name, country, duration) VALUES (1, 'Virtual Tour Bogota', 'Colombia', 60); INSERT INTO virtual_tours_co (tour_id, tour_name, country, duration) VALUES (2, 'Virtual Tour Medellin', 'Colombia', 75); INSERT INTO virtual_tours_co (tour_id, tour_name, country, duration) VALUES (3, 'Virtual Tour Cartagena', 'Colombia', 90);", "sql": "SELECT country, AVG(duration) FROM virtual_tours_co WHERE country = 'Colombia';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 79, "num_statements": 1} {"question": "List all military equipment maintenance activities performed on aircraft in the Asia-Pacific region in the last 6 months.", "schema": "CREATE TABLE equipment_maintenance (maintenance_id INT, maintenance_date DATE, equipment_type VARCHAR(255), region VARCHAR(255)); INSERT INTO equipment_maintenance (maintenance_id, maintenance_date, equipment_type, region) VALUES (1, '2021-12-31', 'aircraft', 'Asia-Pacific'), (2, '2022-04-04', 'tank', 'Europe'), (3, '2022-06-15', 'aircraft', 'Asia-Pacific');", "sql": "SELECT * FROM equipment_maintenance WHERE equipment_type = 'aircraft' AND region = 'Asia-Pacific' AND maintenance_date >= DATE_SUB(CURRENT_DATE, INTERVAL 6 MONTH);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 163, "num_statements": 1} {"question": "How many TV shows were released per year in the 'Drama' genre and their average IMDb ratings?", "schema": "CREATE TABLE TVShows (ShowID INT, Title VARCHAR(255), ReleaseYear INT, Genre VARCHAR(50), IMDbRating DECIMAL(3,2));", "sql": "SELECT ReleaseYear, Genre, COUNT(Title) AS Number_Of_Shows, AVG(IMDbRating) AS Avg_Rating FROM TVShows WHERE Genre = 'Drama' GROUP BY ReleaseYear, Genre;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 153, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What method was used in the match that went to round 1, and had a 6-3-1 record?", "schema": "CREATE TABLE table_name_13 (method VARCHAR, round VARCHAR, record VARCHAR)", "sql": "SELECT method FROM table_name_13 WHERE round = 1 AND record = '6-3-1';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 70, "num_statements": 1} {"question": "What is the running total of 'online_time' for each user, for the 'online_learning' database, ordered by user_id and date?", "schema": "CREATE TABLE online_learning (id INT, user_id INT, online_date DATE, online_time INT); INSERT INTO online_learning (id, user_id, online_date, online_time) VALUES (1, 1001, '2022-01-01', 60); INSERT INTO online_learning (id, user_id, online_date, online_time) VALUES (2, 1001, '2022-01-03', 90); INSERT INTO online_learning (id, user_id, online_date, online_time) VALUES (3, 1002, '2022-01-02', 45);", "sql": "SELECT user_id, online_date, online_time, SUM(online_time) OVER (PARTITION BY user_id ORDER BY online_date) as running_total FROM online_learning;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "query_window_function", "is_postgresql_specific": true, "sql_length": 146, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: When was the score l 111–122 (ot)?", "schema": "CREATE TABLE table_name_84 (date VARCHAR, score VARCHAR)", "sql": "SELECT date FROM table_name_84 WHERE score = 'l 111–122 (ot)';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 62, "num_statements": 1} {"question": "What is the rank of the total number of labor hours worked by each construction trade for each state?", "schema": "CREATE TABLE labor_hours_by_state (state TEXT, trade TEXT, hours INT); INSERT INTO labor_hours_by_state (state, trade, hours) VALUES ('California', 'Carpentry', 9000), ('California', 'Electrical', 11000), ('California', 'Plumbing', 8000), ('Texas', 'Carpentry', 7000), ('Texas', 'Electrical', 9000), ('Texas', 'Plumbing', 10000);", "sql": "SELECT state, trade, RANK() OVER (PARTITION BY state ORDER BY hours DESC) FROM labor_hours_by_state;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "query_window_function", "is_postgresql_specific": true, "sql_length": 100, "num_statements": 1} {"question": "Insert new product 'Eco-friendly Product4' with is_organic = TRUE in 'FarmersMarket' view", "schema": "CREATE VIEW FarmersMarket AS SELECT * FROM Products WHERE is_organic = TRUE; INSERT INTO Products (id, name, is_organic) VALUES (1, 'Product1', TRUE), (2, 'Product2', FALSE), (3, 'Product3', TRUE);", "sql": "INSERT INTO Products (id, name, is_organic) VALUES ((SELECT COALESCE(MAX(id), 0) + 1 FROM Products), 'Eco-friendly Product4', TRUE); INSERT INTO FarmersMarket SELECT * FROM Products WHERE is_organic = TRUE;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 206, "num_statements": 2} {"question": "Generate PostgreSQL SQL for: What % has 4 RLng?", "schema": "CREATE TABLE table_name_80 (int_percentage VARCHAR, rlng VARCHAR)", "sql": "SELECT int_percentage FROM table_name_80 WHERE rlng = '4';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 58, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the high rebounds from the date of April 14?", "schema": "CREATE TABLE table_name_29 (high_rebounds VARCHAR, date VARCHAR)", "sql": "SELECT high_rebounds FROM table_name_29 WHERE date = 'april 14';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 64, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the grid value which has Time/Retired value of accident?", "schema": "CREATE TABLE table_name_57 (grid VARCHAR, time_retired VARCHAR)", "sql": "SELECT grid FROM table_name_57 WHERE time_retired = 'accident';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 63, "num_statements": 1} {"question": "Find the maximum weight of containers handled daily by a single crane in 'Cairo'.", "schema": "CREATE TABLE port (port_id INT, name TEXT);CREATE TABLE crane (crane_id INT, port_id INT, name TEXT);CREATE TABLE container (container_id INT, crane_id INT, weight INT, handled_at DATETIME);INSERT INTO port VALUES (11, 'Cairo');", "sql": "SELECT crane.name, DATE(container.handled_at) AS handling_date, MAX(container.weight) AS heaviest_weight FROM crane JOIN port ON crane.port_id = port.port_id JOIN container ON crane.crane_id = container.crane_id WHERE port.name = 'Cairo' GROUP BY crane.name, handling_date;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 273, "num_statements": 1} {"question": "Which customers have the highest transaction amounts?", "schema": "CREATE TABLE customers (customer_id INT, customer_name VARCHAR(20), transaction_id INT); INSERT INTO customers (customer_id, customer_name, transaction_id) VALUES (1, 'John Doe', 1), (2, 'Jane Smith', 2), (3, 'Bob Johnson', 3);", "sql": "SELECT c.customer_name, SUM(t.amount) as total_amount FROM customers c JOIN transactions t ON c.transaction_id = t.transaction_id GROUP BY c.customer_name ORDER BY total_amount DESC;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 182, "num_statements": 1} {"question": "Show all intelligence agencies in the 'Africa' schema.", "schema": "CREATE SCHEMA Africa; CREATE TABLE IntelligenceAgencies (id INT, name VARCHAR(255), location VARCHAR(255), date DATE); INSERT INTO IntelligenceAgencies (id, name, location, date) VALUES (1, 'NISA', 'Nigeria', '2013-08-01'); INSERT INTO IntelligenceAgencies (id, name, location, date) VALUES (2, 'DGSE', 'Algeria', '1989-12-12');", "sql": "SELECT * FROM Africa.IntelligenceAgencies;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 42, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the total for set 2 17-25?", "schema": "CREATE TABLE table_name_80 (total VARCHAR, set_2 VARCHAR)", "sql": "SELECT total FROM table_name_80 WHERE set_2 = '17-25';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 54, "num_statements": 1} {"question": "What is the maximum number of sustainable clothing items sold in a single transaction?", "schema": "CREATE TABLE Transactions (id INT, customer_id INT, items_sold INT); INSERT INTO Transactions (id, customer_id, items_sold) VALUES (1, 1, 3), (2, 2, 1), (3, 3, 2), (4, 4, 4), (5, 5, 5), (6, 6, 1);", "sql": "SELECT MAX(items_sold) FROM Transactions WHERE EXISTS (SELECT 1 FROM Sales WHERE Transactions.customer_id = Sales.id AND material IN ('Organic Cotton', 'Hemp', 'Recycled Polyester', 'Tencel', 'Bamboo'));", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 203, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: When did the episode first air that had a production code of 102?", "schema": "CREATE TABLE table_18335117_2 (original_air_date VARCHAR, production_code VARCHAR)", "sql": "SELECT original_air_date FROM table_18335117_2 WHERE production_code = 102;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 75, "num_statements": 1} {"question": "How many players play games on mobile devices?", "schema": "CREATE TABLE PlayerDevice (PlayerID INT, Age INT, Device VARCHAR(20)); INSERT INTO PlayerDevice (PlayerID, Age, Device) VALUES (1, 15, 'PC'), (2, 20, 'Mobile'), (3, 18, 'Tablet');", "sql": "SELECT COUNT(*) FROM PlayerDevice WHERE Device = 'Mobile';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 58, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: For each account type, find the average account balance of customers with credit score lower than 50.", "schema": "CREATE TABLE customer (acc_type VARCHAR, acc_bal INTEGER, credit_score INTEGER)", "sql": "SELECT AVG(acc_bal), acc_type FROM customer WHERE credit_score < 50 GROUP BY acc_type;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 86, "num_statements": 1} {"question": "What is the total cargo weight transported by each vessel that visited Australian ports?", "schema": "CREATE TABLE cargo (id INT, vessel_name VARCHAR(255), cargo_weight INT, port VARCHAR(255), unload_date DATE); INSERT INTO cargo (id, vessel_name, cargo_weight, port, unload_date) VALUES (1, 'VesselA', 12000, 'Sydney', '2021-12-20');", "sql": "SELECT vessel_name, SUM(cargo_weight) as total_weight FROM cargo WHERE port IN ('Sydney', 'Melbourne', 'Brisbane', 'Perth', 'Adelaide') GROUP BY vessel_name;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 157, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'stats' (example 368).", "schema": null, "sql": "INSERT INTO test_io_vac_strategy SELECT i, i from generate_series(1, 4500)i;", "explanation": "DML from PostgreSQL core regression test for Stats.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 76, "num_statements": 1} {"question": "How many farms are there in each country?", "schema": "CREATE TABLE FarmCount (country VARCHAR(50), num_farms INT); INSERT INTO FarmCount (country, num_farms) VALUES ('USA', 5000), ('Canada', 4000), ('Mexico', 3000);", "sql": "SELECT country, num_farms FROM FarmCount;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 41, "num_statements": 1} {"question": "Show production figures for wells in the Gulf of Mexico.", "schema": "CREATE TABLE wells (well_id INT, country VARCHAR(50), production FLOAT); INSERT INTO wells (well_id, country, production) VALUES (1, 'USA - Gulf of Mexico', 1000), (2, 'Canada', 1500), (3, 'Norway', 800);", "sql": "SELECT production FROM wells WHERE country LIKE '%Gulf of Mexico%';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 67, "num_statements": 1} {"question": "Which countries have the most diverse set of authors?", "schema": "CREATE TABLE Authors (id INT, name TEXT, country TEXT); INSERT INTO Authors (id, name, country) VALUES (1, 'Author 1', 'United States'), (2, 'Author 2', 'Canada'), (3, 'Author 3', 'United States');", "sql": "SELECT country, COUNT(DISTINCT name) as unique_authors FROM Authors GROUP BY country ORDER BY unique_authors DESC;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 114, "num_statements": 1} {"question": "Find the number of players for each game genre.", "schema": "CREATE TABLE PlayerCount (GameID int, GameName varchar(100), Genre varchar(50), PlayerCount int); INSERT INTO PlayerCount VALUES (1, 'GameA', 'Action', 100000), (2, 'GameB', 'RPG', 120000), (3, 'GameC', 'Action', 150000), (4, 'GameD', 'Simulation', 110000);", "sql": "SELECT Genre, SUM(PlayerCount) as TotalPlayers FROM PlayerCount GROUP BY Genre;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 79, "num_statements": 1} {"question": "Calculate the year-over-year production change for each oil field", "schema": "CREATE TABLE production (id INT, field_name VARCHAR(50), year INT, qty FLOAT); INSERT INTO production (id, field_name, year, qty) VALUES (1, 'Galkynysh', 2018, 100000); INSERT INTO production (id, field_name, year, qty) VALUES (2, 'Galkynysh', 2019, 120000); INSERT INTO production (id, field_name, year, qty) VALUES (3, 'Samotlor', 2018, 110000); INSERT INTO production (id, field_name, year, qty) VALUES (4, 'Samotlor', 2019, 105000);", "sql": "SELECT a.field_name, (b.qty - a.qty) / a.qty as yoy_change FROM production a JOIN production b ON a.field_name = b.field_name WHERE a.year = (YEAR(CURRENT_DATE) - 1) AND b.year = YEAR(CURRENT_DATE);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 198, "num_statements": 1} {"question": "What is the maximum score achieved by a student in each district?", "schema": "CREATE TABLE student_mental_health (student_id INT, district_id INT, score INT); INSERT INTO student_mental_health (student_id, district_id, score) VALUES (1, 101, 80), (2, 101, 75), (3, 102, 85), (4, 102, 90), (5, 103, 65);", "sql": "SELECT district_id, MAX(score) as max_score FROM student_mental_health GROUP BY district_id;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 92, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: what is the year for the game dead space 2?", "schema": "CREATE TABLE table_name_53 (year VARCHAR, game VARCHAR)", "sql": "SELECT year FROM table_name_53 WHERE game = 'dead space 2';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 59, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What Loss had an Attendance of 17,675?", "schema": "CREATE TABLE table_name_27 (loss VARCHAR, attendance VARCHAR)", "sql": "SELECT loss FROM table_name_27 WHERE attendance = '17,675';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 59, "num_statements": 1} {"question": "What is the total number of digital assets in circulation in India as of 2022-02-01?", "schema": "CREATE TABLE digital_assets (asset_name TEXT, in_circulation INTEGER, circulation_date DATE); INSERT INTO digital_assets (asset_name, in_circulation, circulation_date) VALUES ('Bitcoin', 18750000, '2022-02-01'), ('Ethereum', 115500000, '2022-02-01');", "sql": "SELECT SUM(in_circulation) FROM digital_assets WHERE circulation_date = '2022-02-01' AND asset_name IN ('Bitcoin', 'Ethereum');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 127, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What engine has 2 points?", "schema": "CREATE TABLE table_name_41 (engine_s_ VARCHAR, points VARCHAR)", "sql": "SELECT engine_s_ FROM table_name_41 WHERE points = '2';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 55, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many Airspeed Fleet Shadower aircraft are retired?", "schema": "CREATE TABLE table_13605170_2 (retired VARCHAR, aircraft_type VARCHAR)", "sql": "SELECT retired FROM table_13605170_2 WHERE aircraft_type = 'Airspeed Fleet Shadower';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 85, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many games did they play in 1905?", "schema": "CREATE TABLE table_name_83 (played INTEGER, years VARCHAR)", "sql": "SELECT AVG(played) FROM table_name_83 WHERE years = '1905';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 59, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the home for detroit visitor and date of february 29", "schema": "CREATE TABLE table_name_25 (home VARCHAR, visitor VARCHAR, date VARCHAR)", "sql": "SELECT home FROM table_name_25 WHERE visitor = 'detroit' AND date = 'february 29';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 82, "num_statements": 1} {"question": "What is the average production rate per well for wells in the Bakken formation?", "schema": "CREATE TABLE wells (well_id INT, well_name VARCHAR(255), well_type VARCHAR(255), location VARCHAR(255)); INSERT INTO wells VALUES (1, 'Well A', 'Onshore', 'Bakken Formation'); INSERT INTO wells VALUES (2, 'Well B', 'Onshore', 'Utica Shale');", "sql": "SELECT AVG(production_rate) FROM (SELECT well_id, production_rate FROM well_production WHERE location LIKE 'Bakken%' ORDER BY production_rate DESC) WHERE row_number() OVER (ORDER BY production_rate DESC) <= 10;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_window_function", "is_postgresql_specific": true, "sql_length": 210, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'generated_stored' (example 134).", "schema": null, "sql": "CREATE TABLE gtest11 (a int PRIMARY KEY, b int, c int GENERATED ALWAYS AS (b * 2) STORED);", "explanation": "DDL from PostgreSQL core regression test for Generated Stored.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_table", "is_postgresql_specific": true, "sql_length": 90, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who was the constructor of the car that had a Q1 pos of less than 8 and a Q1 time of 1:15.038?", "schema": "CREATE TABLE table_name_84 (constructor VARCHAR, q1_pos VARCHAR, q1_time VARCHAR)", "sql": "SELECT constructor FROM table_name_84 WHERE q1_pos < 8 AND q1_time = '1:15.038';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 80, "num_statements": 1} {"question": "pgTAP test for Enumtap (assertion 37).", "schema": null, "sql": "SELECT * FROM check_test(\n enums_are( array_append(___myenum('bug_status'), 'fredy') ),\n false,\n 'enums_are(enums) fail',\n 'Search path ' || pg_catalog.current_setting('search_path') || ' should have the correct enums',\n ' Extra types:\n bug_status\n Missing types:\n fredy'\n);", "explanation": "SQL assertion from pgTAP test suite for Enumtap.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": true, "sql_length": 309, "num_statements": 1} {"question": "What is the total number of multimodal trips taken in New York City?", "schema": "CREATE TABLE multimodal_trips (id INT, trips INT, city VARCHAR(50));", "sql": "SELECT SUM(trips) FROM multimodal_trips WHERE city = 'New York City';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 69, "num_statements": 1} {"question": "Update the sustainability_score for all organic produce suppliers in the suppliers table to 90.", "schema": "CREATE TABLE suppliers (id INT, name VARCHAR(255), category VARCHAR(255), sustainability_score INT);", "sql": "UPDATE suppliers SET sustainability_score = 90 WHERE category = 'organic produce';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 82, "num_statements": 1} {"question": "PostgreSQL regression test 'jsonb_jsonpath': Write the SELECT query (example 49).", "schema": null, "sql": "select jsonb_path_query('1', 'strict $.*', silent => true);", "explanation": "Regression test for Jsonb Jsonpath in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select jsonb_path_query('1', 'strict $.*', silent => true)) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "advanced", "category": "query_select", "is_postgresql_specific": false, "sql_length": 59, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who are the opponents in district California 9?", "schema": "CREATE TABLE table_1341568_6 (opponent VARCHAR, district VARCHAR)", "sql": "SELECT opponent FROM table_1341568_6 WHERE district = 'California 9';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 69, "num_statements": 1} {"question": "What is the average safety rating for vehicles manufactured in Italy and Spain?", "schema": "CREATE TABLE Vehicles (ID INT, Manufacturer VARCHAR(255), SafetyRating FLOAT); INSERT INTO Vehicles (ID, Manufacturer, SafetyRating) VALUES (1, 'Ferrari', 4.5), (2, 'Lamborghini', 4.3), (3, 'Seat', 4.1), (4, 'Skoda', 4.2);", "sql": "SELECT AVG(SafetyRating) FROM Vehicles WHERE Manufacturer IN ('Italy', 'Spain');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 80, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Of all the contestants who got voted, what is the contestant number and name of the contestant who got least votes?", "schema": "CREATE TABLE votes (contestant_number VARCHAR); CREATE TABLE contestants (contestant_number VARCHAR, contestant_name VARCHAR)", "sql": "SELECT T1.contestant_number, T1.contestant_name FROM contestants AS T1 JOIN votes AS T2 ON T1.contestant_number = T2.contestant_number GROUP BY T1.contestant_number ORDER BY COUNT(*) LIMIT 1;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 191, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who is the opponent in the final of the Tournament of Blenheim?", "schema": "CREATE TABLE table_name_42 (opponent_in_the_final VARCHAR, tournament VARCHAR)", "sql": "SELECT opponent_in_the_final FROM table_name_42 WHERE tournament = 'blenheim';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 78, "num_statements": 1} {"question": "Create a table named 'player_achievements'", "schema": "CREATE TABLE player_achievements (player_id INT, achievement_name VARCHAR(255), achievement_date DATE);", "sql": "CREATE TABLE player_achievements (player_id INT, achievement_name VARCHAR(255), achievement_date DATE);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_table", "is_postgresql_specific": false, "sql_length": 103, "num_statements": 1} {"question": "What is the total installed capacity (in MW) of renewable energy projects in the state of California?", "schema": "CREATE TABLE Projects (project_id INT, project_name VARCHAR(100), state VARCHAR(100), installed_capacity FLOAT);", "sql": "SELECT SUM(installed_capacity) FROM Projects WHERE state = 'California';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 72, "num_statements": 1} {"question": "What is the average distance traveled by astrophysics research on Mars?", "schema": "CREATE TABLE astrophysics_research (research_id INT, location VARCHAR(50), distance FLOAT); INSERT INTO astrophysics_research (research_id, location, distance) VALUES (1, 'Mars', 50.3), (2, 'Venus', 10.2), (3, 'Mars', 40.1), (4, 'Jupiter', 70.5), (5, 'Mars', 60.0);", "sql": "SELECT AVG(distance) FROM astrophysics_research WHERE location = 'Mars';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 72, "num_statements": 1} {"question": "What is the total number of unsuccessful cybersecurity incidents handled by the 'Cybersecurity Response Team' in the last decade?", "schema": "CREATE TABLE cybersecurity_incidents (id INT, team VARCHAR(255), success BOOLEAN, incident_date DATE);", "sql": "SELECT COUNT(*) as total_unsuccessful_incidents FROM cybersecurity_incidents WHERE team = 'Cybersecurity Response Team' AND success = FALSE AND incident_date >= DATE_SUB(CURRENT_DATE, INTERVAL 10 YEAR);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 202, "num_statements": 1} {"question": "Find the number of unique origins for freight with a cost greater than 500.", "schema": "CREATE TABLE Freight (id INT PRIMARY KEY, shipment_id INT, origin VARCHAR(50), destination VARCHAR(50), distance INT, cost FLOAT); INSERT INTO Freight (id, shipment_id, origin, destination, distance, cost) VALUES (1, 1, 'Mumbai', 'Delhi', 1400, 7200.5), (2, 2, 'Tokyo', 'Seoul', 2100, 1050.3), (3, 3, 'São Paulo', 'Buenos Aires', 1084, 542.7), (4, 4, 'Toronto', 'Montreal', 543, 271.5), (5, 5, 'Delhi', 'Mumbai', 1400, 7250.8), (6, 6, 'Mumbai', 'Kolkata', 1520, 810.9);", "sql": "SELECT COUNT(DISTINCT origin) FROM Freight WHERE cost > 500;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 60, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which Events is the highest one that has a Top-5 larger than 5, and a Top-10 larger than 28?", "schema": "CREATE TABLE table_name_93 (events INTEGER, top_5 VARCHAR, top_10 VARCHAR)", "sql": "SELECT MAX(events) FROM table_name_93 WHERE top_5 > 5 AND top_10 > 28;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 70, "num_statements": 1} {"question": "What is the total number of green buildings in the city of Vancouver, and what is their total floor area?", "schema": "CREATE TABLE green_buildings (id INT, name VARCHAR(255), city VARCHAR(255), floor_area FLOAT, certification_date DATE);", "sql": "SELECT COUNT(*) AS total_buildings, SUM(floor_area) AS total_floor_area FROM green_buildings WHERE city = 'Vancouver';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 118, "num_statements": 1} {"question": "What is the total number of digital assets issued by companies based in North America?", "schema": "CREATE TABLE Companies (id INT, name VARCHAR(255), region VARCHAR(255)); INSERT INTO Companies (id, name, region) VALUES (1, 'CompanyA', 'North America'), (2, 'CompanyB', 'Europe');", "sql": "SELECT COUNT(*) FROM Companies WHERE region = 'North America' JOIN DigitalAssets ON Companies.id = DigitalAssets.company_id;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 124, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'predicate' (example 36).", "schema": null, "sql": "INSERT INTO pred_tab_notnull VALUES (3, 3);", "explanation": "DML from PostgreSQL core regression test for Predicate.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 43, "num_statements": 1} {"question": "What is the maximum transaction amount in Florida?", "schema": "CREATE TABLE clients (id INT, name TEXT, age INT, state TEXT, transaction_amount DECIMAL(10,2)); INSERT INTO clients (id, name, age, state, transaction_amount) VALUES (1, 'John Doe', 35, 'Florida', 700.00); INSERT INTO clients (id, name, age, state, transaction_amount) VALUES (2, 'Jane Smith', 40, 'Florida', 650.50);", "sql": "SELECT MAX(transaction_amount) FROM clients WHERE state = 'Florida';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 68, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is Competition, when Date is \"January 11, 1996\", when Venue is \"San Diego , United States\"?", "schema": "CREATE TABLE table_name_72 (competition VARCHAR, date VARCHAR, venue VARCHAR)", "sql": "SELECT competition FROM table_name_72 WHERE date = 'january 11, 1996' AND venue = 'san diego , united states';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 110, "num_statements": 1} {"question": "What is the average CO2 emission per material?", "schema": "CREATE TABLE CO2Emissions (EmissionID INT, Material VARCHAR(50), CO2 DECIMAL(5,2)); INSERT INTO CO2Emissions (EmissionID, Material, CO2) VALUES (1, 'Organic Cotton', 3.50), (2, 'Hemp', 2.80), (3, 'Recycled Polyester', 4.20), (4, 'Tencel', 3.10);", "sql": "SELECT Material, AVG(CO2) AS AvgCO2Emission FROM CO2Emissions GROUP BY Material;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 80, "num_statements": 1} {"question": "Display the total claim amount for each policy type in the month of December", "schema": "CREATE TABLE claims (claim_id INT, policy_id INT, claim_amount DECIMAL(10,2), claim_date DATE, policy_type VARCHAR(20));", "sql": "SELECT policy_type, SUM(claim_amount) FROM claims WHERE MONTH(claim_date) = 12 GROUP BY policy_type;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 100, "num_statements": 1} {"question": "Which container ships have had their capacity updated in the last month?", "schema": "CREATE TABLE ship_updates (update_id INT, ship_name VARCHAR(50), capacity INT, update_date DATE); INSERT INTO ship_updates VALUES (1, 'MSC Maya', 19224, '2022-03-15'); INSERT INTO ship_updates VALUES (2, 'OOCL Hong Kong', 21413, '2022-02-20'); INSERT INTO ship_updates VALUES (3, 'Ever Given', 20000, '2022-03-08');", "sql": "SELECT ship_name, update_date FROM ship_updates WHERE update_date > DATEADD(MONTH, -1, GETDATE());", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 98, "num_statements": 1} {"question": "Find the total energy production (in MWh) from renewable energy sources for each state in the renewable_energy_production table.", "schema": "CREATE TABLE renewable_energy_production (state VARCHAR(50), year INT, energy_production FLOAT, energy_source VARCHAR(50));", "sql": "SELECT state, SUM(energy_production) as total_renewable_energy FROM renewable_energy_production WHERE energy_source = 'Wind' OR energy_source = 'Solar' GROUP BY state;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 167, "num_statements": 1} {"question": "For cases with an outcome of 'settled', find the client's first and last name, state, and the difference between the case closing date and the case opening date, in descending order by the difference.", "schema": "CREATE TABLE Cases (CaseID INT, ClientFirstName VARCHAR(50), ClientLastName VARCHAR(50), State VARCHAR(2), CaseOutcome VARCHAR(20), OpenDate DATE, CloseDate DATE); INSERT INTO Cases (CaseID, ClientFirstName, ClientLastName, State, CaseOutcome, OpenDate, CloseDate) VALUES (1, 'John', 'Doe', 'NY', 'settled', '2020-01-01', '2020-06-01'), (2, 'Jane', 'Smith', 'CA', 'won', '2019-01-01', '2019-12-31');", "sql": "SELECT ClientFirstName, ClientLastName, State, DATEDIFF(CloseDate, OpenDate) AS DaysOpen FROM Cases WHERE CaseOutcome = 'settled' ORDER BY DaysOpen DESC;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 153, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many episodes in the series are from season 12?", "schema": "CREATE TABLE table_2226817_8 (no_in_series VARCHAR, no_in_season VARCHAR)", "sql": "SELECT no_in_series FROM table_2226817_8 WHERE no_in_season = 12;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 65, "num_statements": 1} {"question": "What is the total size of green buildings in the 'green_buildings' table?", "schema": "CREATE TABLE green_buildings (id INT, size FLOAT, certification VARCHAR(255), PRIMARY KEY (id)); INSERT INTO green_buildings (id, size, certification) VALUES (1, 1200.0, 'LEED'), (2, 800.0, 'BREEAM'), (3, 1500.0, 'WELL');", "sql": "SELECT SUM(size) FROM green_buildings WHERE certification IS NOT NULL;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 70, "num_statements": 1} {"question": "List all marine species that are found at a depth greater than 500 meters.", "schema": "CREATE TABLE marine_species (id INT, name VARCHAR(255), habitat_depth INT); INSERT INTO marine_species (id, name, habitat_depth) VALUES (1, 'Clownfish', 20), (2, 'Blue Whale', 1000);", "sql": "SELECT name FROM marine_species WHERE habitat_depth > 500;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 58, "num_statements": 1} {"question": "PostgreSQL regression test 'interval': Write the SELECT query (example 379).", "schema": null, "sql": "SELECT interval '0 days' * 'infinity'::float;", "explanation": "Regression test for Interval in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT interval '0 days' * 'infinity'::float) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 45, "num_statements": 1} {"question": "Create a table named 'workforce_training'", "schema": "CREATE TABLE workforce_training (id INT PRIMARY KEY, employee_name VARCHAR(255), training_topic VARCHAR(255), training_hours INT, training_completion_date DATE);", "sql": "CREATE TABLE workforce_training (id INT PRIMARY KEY, employee_name VARCHAR(255), training_topic VARCHAR(255), training_hours INT, training_completion_date DATE);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_table", "is_postgresql_specific": false, "sql_length": 161, "num_statements": 1} {"question": "What was the total quantity of refrigerated cargo unloaded at the port of Long Beach in August 2021, grouped by cargo type?", "schema": "CREATE TABLE ports (id INT, name VARCHAR(50)); INSERT INTO ports (id, name) VALUES (1, 'Oakland'), (2, 'Long Beach'), (3, 'Los Angeles'); CREATE TABLE cargo (id INT, port_id INT, cargo_type VARCHAR(50), temperature_type VARCHAR(50), quantity INT); INSERT INTO cargo (id, port_id, cargo_type, temperature_type, quantity) VALUES (1, 1, 'Frozen Food', 'Refrigerated', 500), (2, 1, 'Electronics', 'Non-Refrigerated', 800), (3, 2, 'Furniture', 'Non-Refrigerated', 700), (4, 2, 'Seafood', 'Refrigerated', 900), (5, 3, 'Vegetables', 'Refrigerated', 1000);", "sql": "SELECT cargo_type, SUM(quantity) as total_quantity FROM cargo WHERE port_id = 2 AND temperature_type = 'Refrigerated' AND MONTH(date) = 8 GROUP BY cargo_type;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 158, "num_statements": 1} {"question": "What is the total population of all birds in the savannah habitat?", "schema": "CREATE TABLE animals (id INT, name VARCHAR(50), species VARCHAR(50), population INT, habitat VARCHAR(50)); INSERT INTO animals (id, name, species, population, habitat) VALUES (7, 'Eagle', 'Bird', 30, 'Savannah'); INSERT INTO animals (id, name, species, population, habitat) VALUES (8, 'Ostrich', 'Bird', 50, 'Savannah');", "sql": "SELECT SUM(population) FROM animals WHERE species LIKE '%Bird' AND habitat = 'Savannah';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 88, "num_statements": 1} {"question": "What is the total timber production for each country in 2022?", "schema": "CREATE TABLE timber_production (country_code CHAR(3), year INT, volume INT); INSERT INTO timber_production (country_code, year, volume) VALUES ('IDN', 2022, 12000), ('IDN', 2021, 11000), ('JPN', 2022, 15000), ('JPN', 2021, 13000);", "sql": "SELECT c.country_name, SUM(tp.volume) as total_volume FROM timber_production tp INNER JOIN country c ON tp.country_code = c.country_code WHERE tp.year = 2022 GROUP BY c.country_name;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 182, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How much Played has an Against larger than 11, and a Team of botafogo, and a Position smaller than 2?", "schema": "CREATE TABLE table_name_57 (played VARCHAR, position VARCHAR, against VARCHAR, team VARCHAR)", "sql": "SELECT COUNT(played) FROM table_name_57 WHERE against > 11 AND team = 'botafogo' AND position < 2;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 98, "num_statements": 1} {"question": "What is the percentage of fair trade products by manufacturer?", "schema": "CREATE TABLE ManufacturerProducts (manufacturer_id INT, manufacturer_name VARCHAR(255), product_type VARCHAR(255), is_fair_trade BOOLEAN); INSERT INTO ManufacturerProducts (manufacturer_id, manufacturer_name, product_type, is_fair_trade) VALUES (1, 'EcoPure', 'Clothing', true), (2, 'GreenYarn', 'Yarn', false), (3, 'SustainableTimber', 'Furniture', true), (4, 'EthicalMinerals', 'Electronics', true), (5, 'FairTradeFabrics', 'Textiles', true), (6, 'EcoDyes', 'Dyes', false), (7, 'EcoPaints', 'Paint', true), (8, 'GreenBuilding', 'Building Materials', false);", "sql": "SELECT manufacturer_name, ROUND(COUNT(*) FILTER (WHERE is_fair_trade = true) * 100.0 / COUNT(*), 2) as fair_trade_percentage FROM ManufacturerProducts GROUP BY manufacturer_name;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": true, "sql_length": 178, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the Panthers' Division Record?", "schema": "CREATE TABLE table_name_48 (division_record VARCHAR, team VARCHAR)", "sql": "SELECT division_record FROM table_name_48 WHERE team = 'panthers';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "pgTAP test for Coltap (assertion 61).", "schema": null, "sql": "SELECT * FROM check_test(\n col_has_default( 'sometab', '__asdfasdfs__', 'desc' ),\n false,\n 'col_has_default( tab, col, desc )',\n 'desc',\n ' Column sometab.__asdfasdfs__ does not exist'\n);", "explanation": "SQL assertion from pgTAP test suite for Coltap.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 205, "num_statements": 1} {"question": "What is the average cost of Mars rovers?", "schema": "CREATE TABLE MarsRovers (name TEXT, launch_date DATE, cost INTEGER);INSERT INTO MarsRovers (name, launch_date, cost) VALUES ('Sojourner', '1996-12-04', 250000000); INSERT INTO MarsRovers (name, launch_date, cost) VALUES ('Spirit', '2003-06-10', 400000000);", "sql": "SELECT AVG(cost) FROM MarsRovers;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 33, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which Driver and passenger have a bike number of less than 4 with zabel -vmc equipment?", "schema": "CREATE TABLE table_name_9 (driver___passenger VARCHAR, bike_no VARCHAR, equipment VARCHAR)", "sql": "SELECT driver___passenger FROM table_name_9 WHERE bike_no < 4 AND equipment = 'zabel -vmc';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 91, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: what is the time/retired when the laps is less than 54 and the driver is mark donohue?", "schema": "CREATE TABLE table_name_35 (time_retired VARCHAR, laps VARCHAR, driver VARCHAR)", "sql": "SELECT time_retired FROM table_name_35 WHERE laps < 54 AND driver = 'mark donohue';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 83, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'foreign_data' (example 140).", "schema": null, "sql": "CREATE USER MAPPING FOR public SERVER s8;", "explanation": "DDL from PostgreSQL core regression test for Foreign Data.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 41, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which long version was remixed in 1986?", "schema": "CREATE TABLE table_name_51 (remixed_by VARCHAR, year VARCHAR, version VARCHAR)", "sql": "SELECT remixed_by FROM table_name_51 WHERE year = 1986 AND version = 'long version';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 84, "num_statements": 1} {"question": "How many cases were won by attorneys from the law firm 'Smith & Jones' in the last 3 years?", "schema": "CREATE TABLE attorneys (attorney_id INT, law_firm VARCHAR(50), joined_date DATE); INSERT INTO attorneys (attorney_id, law_firm, joined_date) VALUES (1, 'Smith & Jones', '2015-01-01'); CREATE TABLE cases (case_id INT, attorney_id INT, case_outcome VARCHAR(10), case_date DATE); INSERT INTO cases (case_id, attorney_id, case_outcome, case_date) VALUES (1, 1, 'Won', '2020-05-01'), (2, 1, 'Lost', '2019-08-15');", "sql": "SELECT COUNT(*) FROM cases JOIN attorneys ON cases.attorney_id = attorneys.attorney_id WHERE attorneys.law_firm = 'Smith & Jones' AND cases.case_date >= DATE_SUB(CURDATE(), INTERVAL 3 YEAR) AND cases.case_outcome = 'Won';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 221, "num_statements": 1} {"question": "What is the minimum budget for an ethical AI project?", "schema": "CREATE TABLE ethical_ai_projects (id INT, project_name VARCHAR(50), budget INT); INSERT INTO ethical_ai_projects (id, project_name, budget) VALUES (1, 'Ethical AI Guidelines Development', 50000), (2, 'AI Ethics Training Program', 25000), (3, 'AI Auditing Framework Design', 75000);", "sql": "SELECT MIN(budget) FROM ethical_ai_projects;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 44, "num_statements": 1} {"question": "How many animals are in the 'habitat_preservation' table?", "schema": "CREATE TABLE habitat_preservation (id INT, habitat_name VARCHAR(50), acres FLOAT); INSERT INTO habitat_preservation (id, habitat_name, acres) VALUES (1, 'Forest', 500.5), (2, 'Wetlands', 300.2), (3, 'Grasslands', 700.1);", "sql": "SELECT COUNT(*) FROM habitat_preservation;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 42, "num_statements": 1} {"question": "What is the average fare for buses and trains in the NYC subway system?", "schema": "CREATE TABLE nyc_subway (route_type VARCHAR(10), fare DECIMAL(5,2)); INSERT INTO nyc_subway (route_type, fare) VALUES ('Bus', 2.75), ('Train', 3);", "sql": "SELECT AVG(fare) FROM nyc_subway WHERE route_type IN ('Bus', 'Train');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 70, "num_statements": 1} {"question": "Update the water usage for California in 2020 to 10000.", "schema": "CREATE TABLE water_usage(state VARCHAR(20), year INT, usage FLOAT);", "sql": "UPDATE water_usage SET usage=10000 WHERE state='California' AND year=2020;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 74, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who was eliminated on Banana Night?", "schema": "CREATE TABLE table_name_33 (name VARCHAR, eliminated VARCHAR)", "sql": "SELECT name FROM table_name_33 WHERE eliminated = 'banana night';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 65, "num_statements": 1} {"question": "How many members have a recorded heart rate measurement greater than 120 in September?", "schema": "CREATE TABLE Members (Id INT, Age INT, Gender VARCHAR(10)); CREATE TABLE Measurements (Id INT, MemberId INT, HeartRate INT, Date DATE); INSERT INTO Members (Id, Age, Gender) VALUES (1, 25, 'Female'), (2, 32, 'Male'), (3, 45, 'Female'), (4, 28, 'Non-binary'); INSERT INTO Measurements (Id, MemberId, HeartRate, Date) VALUES (1, 1, 130, '2022-09-01'), (2, 1, 125, '2022-09-15'), (3, 2, 90, '2022-08-30'), (4, 3, 70, '2022-09-03'), (5, 4, 126, '2022-09-05');", "sql": "SELECT COUNT(DISTINCT MemberId) FROM Measurements INNER JOIN Members ON Measurements.MemberId = Members.Id WHERE DATE_FORMAT(Date, '%Y-%m') = '2022-09' AND HeartRate > 120;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 172, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which Rating has a Centre of helsinki?", "schema": "CREATE TABLE table_name_88 (rating INTEGER, centre VARCHAR)", "sql": "SELECT SUM(rating) FROM table_name_88 WHERE centre = 'helsinki';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 64, "num_statements": 1} {"question": "What is the total number of heritage sites and associated artifacts in each continent?", "schema": "CREATE TABLE Site (SiteID INT, SiteName VARCHAR(50), Continent VARCHAR(50)); INSERT INTO Site (SiteID, SiteName, Continent) VALUES (1, 'Great Wall', 'Asia'), (2, 'Machu Picchu', 'South America'), (3, 'Easter Island', 'South America'); CREATE TABLE Artifact (ArtifactID INT, ArtifactName VARCHAR(50), SiteID INT); INSERT INTO Artifact (ArtifactID, ArtifactName, SiteID) VALUES (1, 'Watchtower', 1), (2, 'Temple of the Sun', 1), (3, 'Temple of the Moon', 1), (4, 'Llama Figurine', 2);", "sql": "SELECT Continent, COUNT(DISTINCT SiteID) as SiteCount, (SELECT COUNT(*) FROM Artifact WHERE EXISTS (SELECT 1 FROM Site WHERE Site.SiteID = Artifact.SiteID AND Site.Continent = Site.Continent)) as ArtifactCount FROM Site GROUP BY Continent;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 239, "num_statements": 1} {"question": "What is the total assets value for customers from the USA?", "schema": "CREATE TABLE customers (id INT, name TEXT, age INT, country TEXT, assets FLOAT); INSERT INTO customers (id, name, age, country, assets) VALUES (1, 'John Doe', 45, 'USA', 250000.00); INSERT INTO customers (id, name, age, country, assets) VALUES (2, 'Jane Smith', 34, 'Canada', 320000.00);", "sql": "SELECT SUM(assets) FROM customers WHERE country = 'USA';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 56, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the Film title used in nomination of Cilvēka Bērns?", "schema": "CREATE TABLE table_name_91 (film_title_used_in_nomination VARCHAR, original_title VARCHAR)", "sql": "SELECT film_title_used_in_nomination FROM table_name_91 WHERE original_title = 'cilvēka bērns';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 95, "num_statements": 1} {"question": "Delete a department from the \"departments\" table", "schema": "CREATE TABLE departments (id INT, department VARCHAR(50), manager VARCHAR(50));", "sql": "DELETE FROM departments WHERE department = 'Marketing';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_delete", "is_postgresql_specific": false, "sql_length": 55, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the highest Counties when Mitt Romney was the candidate with less than 0 delegates?", "schema": "CREATE TABLE table_name_62 (counties INTEGER, candidate VARCHAR, delegates VARCHAR)", "sql": "SELECT MAX(counties) FROM table_name_62 WHERE candidate = 'mitt romney' AND delegates < 0;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 90, "num_statements": 1} {"question": "What is the average depth at which sharks have been recorded in the Arctic Ocean?", "schema": "CREATE TABLE shark_sightings (id INT, name TEXT, location TEXT, depth INT); INSERT INTO shark_sightings (id, name, location, depth) VALUES (1, 'Greenland Shark', 'Arctic Ocean', 2000), (2, 'Pacific Sleeper Shark', 'Arctic Ocean', 2500), (3, 'Basking Shark', 'Atlantic Ocean', 1000);", "sql": "SELECT AVG(depth) FROM shark_sightings WHERE location = 'Arctic Ocean';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 71, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the High rebounds of a Game with 56?", "schema": "CREATE TABLE table_name_70 (high_rebounds VARCHAR, game VARCHAR)", "sql": "SELECT high_rebounds FROM table_name_70 WHERE game = 56;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 56, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: List the dates of enrollment and completion of the student with personal name \"Karson\".", "schema": "CREATE TABLE Student_Course_Enrolment (date_of_enrolment VARCHAR, date_of_completion VARCHAR, student_id VARCHAR); CREATE TABLE Students (student_id VARCHAR, personal_name VARCHAR)", "sql": "SELECT T1.date_of_enrolment, T1.date_of_completion FROM Student_Course_Enrolment AS T1 JOIN Students AS T2 ON T1.student_id = T2.student_id WHERE T2.personal_name = 'Karson';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 174, "num_statements": 1} {"question": "What is the minimum speed recorded for vessels arriving in South Korea in August 2022?", "schema": "CREATE TABLE vessel_performance (id INT, name TEXT, speed DECIMAL(5,2), arrived_date DATE, country TEXT); INSERT INTO vessel_performance (id, name, speed, arrived_date, country) VALUES (1, 'Vessel P', 12.8, '2022-08-02', 'South Korea'), (2, 'Vessel Q', 14.5, '2022-08-15', 'South Korea'), (3, 'Vessel R', 11.9, '2022-08-26', 'South Korea');", "sql": "SELECT MIN(speed) FROM vessel_performance WHERE YEAR(arrived_date) = 2022 AND MONTH(arrived_date) = 8 AND country = 'South Korea';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "plpgsql", "is_postgresql_specific": false, "sql_length": 130, "num_statements": 1} {"question": "What is the total number of public meetings for each department?", "schema": "CREATE TABLE PublicMeetings ( MeetingId INT, MeetingDate DATE, Department VARCHAR(255) ); INSERT INTO PublicMeetings (MeetingId, MeetingDate, Department) VALUES (1, '2021-01-01', 'Transportation'), (2, '2021-02-01', 'Education'), (3, '2021-03-01', 'Healthcare');", "sql": "SELECT Department, COUNT(*) OVER (PARTITION BY Department) as TotalMeetings FROM PublicMeetings;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "query_window_function", "is_postgresql_specific": true, "sql_length": 96, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: The man who received 87,676 votes in Queens won what percentage of the total for the election?", "schema": "CREATE TABLE table_1108394_47 (_percentage VARCHAR, queens VARCHAR)", "sql": "SELECT _percentage FROM table_1108394_47 WHERE queens = '87,676';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 65, "num_statements": 1} {"question": "What is the adoption rate of electric scooters in Madrid?", "schema": "CREATE TABLE scooter_adoption (id INT, city VARCHAR(20), adoption_rate FLOAT); INSERT INTO scooter_adoption (id, city, adoption_rate) VALUES (1, 'Madrid', 0.2), (2, 'Barcelona', 0.15);", "sql": "SELECT adoption_rate FROM scooter_adoption WHERE city = 'Madrid';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 65, "num_statements": 1} {"question": "Get the average number of employees for startups that have had an exit", "schema": "CREATE TABLE company (id INT, name TEXT, number_of_employees INT, has_exited BOOLEAN); CREATE TABLE exit_strategies (id INT, company_id INT, exit_year INT, exit_amount INT);", "sql": "SELECT AVG(number_of_employees) FROM company c INNER JOIN exit_strategies es ON c.id = es.company_id WHERE has_exited = true;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 125, "num_statements": 1} {"question": "Calculate veteran employment rates by state, for the past 6 months", "schema": "CREATE TABLE veteran_employment (veteran_id INT, veteran_state VARCHAR(2), employment_status VARCHAR(255), employment_date DATE);", "sql": "SELECT veteran_state, AVG(CASE WHEN employment_status = 'Employed' THEN 100 ELSE 0 END) as employment_rate FROM veteran_employment WHERE employment_date >= DATEADD(month, -6, CURRENT_DATE) GROUP BY veteran_state;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 212, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What place is South Africa?", "schema": "CREATE TABLE table_name_99 (place VARCHAR, country VARCHAR)", "sql": "SELECT place FROM table_name_99 WHERE country = 'south africa';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 63, "num_statements": 1} {"question": "Update labor_statistics table and set hourly_wage = 30.50 for all records where job_category is 'Carpentry'", "schema": "CREATE TABLE labor_statistics (id INT, job_category VARCHAR(20), hourly_wage DECIMAL(5,2));", "sql": "UPDATE labor_statistics SET hourly_wage = 30.50 WHERE job_category = 'Carpentry';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 81, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Find all the rooms that have a price higher than 160 and can accommodate more than 2 people. Report room names and ids.", "schema": "CREATE TABLE Rooms (roomName VARCHAR, RoomId VARCHAR, basePrice VARCHAR, maxOccupancy VARCHAR)", "sql": "SELECT roomName, RoomId FROM Rooms WHERE basePrice > 160 AND maxOccupancy > 2;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 78, "num_statements": 1} {"question": "PostgreSQL regression test 'multirangetypes': Write the SELECT query (example 322).", "schema": null, "sql": "SELECT nummultirange(numrange(1,2)) + nummultirange();", "explanation": "Regression test for Multirangetypes in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT nummultirange(numrange(1,2)) + nummultirange()) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 54, "num_statements": 1} {"question": "PostgreSQL regression test 'oid8': Write the SELECT query (example 36).", "schema": null, "sql": "SELECT 1::int4::oid8;", "explanation": "Regression test for Oid8 in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT 1::int4::oid8) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 21, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'numeric' (example 134).", "schema": null, "sql": "INSERT INTO num_exp_mul VALUES (3,0,'0');", "explanation": "DML from PostgreSQL core regression test for Numeric.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 41, "num_statements": 1} {"question": "List all countries with their respective number of marine species in danger.", "schema": "CREATE TABLE countries (country_name TEXT, country_code TEXT); CREATE TABLE marine_species (species_name TEXT, country_code TEXT, status TEXT); INSERT INTO countries (country_name, country_code) VALUES ('Australia', 'AU'), ('Brazil', 'BR'), ('Indonesia', 'ID'); INSERT INTO marine_species (species_name, country_code, status) VALUES ('Clownfish', 'AU', 'endangered'), ('Seahorse', 'BR', 'endangered'), ('Manta Ray', 'ID', 'endangered');", "sql": "SELECT countries.country_name, COUNT(marine_species.species_name) FROM countries INNER JOIN marine_species ON countries.country_code = marine_species.country_code WHERE marine_species.status = 'endangered' GROUP BY countries.country_name;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 238, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the company from Greece with author Aeschylus?", "schema": "CREATE TABLE table_name_45 (company VARCHAR, country VARCHAR, author VARCHAR)", "sql": "SELECT company FROM table_name_45 WHERE country = 'greece' AND author = 'aeschylus';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 84, "num_statements": 1} {"question": "What is the total number of marine species observed in the Pacific and Atlantic oceans, grouped by year?", "schema": "CREATE TABLE MarineSpeciesObservations (observation_id INT, species VARCHAR(255), location VARCHAR(255), year INT); INSERT INTO MarineSpeciesObservations (observation_id, species, location, year) VALUES (1, 'Dolphin', 'Pacific Ocean', 2020); INSERT INTO MarineSpeciesObservations (observation_id, species, location, year) VALUES (2, 'Shark', 'Atlantic Ocean', 2019);", "sql": "SELECT year, COUNT(*) FROM MarineSpeciesObservations WHERE location IN ('Pacific Ocean', 'Atlantic Ocean') GROUP BY year;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 121, "num_statements": 1} {"question": "How many broadband customers does the Tokyo regional office have?", "schema": "CREATE TABLE broadband_subscribers (subscriber_id INT, regional_office VARCHAR(20)); INSERT INTO broadband_subscribers (subscriber_id, regional_office) VALUES (1, 'Boston'), (2, 'Boston'), (3, 'Tokyo'), (4, 'NYC'), (5, 'Tokyo');", "sql": "SELECT COUNT(*) FROM broadband_subscribers WHERE regional_office = 'Tokyo';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 75, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the number of points for the maserati straight-4 engine, later than 1959?", "schema": "CREATE TABLE table_name_94 (points VARCHAR, engine VARCHAR, year VARCHAR)", "sql": "SELECT COUNT(points) FROM table_name_94 WHERE engine = 'maserati straight-4' AND year > 1959;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 93, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Show all advisors who have at least two students.", "schema": "CREATE TABLE Student (advisor VARCHAR)", "sql": "SELECT advisor FROM Student GROUP BY advisor HAVING COUNT(*) >= 2;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "Create a table named 'routes' with columns 'route_id', 'name', 'type'", "schema": "CREATE TABLE routes (route_id INT, name VARCHAR(255), type VARCHAR(255));", "sql": "CREATE TABLE routes (route_id INT, name VARCHAR(255), type VARCHAR(255));", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_table", "is_postgresql_specific": false, "sql_length": 73, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: what is the sum of games played when the losses is less than 7, the wins is 6 and the goals for is more than 76?", "schema": "CREATE TABLE table_name_31 (games_played INTEGER, goals_for VARCHAR, losses VARCHAR, wins VARCHAR)", "sql": "SELECT SUM(games_played) FROM table_name_31 WHERE losses < 7 AND wins = 6 AND goals_for > 76;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 93, "num_statements": 1} {"question": "List all environmental impact assessments from 2019.", "schema": "CREATE TABLE environmental_assessments (assessment_id INT, assessment_date DATE); INSERT INTO environmental_assessments VALUES (1, '2019-04-01'); INSERT INTO environmental_assessments VALUES (2, '2018-09-20'); INSERT INTO environmental_assessments VALUES (3, '2020-12-15');", "sql": "SELECT * FROM environmental_assessments WHERE assessment_date >= '2019-01-01' AND assessment_date < '2020-01-01';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 113, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the average Total, when Nation is Soviet Union, and when Gold is greater than 9?", "schema": "CREATE TABLE table_name_33 (total INTEGER, nation VARCHAR, gold VARCHAR)", "sql": "SELECT AVG(total) FROM table_name_33 WHERE nation = 'soviet union' AND gold > 9;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 80, "num_statements": 1} {"question": "Which autonomous driving research projects received funding from the US government in 2021 and 2022?", "schema": "CREATE TABLE Research_Project (id INT, name VARCHAR(50), type VARCHAR(20), country VARCHAR(10), start_year INT, end_year INT); INSERT INTO Research_Project (id, name, type, country, start_year, end_year) VALUES (1, 'Project A', 'Autonomous Driving', 'USA', 2021, 2023), (2, 'Project B', 'Electric Vehicles', 'China', 2020, 2022), (3, 'Project C', 'Autonomous Driving', 'Germany', 2022, 2025);", "sql": "SELECT name FROM Research_Project WHERE type = 'Autonomous Driving' AND country = 'USA' AND start_year BETWEEN 2021 AND 2022;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 125, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the format for the album under the label, luaka bop, that had a catalog number of 3645 and dated after 1981?", "schema": "CREATE TABLE table_name_15 (format VARCHAR, label VARCHAR, catalog VARCHAR, date VARCHAR)", "sql": "SELECT format FROM table_name_15 WHERE catalog = '3645' AND date > 1981 AND label = 'luaka bop';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 96, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the draw when the tries against was 69?", "schema": "CREATE TABLE table_name_75 (drawn VARCHAR, tries_against VARCHAR)", "sql": "SELECT drawn FROM table_name_75 WHERE tries_against = '69';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 59, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'ltree' (example 177).", "schema": null, "sql": "SELECT 'a.b.c.d.e'::ltree ~ '*.!b.*.!c.*.e';", "explanation": "Example query from the 'ltree' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 44, "num_statements": 1} {"question": "Show a SQL definition from the timescaledb project (compat, item 13).", "schema": null, "sql": "CREATE OR REPLACE FUNCTION _timescaledb_internal.create_compressed_chunk(chunk regclass,chunk_table regclass,uncompressed_heap_size bigint,uncompressed_toast_size bigint,uncompressed_index_size bigint,compressed_heap_size bigint,compressed_toast_size bigint,compressed_index_size bigint,numrows_pre_compression bigint,numrows_post_compression bigint) RETURNS regclass LANGUAGE PLPGSQL AS $$\nBEGIN\n IF current_setting('timescaledb.enable_deprecation_warnings', true)::bool THEN\n RAISE WARNING 'function _timescaledb_internal.create_compressed_chunk(regclass,regclass,bigint,bigint,bigint,bigint,bigint,bigint,bigint,bigint) is deprecated and has been moved to _timescaledb_functions schema. this compatibility function will be removed in a future version.';\n END IF;\n RETURN _timescaledb_functions.create_compressed_chunk($1,$2,$3,$4,$5,$6,$7,$8,$9,$10);\nEND$$\nSET search_path TO pg_catalog,pg_temp;", "explanation": "SQL definition from the open-source timescaledb PostgreSQL project.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "plpgsql_function", "is_postgresql_specific": true, "sql_length": 904, "num_statements": 4} {"question": "PL/pgSQL test: Plperlu--1.0 (example 4).", "schema": null, "sql": "CREATE LANGUAGE plperlu\n HANDLER plperlu_call_handler\n INLINE plperlu_inline_handler\n VALIDATOR plperlu_validator;", "explanation": "PL/pgSQL example from PostgreSQL source test for Plperlu--1.0.", "validation_query": null, "source": "plpgsql_source", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 117, "num_statements": 1} {"question": "What is the average volume of timber harvested per year, per species?", "schema": "CREATE TABLE forests (id INT, species VARCHAR(255), year INT, volume FLOAT); INSERT INTO forests (id, species, year, volume) VALUES (1, 'Pine', 2018, 1200.5), (2, 'Oak', 2019, 1500.3), (3, 'Maple', 2020, 2000.7), (4, 'Birch', 2020, 1750.6), (5, 'Pine', 2019, 1300.8), (6, 'Spruce', 2018, 1400.9), (7, 'Spruce', 2019, 1500.0);", "sql": "SELECT species, AVG(volume) as avg_volume FROM forests GROUP BY species;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 72, "num_statements": 1} {"question": "What is the total carbon sequestration of all the trees in the Trees table, if each tree sequesters 48.19 pounds of carbon per year on average?", "schema": "CREATE TABLE Trees (id INT, species VARCHAR(255), age INT); INSERT INTO Trees (id, species, age) VALUES (1, 'Oak', 50), (2, 'Pine', 30), (3, 'Maple', 40);", "sql": "SELECT SUM(48.19 * age) FROM Trees;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 35, "num_statements": 1} {"question": "What is the name and location of the rural hospital with the most medical professionals?", "schema": "CREATE TABLE medical_professionals (id INT, name VARCHAR(50), hospital_id INT); CREATE TABLE hospitals (id INT, name VARCHAR(50), location VARCHAR(50)); INSERT INTO medical_professionals (id, name, hospital_id) VALUES (1, 'Dr. Smith', 1), (2, 'Dr. Johnson', 1), (3, 'Dr. Lee', 2); INSERT INTO hospitals (id, name, location) VALUES (1, 'Hospital A', 'New York'), (2, 'Hospital B', 'New York');", "sql": "SELECT hospitals.name, hospitals.location FROM hospitals JOIN (SELECT hospital_id, COUNT(*) as num_of_professionals FROM medical_professionals GROUP BY hospital_id ORDER BY num_of_professionals DESC LIMIT 1) AS subquery ON hospitals.id = subquery.hospital_id;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 259, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which occurence has the matrix sim marked as 0.925?", "schema": "CREATE TABLE table_26708105_2 (occurrence VARCHAR, matrix_sim VARCHAR)", "sql": "SELECT occurrence FROM table_26708105_2 WHERE matrix_sim = '0.925';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 67, "num_statements": 1} {"question": "Which threat intelligence sources have provided the most actionable intelligence in the last quarter?", "schema": "CREATE TABLE threat_intelligence(id INT, source VARCHAR(50), category VARCHAR(50), rating INT, date DATE);", "sql": "SELECT source, SUM(rating) as total_rating FROM threat_intelligence WHERE date > DATE(NOW()) - INTERVAL 90 DAY AND category = 'actionable' GROUP BY source ORDER BY total_rating DESC;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 182, "num_statements": 1} {"question": "Regions with the highest percentage of cruelty-free product preferences?", "schema": "CREATE TABLE user_preferences (user_id INT, region VARCHAR(50), cruelty_free BOOLEAN); INSERT INTO user_preferences (user_id, region, cruelty_free) VALUES (1, 'North America', true), (2, 'Europe', false), (3, 'Asia', true);", "sql": "SELECT region, AVG(cruelty_free) as cruelty_free_avg FROM user_preferences GROUP BY region ORDER BY cruelty_free_avg DESC;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 122, "num_statements": 1} {"question": "What is the maximum, minimum, and average accommodation cost for students with cognitive disabilities in each region?", "schema": "CREATE TABLE AccommodationCostsForCognitiveDisabilities (StudentID INT, Region VARCHAR(50), AccommodationCost DECIMAL(10,2));", "sql": "SELECT Region, MIN(AccommodationCost) as MinCost, MAX(AccommodationCost) as MaxCost, AVG(AccommodationCost) as AvgCost FROM AccommodationCostsForCognitiveDisabilities WHERE DisabilityType = 'cognitive disability' GROUP BY Region;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 229, "num_statements": 1} {"question": "Identify the average ESG score for all sectors.", "schema": "CREATE TABLE companies (id INT, sector VARCHAR(20), ESG_score FLOAT); INSERT INTO companies (id, sector, ESG_score) VALUES (1, 'technology', 78.3), (2, 'finance', 65.2), (3, 'technology', 81.5), (4, 'healthcare', 72.1);", "sql": "SELECT sector, AVG(ESG_score) FROM companies GROUP BY sector;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 61, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who is the opponent when the record is 3-8?", "schema": "CREATE TABLE table_name_53 (opponent VARCHAR, record VARCHAR)", "sql": "SELECT opponent FROM table_name_53 WHERE record = '3-8';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 56, "num_statements": 1} {"question": "PostgreSQL regression test 'union': Write the SELECT query (example 20).", "schema": null, "sql": "SELECT f1 AS nine FROM FLOAT8_TBL\nUNION\nSELECT f1 FROM INT4_TBL\nORDER BY 1;", "explanation": "Regression test for Union in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT f1 AS nine FROM FLOAT8_TBL\nUNION\nSELECT f1 FROM INT4_TBL\nORDER BY 1) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 75, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Tell me the circuit for alfa romeo swedish ice race", "schema": "CREATE TABLE table_name_5 (circuit VARCHAR, winning_constructor VARCHAR, name VARCHAR)", "sql": "SELECT circuit FROM table_name_5 WHERE winning_constructor = 'alfa romeo' AND name = 'swedish ice race';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 104, "num_statements": 1} {"question": "What is the average risk score of vulnerabilities detected in the last 30 days, grouped by software vendor?", "schema": "CREATE TABLE vulnerabilities (id INT, detection_date DATE, software_vendor VARCHAR(255), risk_score INT); INSERT INTO vulnerabilities (id, detection_date, software_vendor, risk_score) VALUES (1, '2022-01-01', 'VendorA', 7), (2, '2022-01-05', 'VendorB', 5), (3, '2022-01-10', 'VendorA', 9);", "sql": "SELECT software_vendor, AVG(risk_score) as avg_risk_score FROM vulnerabilities WHERE detection_date >= DATE(NOW()) - INTERVAL 30 DAY GROUP BY software_vendor;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 158, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: what is the highest pick number of the CFL team, the winnipeg blue bombers?", "schema": "CREATE TABLE table_name_42 (pick__number INTEGER, cfl_team VARCHAR)", "sql": "SELECT MAX(pick__number) FROM table_name_42 WHERE cfl_team = 'winnipeg blue bombers';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 85, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: If the Away Team is Arsenal what is the Score?", "schema": "CREATE TABLE table_name_47 (score VARCHAR, away_team VARCHAR)", "sql": "SELECT score FROM table_name_47 WHERE away_team = 'arsenal';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 60, "num_statements": 1} {"question": "What is the total data usage for each country in the last month?", "schema": "CREATE TABLE broadband_subscribers (subscriber_id INT, country VARCHAR(50), data_usage INT, subscription_date DATE); INSERT INTO broadband_subscribers (subscriber_id, country, data_usage, subscription_date) VALUES (1, 'India', 50, '2022-03-15'), (2, 'Brazil', 75, '2022-03-10'), (3, 'Indonesia', 100, '2022-03-05'), (4, 'India', 25, '2022-03-20'), (5, 'Brazil', 10, '2022-03-25');", "sql": "SELECT country, SUM(data_usage) AS total_data_usage FROM broadband_subscribers WHERE subscription_date >= DATE_SUB(CURDATE(), INTERVAL 1 MONTH) GROUP BY country;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 161, "num_statements": 1} {"question": "What was the total number of cases heard by each judge in the criminal division in 2020?", "schema": "CREATE TABLE Judges (Name VARCHAR(255), Division VARCHAR(255), CasesHeard INT); INSERT INTO Judges (Name, Division, CasesHeard) VALUES ('Judge 1', 'Criminal', 100), ('Judge 2', 'Civil', 125), ('Judge 3', 'Criminal', 150);", "sql": "SELECT Division, SUM(CasesHeard) as TotalCasesHeard FROM Judges WHERE Division = 'Criminal' AND YEAR(EventDate) = 2020 GROUP BY Division;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 137, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'polymorphism' (example 247).", "schema": null, "sql": "create function testpolym(a int) returns table(a int) as $$ select $1;$$ language sql;\nselect * from testpolym(37);\ndrop function testpolym(int);\n\n-- test polymorphic params and defaults\ncreate function dfunc(a anyelement, b anyelement = null, flag bool = true)\nreturns anyelement as $$\n select case when $3 then $1 else $2 end;", "explanation": "DDL from PostgreSQL core regression test for Polymorphism.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 329, "num_statements": 5} {"question": "Generate PostgreSQL SQL for: What is the memory with a socket g1 and a 1/1/6/9 turbo?", "schema": "CREATE TABLE table_name_93 (memory VARCHAR, socket VARCHAR, turbo VARCHAR)", "sql": "SELECT memory FROM table_name_93 WHERE socket = 'socket g1' AND turbo = '1/1/6/9';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 82, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'oid8' (example 17).", "schema": null, "sql": "INSERT INTO OID8_TBL(f1) VALUES (' 5d');", "explanation": "DML from PostgreSQL core regression test for Oid8.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 43, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which Player had a Long of 5 and Yards of 6?", "schema": "CREATE TABLE table_name_74 (player VARCHAR, long VARCHAR, yards VARCHAR)", "sql": "SELECT player FROM table_name_74 WHERE long = '5' AND yards = '6';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the nationality of the swimmer with a rank over 2 with a time of 55.77?", "schema": "CREATE TABLE table_name_47 (nationality VARCHAR, rank VARCHAR, time VARCHAR)", "sql": "SELECT nationality FROM table_name_47 WHERE rank > 2 AND time = '55.77';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 72, "num_statements": 1} {"question": "What is the total number of workers represented by unions with headquarters in 'New York'?", "schema": "CREATE TABLE if not exists union_membership (union_id INT, worker_id INT); CREATE TABLE if not exists unions (union_id INT, union_name TEXT, headquarters_address TEXT, total_workers INT); INSERT INTO union_membership (union_id, worker_id) VALUES (1, 1001), (1, 1002), (1, 1003), (2, 2001), (2, 2002), (3, 3001); INSERT INTO unions (union_id, union_name, headquarters_address, total_workers) VALUES (1, 'United Steelworkers', '60 Boulevard of the Allies, Pittsburgh, PA 15222', 5000), (2, 'Teamsters', '25 Louisiana Ave NW, Washington, DC 20001', 7000), (3, 'UAW', '8000 E Jefferson Ave, Detroit, MI 48214', 6000), (4, 'NYC Labor Council', '275 Seventh Avenue, 18th Floor, New York, NY 10001', 8000);", "sql": "SELECT SUM(total_workers) FROM unions WHERE headquarters_address LIKE '%New York%';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 83, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: I want to know the home team for mcg venue", "schema": "CREATE TABLE table_name_71 (home_team VARCHAR, venue VARCHAR)", "sql": "SELECT home_team FROM table_name_71 WHERE venue = 'mcg';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 56, "num_statements": 1} {"question": "List all sustainable building practices with a LEED certification date in the year 2021", "schema": "CREATE TABLE sustainable_practices (project_id INT, leed_certification_date DATE);", "sql": "SELECT * FROM sustainable_practices WHERE EXTRACT(YEAR FROM leed_certification_date) = 2021;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 92, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: who wrote the no 2?", "schema": "CREATE TABLE table_21726793_1 (written_by VARCHAR, no VARCHAR)", "sql": "SELECT written_by FROM table_21726793_1 WHERE no = 2;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 53, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the total number of silver medals for countries of rank 14, with less than 1 total medals?", "schema": "CREATE TABLE table_name_66 (silver INTEGER, rank VARCHAR, total VARCHAR)", "sql": "SELECT SUM(silver) FROM table_name_66 WHERE rank = '14' AND total < 1;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 70, "num_statements": 1} {"question": "pgTAP test for Pgtap--Unpackaged--0.91.0 (assertion 431).", "schema": null, "sql": "ALTER EXTENSION pgtap ADD FUNCTION functions_are ( NAME[], TEXT );", "explanation": "SQL assertion from pgTAP test suite for Pgtap--Unpackaged--0.91.0.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'copydml' (example 46).", "schema": null, "sql": "create rule qqq as on delete to copydml_test do instead (insert into copydml_test default values; insert into copydml_test default values);", "explanation": "DDL from PostgreSQL core regression test for Copydml.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_advanced", "is_postgresql_specific": true, "sql_length": 139, "num_statements": 2} {"question": "Calculate the total number of refills for antibiotics prescribed in Q1 2022.", "schema": "CREATE TABLE medications (id INT, patient_id INT, medication VARCHAR(30), prescription_date DATE, refills INT); INSERT INTO medications (id, patient_id, medication, prescription_date, refills) VALUES (1, 3, 'Amoxicillin', '2022-02-01', 2), (2, 4, 'Ciprofloxacin', '2022-03-15', 1);", "sql": "SELECT SUM(refills) FROM medications WHERE medication LIKE '%antibiotic%' AND prescription_date BETWEEN '2022-01-01' AND '2022-03-31';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 134, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many values reflect the overall record of the team coached by Frank Beamer?", "schema": "CREATE TABLE table_28744929_2 (overall_record VARCHAR, head_coach VARCHAR)", "sql": "SELECT COUNT(overall_record) FROM table_28744929_2 WHERE head_coach = 'Frank Beamer';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 85, "num_statements": 1} {"question": "What is the total number of 'Zumba' classes offered?", "schema": "CREATE TABLE Classes (ClassID INT, ClassType VARCHAR(20), ClassDate DATE); INSERT INTO Classes (ClassID, ClassType, ClassDate) VALUES (1, 'Yoga', '2022-01-05'), (2, 'Pilates', '2022-01-07'), (3, 'Zumba', '2022-02-03');", "sql": "SELECT COUNT(ClassID) FROM Classes WHERE ClassType = 'Zumba';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 61, "num_statements": 1} {"question": "Which regions had the highest total donations in 2019?", "schema": "CREATE TABLE donations (donation_id INT, donation_date DATE, amount DECIMAL(10,2), region TEXT); INSERT INTO donations (donation_id, donation_date, amount, region) VALUES (1, '2019-01-01', 1000.00, 'North America'), (2, '2019-02-01', 1500.00, 'Europe'), (3, '2019-03-01', 2000.00, 'Asia');", "sql": "SELECT region, SUM(amount) as total_donations FROM donations WHERE YEAR(donation_date) = 2019 GROUP BY region ORDER BY total_donations DESC;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 140, "num_statements": 1} {"question": "What is the maximum number of exhibitions attended by a single visitor?", "schema": "CREATE TABLE Visitor_Exhibitions (visitor_id INT, exhibition_id INT);", "sql": "SELECT MAX(Visitor_Exhibitions_agg.visitor_exhibitions) FROM (SELECT COUNT(*) AS visitor_exhibitions FROM Visitor_Exhibitions GROUP BY visitor_id) AS Visitor_Exhibitions_agg;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 174, "num_statements": 1} {"question": "What is the minimum number of marine species observed in the Andaman Sea?", "schema": "CREATE TABLE marine_species_count (id INT, location TEXT, species_count INT); INSERT INTO marine_species_count (id, location, species_count) VALUES (1, 'Andaman Sea', 500), (2, 'South China Sea', 700), (3, 'Mediterranean Sea', 600);", "sql": "SELECT MIN(species_count) FROM marine_species_count WHERE location = 'Andaman Sea';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 83, "num_statements": 1} {"question": "Who is the oldest blues artist in our database?", "schema": "CREATE TABLE Artists (ArtistID INT, Name VARCHAR(255), Age INT, Genre VARCHAR(255)); INSERT INTO Artists VALUES (1, 'B.B. King', 92, 'Blues'); INSERT INTO Artists VALUES (2, 'Muddy Waters', 89, 'Blues'); INSERT INTO Artists VALUES (3, 'Buddy Guy', 86, 'Blues');", "sql": "SELECT Name, MAX(Age) FROM Artists WHERE Genre = 'Blues';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 57, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What date was the show's weekly ranking 12?", "schema": "CREATE TABLE table_27319183_7 (date VARCHAR, weekly_rank VARCHAR)", "sql": "SELECT date FROM table_27319183_7 WHERE weekly_rank = 12;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 57, "num_statements": 1} {"question": "Write the PL/pgSQL object from PostgreSQL regression test 'polymorphism' (example 19).", "schema": null, "sql": "drop function polyf(x anycompatible, y anycompatible);", "explanation": "PL/pgSQL object from PostgreSQL core test for Polymorphism.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 54, "num_statements": 1} {"question": "PostgreSQL regression test 'pg_dependencies': Write the SELECT query (example 61).", "schema": null, "sql": "SELECT '[{\"attributes\" : [1, {}], \"dependency\" : 1, \"degree\": \"1.2\"}]'::pg_dependencies;", "explanation": "Regression test for Pg Dependencies in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT '[{\"attributes\" : [1, {}], \"dependency\" : 1, \"degree\": \"1.2\"}]'::pg_dependencies) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 88, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: what is score of a team with tie of 4?", "schema": "CREATE TABLE table_name_38 (score VARCHAR, tie_no VARCHAR)", "sql": "SELECT score FROM table_name_38 WHERE tie_no = '4';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 51, "num_statements": 1} {"question": "What is the total number of high severity vulnerabilities in the 'vuln_assessments' table?", "schema": "CREATE TABLE vuln_assessments (id INT, severity VARCHAR(10), description TEXT); INSERT INTO vuln_assessments (id, severity, description) VALUES (1, 'high', 'SQL Injection'), (2, 'medium', 'Cross-Site Scripting'), (3, 'high', 'Privilege Escalation');", "sql": "SELECT COUNT(*) FROM vuln_assessments WHERE severity = 'high';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 62, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: For the game with 528 attendance, what was the result?", "schema": "CREATE TABLE table_name_14 (result VARCHAR, attendance VARCHAR)", "sql": "SELECT result FROM table_name_14 WHERE attendance = '528';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 58, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the value of barrel twist when the barrel profile is SFW?", "schema": "CREATE TABLE table_12834315_8 (barrel_twist VARCHAR, barrel_profile VARCHAR)", "sql": "SELECT barrel_twist FROM table_12834315_8 WHERE barrel_profile = 'SFW';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 71, "num_statements": 1} {"question": "Which public works project in 'South' region has the highest construction cost?", "schema": "CREATE TABLE Projects (id INT, name TEXT, region TEXT, cost FLOAT); INSERT INTO Projects (id, name, region, cost) VALUES (1, 'ProjectA', 'South', 2500000.00), (2, 'ProjectB', 'South', 3000500.75), (3, 'ProjectC', 'South', 2200000.50);", "sql": "SELECT name, MAX(cost) FROM Projects WHERE region = 'South';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 60, "num_statements": 1} {"question": "Show all climate communication projects from the 'communication_projects' table", "schema": "CREATE TABLE communication_projects (id INT PRIMARY KEY, name VARCHAR(255), location VARCHAR(255), description TEXT, start_date DATE, end_date DATE, budget FLOAT); INSERT INTO communication_projects (id, name, location, description, start_date, end_date, budget) VALUES (1, 'Public Awareness Campaign', 'New York', 'Raising awareness of climate change', '2018-01-01', '2018-12-31', 200000);", "sql": "SELECT * FROM communication_projects;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 37, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which club was in Position 1 in 1959-1960?", "schema": "CREATE TABLE table_name_59 (clubs VARCHAR, position_in_1959_1960 VARCHAR)", "sql": "SELECT clubs FROM table_name_59 WHERE position_in_1959_1960 = '1';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'cube' (example 99).", "schema": null, "sql": "SELECT '[(-1,-1,-1),(1,1,1)]'::cube && '0'::cube AS bool;", "explanation": "Example query from the 'cube' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "query_select", "is_postgresql_specific": true, "sql_length": 57, "num_statements": 1} {"question": "PostgreSQL regression test 'inherit': Write the SELECT query (example 36).", "schema": null, "sql": "SELECT relname, d.* FROM ONLY d, pg_class where d.tableoid = pg_class.oid;", "explanation": "Regression test for Inherit in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT relname, d.* FROM ONLY d, pg_class where d.tableoid = pg_class.oid) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": true, "sql_length": 74, "num_statements": 1} {"question": "Delete all records related to diversity training programs.", "schema": "CREATE TABLE TrainingPrograms (ProgramID INT, ProgramName VARCHAR(30), ProgramType VARCHAR(20)); INSERT INTO TrainingPrograms (ProgramID, ProgramName, ProgramType) VALUES (1, 'Diversity Training', 'Diversity'), (2, 'Leadership Training', 'Leadership'), (3, 'Team Building', 'Teamwork');", "sql": "DELETE FROM TrainingPrograms WHERE ProgramType = 'Diversity';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "dml_delete", "is_postgresql_specific": false, "sql_length": 61, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the Class for the ERP W of more than 2 and the call sign of w223au?", "schema": "CREATE TABLE table_name_50 (class VARCHAR, erp_w VARCHAR, call_sign VARCHAR)", "sql": "SELECT class FROM table_name_50 WHERE erp_w > 2 AND call_sign = 'w223au';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 73, "num_statements": 1} {"question": "What is the minimum weight for each type of artifact across all excavation sites?", "schema": "CREATE TABLE ExcavationSite (SiteID INT, SiteName VARCHAR(50)); INSERT INTO ExcavationSite (SiteID, SiteName) VALUES (1, 'Site A'), (2, 'Site B'), (3, 'Site C'); CREATE TABLE Artifact (ArtifactID INT, SiteID INT, Weight FLOAT, ObjectType VARCHAR(50)); INSERT INTO Artifact (ArtifactID, SiteID, Weight, ObjectType) VALUES (1, 1, 54.3, 'Pottery'), (2, 1, 32.1, 'Tool'), (3, 2, 120.5, 'Statue'), (4, 2, 12.7, 'Bead'), (5, 3, 8.4, 'Bead'), (6, 1, 20.5, 'Bead');", "sql": "SELECT a.ObjectType, MIN(a.Weight) AS MinWeight FROM Artifact a JOIN ExcavationSite e ON a.SiteID = e.SiteID GROUP BY a.ObjectType;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 131, "num_statements": 1} {"question": "What is the average donation amount by month in 2022?", "schema": "CREATE TABLE donations (id INT, donor VARCHAR(50), cause VARCHAR(50), amount DECIMAL(10, 2), donation_date DATE); INSERT INTO donations (id, donor, cause, amount, donation_date) VALUES (1, 'John Doe', 'Education', 500, '2022-04-01'), (2, 'Jane Smith', 'Health', 300, '2022-04-15'), (3, 'Alice Johnson', 'Environment', 700, '2022-05-05');", "sql": "SELECT EXTRACT(MONTH FROM donation_date) as month, AVG(amount) as avg_donation FROM donations WHERE donation_date BETWEEN '2022-01-01' AND '2022-12-31' GROUP BY month;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 167, "num_statements": 1} {"question": "What is the maximum energy efficiency (in kWh/m2) of buildings in Canada?", "schema": "CREATE TABLE Buildings (id INT, country VARCHAR(50), energy_efficiency FLOAT); INSERT INTO Buildings (id, country, energy_efficiency) VALUES (1, 'Canada', 0.45), (2, 'Canada', 0.51), (3, 'USA', 0.42);", "sql": "SELECT MAX(energy_efficiency) FROM Buildings WHERE country = 'Canada';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 70, "num_statements": 1} {"question": "Show the total number of users who have engaged with content related to indigenous rights in Canada, for the year 2021.", "schema": "CREATE TABLE user_interactions (id INT, user_id INT, content_type VARCHAR(50), interaction_date DATE); CREATE TABLE content (id INT, content_type VARCHAR(50), tags VARCHAR(500), country VARCHAR(50));", "sql": "SELECT COUNT(DISTINCT ui.user_id) as total_users FROM user_interactions ui JOIN content c ON ui.content_type = c.content_type WHERE c.tags LIKE '%indigenous rights%' AND c.country = 'Canada' AND interaction_date >= '2021-01-01' AND interaction_date <= '2021-12-31';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 265, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: List the players with 1285 new points?", "schema": "CREATE TABLE table_24431264_18 (player VARCHAR, new_points VARCHAR)", "sql": "SELECT player FROM table_24431264_18 WHERE new_points = 1285;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 61, "num_statements": 1} {"question": "What is the distribution of medical risks for astronauts by country?", "schema": "CREATE TABLE Astronauts (id INT, country TEXT);CREATE TABLE AstronautMedicalData (id INT, astronaut_id INT, medical_risk FLOAT);", "sql": "SELECT Astronauts.country, AVG(medical_risk) as avg_medical_risk, STDDEV(medical_risk) as stddev_medical_risk FROM Astronauts INNER JOIN AstronautMedicalData ON Astronauts.id = AstronautMedicalData.astronaut_id GROUP BY Astronauts.country;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 239, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the method used when the opponent was Tyson Griffin?", "schema": "CREATE TABLE table_name_75 (method VARCHAR, opponent VARCHAR)", "sql": "SELECT method FROM table_name_75 WHERE opponent = 'tyson griffin';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Gold that has a Rank of 6, and a Bronze larger than 0 had what total number of gold?", "schema": "CREATE TABLE table_name_46 (gold VARCHAR, rank VARCHAR, bronze VARCHAR)", "sql": "SELECT COUNT(gold) FROM table_name_46 WHERE rank = '6' AND bronze > 0;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 70, "num_statements": 1} {"question": "What is the total number of community health workers who have received implicit bias training, broken down by their ethnicity?", "schema": "CREATE TABLE CommunityHealthWorkers (WorkerID INT, Ethnicity VARCHAR(255), ImplicitBiasTraining DATE); INSERT INTO CommunityHealthWorkers (WorkerID, Ethnicity, ImplicitBiasTraining) VALUES (1, 'Hispanic', '2022-01-10'); INSERT INTO CommunityHealthWorkers (WorkerID, Ethnicity, ImplicitBiasTraining) VALUES (2, 'African American', '2021-12-15'); INSERT INTO CommunityHealthWorkers (WorkerID, Ethnicity, ImplicitBiasTraining) VALUES (3, 'Asian', '2022-02-03'); INSERT INTO CommunityHealthWorkers (WorkerID, Ethnicity, ImplicitBiasTraining) VALUES (4, 'Native American', '2021-08-02');", "sql": "SELECT Ethnicity, COUNT(*) as Total FROM CommunityHealthWorkers WHERE ImplicitBiasTraining IS NOT NULL GROUP BY Ethnicity;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 122, "num_statements": 1} {"question": "How many movies and TV shows were released in each month?", "schema": "CREATE TABLE Movies (id INT, title VARCHAR(255), release_date DATE); CREATE TABLE TVShows (id INT, title VARCHAR(255), release_date DATE);", "sql": "SELECT DATE_FORMAT(release_date, '%Y-%m') AS Release_Month, COUNT(*) AS Total_Releases FROM Movies GROUP BY Release_Month UNION ALL SELECT DATE_FORMAT(release_date, '%Y-%m') AS Release_Month, COUNT(*) AS Total_Releases FROM TVShows GROUP BY Release_Month;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 255, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the Outcome when the partner was kristof vliegen?", "schema": "CREATE TABLE table_name_20 (outcome VARCHAR, partner VARCHAR)", "sql": "SELECT outcome FROM table_name_20 WHERE partner = 'kristof vliegen';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 68, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the title of the episode having exactly 9.64 million US viewers?", "schema": "CREATE TABLE table_24910742_1 (title VARCHAR, us_viewers__millions_ VARCHAR)", "sql": "SELECT title FROM table_24910742_1 WHERE us_viewers__millions_ = '9.64';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 72, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the number of episode for phyllo dough", "schema": "CREATE TABLE table_15187735_3 (episode VARCHAR, segment_c VARCHAR)", "sql": "SELECT COUNT(episode) FROM table_15187735_3 WHERE segment_c = 'Phyllo Dough';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 77, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What Entrepreneurs requested £60,000?", "schema": "CREATE TABLE table_name_27 (entrepreneur_s_ VARCHAR, money_requested__£_ VARCHAR)", "sql": "SELECT entrepreneur_s_ FROM table_name_27 WHERE money_requested__£_ = '60,000';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 79, "num_statements": 1} {"question": "What is the number of people impacted by disasters in each country, sorted by the total number of people impacted, for the last 3 years?", "schema": "CREATE TABLE disaster (disaster_id INT, name VARCHAR(255), location VARCHAR(255), start_date DATE, people_impacted INT); INSERT INTO disaster VALUES (1, 'Hurricane Katrina', 'USA', '2005-08-29', 100000); INSERT INTO disaster VALUES (2, 'Earthquake', 'Mexico', '2017-09-19', 50000);", "sql": "SELECT location as country, SUM(people_impacted) as total_people_impacted FROM disaster WHERE start_date >= DATE_SUB(CURRENT_DATE, INTERVAL 3 YEAR) GROUP BY location ORDER BY total_people_impacted DESC;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 202, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who was the away team that played Northampton Town at home with a tie number of replay?", "schema": "CREATE TABLE table_name_53 (away_team VARCHAR, tie_no VARCHAR, home_team VARCHAR)", "sql": "SELECT away_team FROM table_name_53 WHERE tie_no = 'replay' AND home_team = 'northampton town';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 95, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Tell me the result for week less than 4 and september 7, 1986", "schema": "CREATE TABLE table_name_46 (result VARCHAR, week VARCHAR, date VARCHAR)", "sql": "SELECT result FROM table_name_46 WHERE week < 4 AND date = 'september 7, 1986';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 79, "num_statements": 1} {"question": "PostgreSQL regression test 'polymorphism': Write the SELECT query (example 36).", "schema": null, "sql": "select x, pg_typeof(x), y, pg_typeof(y)\n from polyf(11, array[1, 2], point(1,2), point(3,4));", "explanation": "Regression test for Polymorphism in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select x, pg_typeof(x), y, pg_typeof(y)\n from polyf(11, array[1, 2], point(1,2), point(3,4))) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": true, "sql_length": 94, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'ltree' (example 212).", "schema": null, "sql": "SELECT 'tree.awdfg'::ltree @ '!tree | aWdf@*'::ltxtquery;", "explanation": "Example query from the 'ltree' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 57, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which role is most common for the staff?", "schema": "CREATE TABLE Project_Staff (role_code VARCHAR)", "sql": "SELECT role_code FROM Project_Staff GROUP BY role_code ORDER BY COUNT(*) DESC LIMIT 1;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 86, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many wins were there when the position was 26th?", "schema": "CREATE TABLE table_1507423_4 (wins VARCHAR, position VARCHAR)", "sql": "SELECT COUNT(wins) FROM table_1507423_4 WHERE position = '26th';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 64, "num_statements": 1} {"question": "What is the average population size of marine species in the Pacific Ocean, partitioned by habitat?", "schema": "CREATE TABLE pacific_species (id INT, species_name VARCHAR(255), population INT, habitat VARCHAR(255), ocean VARCHAR(255)); INSERT INTO pacific_species (id, species_name, population, habitat, ocean) VALUES (1, 'Giant Pacific Octopus', 20000, 'Deep Sea', 'Pacific');", "sql": "SELECT habitat, AVG(population) AS avg_population FROM pacific_species WHERE ocean = 'Pacific' GROUP BY habitat;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 112, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the game number that was on November 27?", "schema": "CREATE TABLE table_22871239_5 (_number VARCHAR, date VARCHAR)", "sql": "SELECT _number FROM table_22871239_5 WHERE date = 'November 27';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 64, "num_statements": 1} {"question": "PL/pgSQL test: Plpython Trigger (example 36).", "schema": null, "sql": "update trigger_test_view set v = 'update' where i = 1;", "explanation": "PL/pgSQL example from PostgreSQL source test for Plpython Trigger.", "validation_query": null, "source": "plpgsql_source", "difficulty": "basic", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 54, "num_statements": 1} {"question": "What is the total distance traveled by each route?", "schema": "CREATE TABLE route (route_id INT, line TEXT);CREATE TABLE trip_distance (distance INT, trip_id INT, route_id INT);", "sql": "SELECT r.line, SUM(td.distance) FROM route r INNER JOIN trip_distance td ON r.route_id = td.route_id GROUP BY r.line;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 117, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What district is Henry Hyde in", "schema": "CREATE TABLE table_1341423_13 (district VARCHAR, incumbent VARCHAR)", "sql": "SELECT district FROM table_1341423_13 WHERE incumbent = 'Henry Hyde';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 69, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What type of environment is the venue that was built in 2003?", "schema": "CREATE TABLE table_name_11 (environment VARCHAR, year_built VARCHAR)", "sql": "SELECT environment FROM table_name_11 WHERE year_built = '2003';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 64, "num_statements": 1} {"question": "pgTAP test for Pgtap--Unpackaged--0.91.0 (assertion 87).", "schema": null, "sql": "ALTER EXTENSION pgtap ADD FUNCTION has_table ( NAME, TEXT );", "explanation": "SQL assertion from pgTAP test suite for Pgtap--Unpackaged--0.91.0.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 60, "num_statements": 1} {"question": "What is the average fare amount and the number of fares for each route type?", "schema": "CREATE TABLE route_types (route_type_id INT, route_type_name VARCHAR(50)); CREATE TABLE fares (fare_id INT, route_id INT, fare_amount DECIMAL(5,2), payment_method VARCHAR(50), route_type_id INT);", "sql": "SELECT rt.route_type_name, f.fare_amount, AVG(f.fare_amount) AS avg_fare_amount, COUNT(f.fare_id) AS fare_count FROM fares f JOIN route_types rt ON f.route_type_id = rt.route_type_id GROUP BY rt.route_type_name, f.fare_amount ORDER BY avg_fare_amount DESC;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 256, "num_statements": 1} {"question": "Calculate the total sales for each restaurant location", "schema": "CREATE TABLE restaurants (restaurant_id INT, name VARCHAR(50), location VARCHAR(50)); INSERT INTO restaurants (restaurant_id, name, location) VALUES (1, 'Pizzeria Roma', 'NYC'), (2, 'Pizzeria Roma', 'LA'), (3, 'Taste of India', 'NYC'), (4, 'Taste of India', 'Chicago'); CREATE TABLE sales (sale_id INT, restaurant_id INT, amount DECIMAL); INSERT INTO sales (sale_id, restaurant_id, amount) VALUES (1, 1, 100), (2, 1, 200), (3, 2, 50), (4, 3, 300), (5, 4, 150);", "sql": "SELECT location, SUM(amount) as total_sales FROM sales JOIN restaurants ON sales.restaurant_id = restaurants.restaurant_id GROUP BY location;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 141, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the year when the performance is 60.73m and the age (years) is more than 45?", "schema": "CREATE TABLE table_name_85 (year VARCHAR, performance VARCHAR, age__years_ VARCHAR)", "sql": "SELECT COUNT(year) FROM table_name_85 WHERE performance = '60.73m' AND age__years_ > 45;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "advanced", "category": "plpgsql", "is_postgresql_specific": false, "sql_length": 88, "num_statements": 1} {"question": "What is the total number of animals in the rescue center from the 'Australian Outback' region?", "schema": "CREATE TABLE rescue_center_animals (animal_id INT, animal_name VARCHAR(50), region VARCHAR(50)); INSERT INTO rescue_center_animals (animal_id, animal_name, region) VALUES (1, 'Kangaroo', 'Australian Outback'); INSERT INTO rescue_center_animals (animal_id, animal_name, region) VALUES (2, 'Emu', 'Australian Outback');", "sql": "SELECT COUNT(animal_id) FROM rescue_center_animals WHERE region = 'Australian Outback';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 87, "num_statements": 1} {"question": "Show a SQL definition from the zombodb project (_type-conversions, item 3).", "schema": null, "sql": "CREATE OR REPLACE FUNCTION zdb.bytea_to_json(bytea) RETURNS json\n PARALLEL SAFE IMMUTABLE STRICT\n LANGUAGE sql AS\n$$\nSELECT to_json(encode($1, 'base64'));\n$$;", "explanation": "SQL definition from the open-source zombodb PostgreSQL project.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "plpgsql_function", "is_postgresql_specific": true, "sql_length": 164, "num_statements": 2} {"question": "Generate PostgreSQL SQL for: Which week had a result of L 38-21?", "schema": "CREATE TABLE table_name_91 (week VARCHAR, result VARCHAR)", "sql": "SELECT week FROM table_name_91 WHERE result = 'l 38-21';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 56, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: WHAT IS THE GRID OF HONDA, WITH 24 LAPS AND Time/Retired of +13.997?", "schema": "CREATE TABLE table_name_22 (grid VARCHAR, time_retired VARCHAR, manufacturer VARCHAR, laps VARCHAR)", "sql": "SELECT grid FROM table_name_22 WHERE manufacturer = 'honda' AND laps = '24' AND time_retired = '+13.997';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 105, "num_statements": 1} {"question": "Create a view showing the number of tickets sold per quarter for each sport.", "schema": "CREATE TABLE Sports (sport_id INT, sport_name VARCHAR(50)); CREATE TABLE Tickets (ticket_id INT, sport_id INT, quantity INT, purchase_date DATE);", "sql": "CREATE VIEW quarterly_sales AS SELECT sport_id, EXTRACT(QUARTER FROM purchase_date) as quarter, EXTRACT(YEAR FROM purchase_date) as year, SUM(quantity) as total_sales FROM Tickets GROUP BY sport_id, quarter, year;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "ddl_view", "is_postgresql_specific": false, "sql_length": 213, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: When playing against Yi Jingqian in the final on a hard surface what was the score?", "schema": "CREATE TABLE table_name_29 (score VARCHAR, surface VARCHAR, opponent_in_the_final VARCHAR)", "sql": "SELECT score FROM table_name_29 WHERE surface = 'hard' AND opponent_in_the_final = 'yi jingqian';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 97, "num_statements": 1} {"question": "What is the number of autonomous vehicle accidents per month?", "schema": "CREATE TABLE autonomous_accidents (accident_date DATE, is_autonomous BOOLEAN);", "sql": "SELECT DATE_TRUNC('month', accident_date) as month, COUNT(*) as num_accidents FROM autonomous_accidents WHERE is_autonomous = true GROUP BY month;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 146, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which Lungchang has a Halang of ʒɯ², and a Rera of ʒo²?", "schema": "CREATE TABLE table_name_85 (lungchang VARCHAR, halang VARCHAR, rera VARCHAR)", "sql": "SELECT lungchang FROM table_name_85 WHERE halang = 'ʒɯ²' AND rera = 'ʒo²';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 74, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Find the average and oldest age for students with different sex.", "schema": "CREATE TABLE student (sex VARCHAR, age INTEGER)", "sql": "SELECT AVG(age), MAX(age), sex FROM student GROUP BY sex;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 57, "num_statements": 1} {"question": "Determine the number of customers who have an account balance greater than the median balance for all accounts.", "schema": "CREATE TABLE accounts (customer_id INT, account_type VARCHAR(20), balance DECIMAL(10, 2));", "sql": "SELECT COUNT(DISTINCT customer_id) FROM accounts WHERE balance > PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY balance) OVER ();", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "query_window_function", "is_postgresql_specific": true, "sql_length": 126, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the Record of the Chicago Black Hawks Home game with the New York Rangers and a Score of 3–2?", "schema": "CREATE TABLE table_name_28 (record VARCHAR, score VARCHAR, home VARCHAR, visitor VARCHAR)", "sql": "SELECT record FROM table_name_28 WHERE home = 'chicago black hawks' AND visitor = 'new york rangers' AND score = '3–2';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 119, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What are the total number of positions on the Toronto team in 2006-07?", "schema": "CREATE TABLE table_10015132_9 (position VARCHAR, years_in_toronto VARCHAR)", "sql": "SELECT COUNT(position) FROM table_10015132_9 WHERE years_in_toronto = '2006-07';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 80, "num_statements": 1} {"question": "Delete the fashion trend with the lowest popularity.", "schema": "CREATE TABLE FASHION_TRENDS (trend_id INT PRIMARY KEY, trend_name VARCHAR(50), popularity INT); INSERT INTO FASHION_TRENDS (trend_id, trend_name, popularity) VALUES (1, 'TrendA', 1000), (2, 'TrendB', 800), (3, 'TrendC', 1200), (4, 'TrendD', 500);", "sql": "DELETE FROM FASHION_TRENDS WHERE trend_name = 'TrendD';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_delete", "is_postgresql_specific": false, "sql_length": 55, "num_statements": 1} {"question": "What is the total CO2 emission reduction from recycling programs in Japan and South Korea?", "schema": "CREATE TABLE recycling_reduction (country VARCHAR(30), reduction FLOAT); INSERT INTO recycling_reduction (country, reduction) VALUES ('Japan', 1.6), ('Japan', 1.8), ('South Korea', 0.9), ('South Korea', 1.2);", "sql": "SELECT SUM(reduction) FROM recycling_reduction WHERE country IN ('Japan', 'South Korea');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 89, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the usual attendance for july 2?", "schema": "CREATE TABLE table_name_6 (attendance INTEGER, date VARCHAR)", "sql": "SELECT AVG(attendance) FROM table_name_6 WHERE date = 'july 2';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 63, "num_statements": 1} {"question": "What is the average yield and production cost for each crop variety in the \"crops\" and \"expenses\" tables?", "schema": "CREATE TABLE crops (id INT, crop_name VARCHAR(50), yield INT, revenue INT); CREATE TABLE expenses (id INT, crop_id INT, cost INT);", "sql": "SELECT crops.crop_name, AVG(crops.yield) AS avg_yield, AVG(expenses.cost) AS avg_cost FROM crops INNER JOIN expenses ON crops.id = expenses.crop_id GROUP BY crops.crop_name;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 173, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What party won with a margin of 105731?", "schema": "CREATE TABLE table_22756549_1 (party VARCHAR, margin VARCHAR)", "sql": "SELECT party FROM table_22756549_1 WHERE margin = 105731;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 57, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'spgist' (example 17).", "schema": null, "sql": "insert into spgist_text_tbl (id, t)\nselect g, 'f' || repeat('o', 100) || g from generate_series(1, 10000) g\nunion all\nselect g, 'baaaaaaaaaaaaaar' || g from generate_series(1, 1000) g;", "explanation": "DML from PostgreSQL core regression test for Spgist.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 184, "num_statements": 1} {"question": "List all the wastewater treatment plants in Canada and their capacities.", "schema": "CREATE TABLE wastewater_treatment (plant_name VARCHAR(50), country VARCHAR(20), capacity_m3 INT); INSERT INTO wastewater_treatment (plant_name, country, capacity_m3) VALUES ('Vancouver WWTP', 'Canada', 1500000);", "sql": "SELECT plant_name, country, capacity_m3 FROM wastewater_treatment WHERE country = 'Canada';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 91, "num_statements": 1} {"question": "PostgreSQL regression test 'tstypes': Write the SELECT query (example 148).", "schema": null, "sql": "SELECT ts_rank_cd(' a:1 sa:2A sb:2D g'::tsvector, 'a <-> s:* <-> sa:A');", "explanation": "Regression test for Tstypes in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT ts_rank_cd(' a:1 sa:2A sb:2D g'::tsvector, 'a <-> s:* <-> sa:A')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "advanced", "category": "query_select", "is_postgresql_specific": true, "sql_length": 72, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: size (steps) of 15, and a just (cents) larger than 435.08 is what highest size (cents)?", "schema": "CREATE TABLE table_name_26 (size__cents_ INTEGER, size__steps_ VARCHAR, just__cents_ VARCHAR)", "sql": "SELECT MAX(size__cents_) FROM table_name_26 WHERE size__steps_ = 15 AND just__cents_ > 435.08;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 94, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What affiliation has an owned since larger than 1991, and also has a channel (tv/rf) of 42?", "schema": "CREATE TABLE table_name_58 (affiliation VARCHAR, owned_since VARCHAR, channel___tv___rf__ VARCHAR)", "sql": "SELECT affiliation FROM table_name_58 WHERE owned_since > 1991 AND channel___tv___rf__ = '42';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 94, "num_statements": 1} {"question": "List all food safety violations for 'Budget Burger' in Q2 2020.", "schema": "CREATE TABLE Inspections (restaurant_id INT, inspection_date DATE, violation_count INT); INSERT INTO Inspections (restaurant_id, inspection_date, violation_count) VALUES (2, '2020-04-01', 3), (2, '2020-07-15', 2);", "sql": "SELECT * FROM Inspections WHERE restaurant_id = 2 AND EXTRACT(QUARTER FROM inspection_date) = 2 AND EXTRACT(YEAR FROM inspection_date) = 2020;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 142, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Risca RFC has 54 tries for and how many draws?", "schema": "CREATE TABLE table_13741576_6 (drawn VARCHAR, tries_for VARCHAR)", "sql": "SELECT drawn FROM table_13741576_6 WHERE tries_for = '54';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 58, "num_statements": 1} {"question": "How many total units of Lanthanum were produced between 2018 and 2020?", "schema": "CREATE TABLE production (year INT, element VARCHAR(10), quantity INT); INSERT INTO production (year, element, quantity) VALUES (2015, 'Lanthanum', 1200), (2016, 'Lanthanum', 1400), (2017, 'Lanthanum', 1500), (2018, 'Lanthanum', 1700), (2019, 'Lanthanum', 1800), (2020, 'Lanthanum', 2000), (2021, 'Lanthanum', 2200);", "sql": "SELECT SUM(quantity) FROM production WHERE element = 'Lanthanum' AND year BETWEEN 2018 AND 2020;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 96, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the loss of the game that had a record of 3-3?", "schema": "CREATE TABLE table_name_41 (loss VARCHAR, record VARCHAR)", "sql": "SELECT loss FROM table_name_41 WHERE record = '3-3';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 52, "num_statements": 1} {"question": "What is the minimum number of followers for users who have posted more than 50 times in the United States?", "schema": "CREATE TABLE users (id INT, follower_count INT, country VARCHAR(255)); INSERT INTO users (id, follower_count, country) VALUES (1, 1500, 'Germany'), (2, 500, 'USA'), (3, 2000, 'Germany'), (4, 1200, 'France'), (5, 800, 'USA'); CREATE TABLE posts (id INT, user_id INT, post_date DATE); INSERT INTO posts (id, user_id, post_date) VALUES (1, 1, '2022-01-01'), (2, 1, '2022-01-02'), (3, 2, '2022-01-01'), (4, 3, '2022-01-01'), (5, 3, '2022-01-02'), (6, 3, '2022-01-03'), (7, 4, '2022-01-01'), (8, 4, '2022-01-02'), (9, 4, '2022-01-03'), (10, 4, '2022-01-04'), (11, 4, '2022-01-05'), (12, 5, '2022-01-01');", "sql": "SELECT MIN(follower_count) FROM users WHERE id IN (SELECT user_id FROM posts GROUP BY user_id HAVING COUNT(post_date) > 50) AND country = 'USA';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 144, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Tell me the average year for my queen", "schema": "CREATE TABLE table_name_65 (year INTEGER, english_title VARCHAR)", "sql": "SELECT AVG(year) FROM table_name_65 WHERE english_title = 'my queen';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 69, "num_statements": 1} {"question": "How many public parks have been created in each borough since 2015?", "schema": "CREATE TABLE Parks (ParkID INT, ParkName VARCHAR(255), Borough VARCHAR(255), CreationDate DATE); INSERT INTO Parks (ParkID, ParkName, Borough, CreationDate) VALUES (1, 'Central Park', 'Manhattan', '2015-07-01'), (2, 'Prospect Park', 'Brooklyn', '2016-04-01'), (3, 'Van Cortlandt Park', 'Bronx', '2017-08-01');", "sql": "SELECT COUNT(*), Borough FROM Parks WHERE CreationDate >= '2015-01-01' GROUP BY Borough;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 88, "num_statements": 1} {"question": "Add a new publication to the publications table for author with ID 5", "schema": "CREATE TABLE authors (id INT, name VARCHAR(50)); INSERT INTO authors (id, name) VALUES (5, 'John Smith'); CREATE TABLE publications (id INT, title VARCHAR(50), year INT, author_id INT);", "sql": "INSERT INTO publications (id, title, year, author_id) VALUES (101, 'New Publication', 2022, 5);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 95, "num_statements": 1} {"question": "What is the maximum salary in each department for employees who have been with the company for more than 10 years?", "schema": "CREATE TABLE Employees (EmployeeID INT, Gender VARCHAR(10), Department VARCHAR(20), Salary DECIMAL(10,2), Tenure INT); INSERT INTO Employees (EmployeeID, Gender, Department, Salary, Tenure) VALUES (1, 'Male', 'IT', 75000.00, 12), (2, 'Female', 'IT', 70000.00, 5), (3, 'Non-binary', 'HR', 60000.00, 8);", "sql": "SELECT Department, MAX(Salary) FROM Employees WHERE Tenure > 10 GROUP BY Department;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 84, "num_statements": 1} {"question": "PostgreSQL regression test 'jsonb': Write the SELECT query (example 627).", "schema": null, "sql": "SELECT '[\"a\",\"b\",\"c\",[1,2],null]'::jsonb -> -5;", "explanation": "Regression test for Jsonb in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT '[\"a\",\"b\",\"c\",[1,2],null]'::jsonb -> -5) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": true, "sql_length": 47, "num_statements": 1} {"question": "What is the average number of public participation events attended by citizens in each district for the year 2021?", "schema": "CREATE TABLE district (id INT, name VARCHAR); INSERT INTO district (id, name) VALUES (1, 'District 1'), (2, 'District 2'), (3, 'District 3'), (4, 'District 4'), (5, 'District 5'); CREATE TABLE participation (id INT, district_id INT, event_id INT, date DATE); INSERT INTO participation (id, district_id, event_id, date) VALUES (1, 1, 1, '2021-05-01'), (2, 2, 1, '2021-05-01'), (3, 3, 1, '2021-05-01'), (4, 4, 2, '2021-06-15'), (5, 5, 2, '2021-06-15'), (6, 1, 3, '2021-07-20'), (7, 2, 3, '2021-07-20'), (8, 3, 3, '2021-07-20'), (9, 4, 4, '2021-08-28'), (10, 5, 4, '2021-08-28');", "sql": "SELECT district_id, AVG(total_events) as avg_events FROM (SELECT district_id, event_id, COUNT(*) as total_events FROM participation WHERE date >= '2021-01-01' AND date < '2022-01-01' GROUP BY district_id, event_id) as subquery GROUP BY district_id;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 248, "num_statements": 1} {"question": "What is the total number of military personnel by branch for each country?", "schema": "CREATE TABLE MilitaryPersonnel (PersonnelID INT, PersonnelBranch VARCHAR(20), PersonnelCountry VARCHAR(30), PersonnelCount INT); INSERT INTO MilitaryPersonnel (PersonnelID, PersonnelBranch, PersonnelCountry, PersonnelCount) VALUES (1, 'Army', 'USA', 500000), (2, 'Navy', 'USA', 350000), (3, 'Air Force', 'Canada', 20000), (4, 'Marines', 'UK', 25000);", "sql": "SELECT PersonnelBranch, PersonnelCountry, SUM(PersonnelCount) as Total FROM MilitaryPersonnel GROUP BY PersonnelBranch, PersonnelCountry;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 137, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the 2007 value with a 2r in 2012 and 1r in 2002?", "schema": "CREATE TABLE table_name_53 (Id VARCHAR)", "sql": "SELECT 2007 FROM table_name_53 WHERE 2012 = '2r' AND 2002 = '1r';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 65, "num_statements": 1} {"question": "What is the earliest launch date for each aircraft model?", "schema": "CREATE TABLE aircrafts (aircraft_id INT, model VARCHAR(50), launch_date DATE); INSERT INTO aircrafts (aircraft_id, model, launch_date) VALUES (1, 'Boeing 747', '2000-01-01'), (2, 'Airbus A320', '2010-01-01'), (3, 'Boeing 737', '1995-01-01'); CREATE TABLE accidents (accident_id INT, aircraft_id INT, date DATE); INSERT INTO accidents (accident_id, aircraft_id) VALUES (1, 1), (2, 1), (3, 3), (4, 2), (5, 2);", "sql": "SELECT model, MIN(launch_date) as earliest_launch_date FROM aircrafts WHERE aircraft_id NOT IN (SELECT aircraft_id FROM accidents) GROUP BY model;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 146, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What park is Boardwalk Bullet located in?", "schema": "CREATE TABLE table_2665085_1 (park VARCHAR, name VARCHAR)", "sql": "SELECT park FROM table_2665085_1 WHERE name = 'Boardwalk Bullet';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 65, "num_statements": 1} {"question": "What is the number of public libraries in each county in the state of California, including their names and total book collections?", "schema": "CREATE TABLE counties(id INT, name TEXT, state TEXT); INSERT INTO counties VALUES (1, 'County A', 'California'); INSERT INTO counties VALUES (2, 'County B', 'California'); INSERT INTO counties VALUES (3, 'County C', 'California'); CREATE TABLE libraries(id INT, county_id INT, name TEXT, total_books INT); INSERT INTO libraries VALUES (1, 1, 'Library A', 10000); INSERT INTO libraries VALUES (2, 1, 'Library B', 12000); INSERT INTO libraries VALUES (3, 2, 'Library C', 14000); INSERT INTO libraries VALUES (4, 3, 'Library D', 16000);", "sql": "SELECT c.name as county_name, l.name as library_name, COUNT(*) as library_count, SUM(l.total_books) as total_books FROM counties c JOIN libraries l ON c.id = l.county_id WHERE c.state = 'California' GROUP BY c.name, l.name;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 223, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who is Wayne Day's crew chief?", "schema": "CREATE TABLE table_2182170_1 (crew_chief VARCHAR, listed_owner_s_ VARCHAR)", "sql": "SELECT crew_chief FROM table_2182170_1 WHERE listed_owner_s_ = 'Wayne Day';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 75, "num_statements": 1} {"question": "Delete the record for 'capacity_building' table, where 'program_name' is 'Youth Mentorship'.", "schema": "CREATE TABLE capacity_building (id INT, program_name VARCHAR(25)); INSERT INTO capacity_building (id, program_name) VALUES (1, 'Youth Mentorship'), (2, 'Leadership Development');", "sql": "DELETE FROM capacity_building WHERE program_name = 'Youth Mentorship';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "dml_delete", "is_postgresql_specific": false, "sql_length": 70, "num_statements": 1} {"question": "What is the total number of patients diagnosed with Ebola in Democratic Republic of Congo in 2021?", "schema": "CREATE TABLE Patients (ID INT, Gender VARCHAR(10), Disease VARCHAR(20), Country VARCHAR(30), Diagnosis_Date DATE); INSERT INTO Patients (ID, Gender, Disease, Country, Diagnosis_Date) VALUES (1, 'Male', 'Ebola', 'Democratic Republic of Congo', '2021-01-01');", "sql": "SELECT COUNT(*) FROM Patients WHERE Disease = 'Ebola' AND Country = 'Democratic Republic of Congo' AND YEAR(Diagnosis_Date) = 2021;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 131, "num_statements": 1} {"question": "Write the PL/pgSQL object from PostgreSQL regression test 'opr_sanity' (example 46).", "schema": null, "sql": "-- Look for casts to/from the same type that aren't length coercion functions.\n-- (We assume they are length coercions if they take multiple arguments.)\n-- Such entries are not necessarily harmful, but they are useless.\n\nSELECT *\nFROM pg_cast c\nWHERE castsource = casttarget AND castfunc = 0;", "explanation": "PL/pgSQL object from PostgreSQL core test for Opr Sanity.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 292, "num_statements": 1} {"question": "PostgreSQL Ddl: show example 91.", "schema": null, "sql": "CREATE TABLE measurement ( city_id int not null, logdate date not null, peaktemp int, unitsales int ) PARTITION BY RANGE (logdate);", "explanation": "Example from PostgreSQL documentation on Ddl.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "ddl_table", "is_postgresql_specific": true, "sql_length": 131, "num_statements": 1} {"question": "PostgreSQL regression test 'json': Write the SELECT query (example 1).", "schema": null, "sql": "SELECT '\"\"'::json;\t\t\t\t-- OK.\nSELECT $$''$$::json;\t\t\t-- ERROR, single quotes are not allowed\nSELECT '\"abc\"'::json;\t\t\t-- OK\nSELECT '\"abc'::json;\t\t\t-- ERROR, quotes not closed\nSELECT '\"abc\ndef\"'::json;\t\t\t\t\t-- ERROR, unescaped newline in string constant\nSELECT '\"\\n\\\"\\\\\"'::json;\t\t-- OK, legal escapes\nSELECT '\"\\v\"'::json;\t\t\t-- ERROR, not a valid JSON escape\n\n-- Check fast path for longer strings (at least 16 bytes long)\nSELECT ('\"'||repeat('.', 12)||'abc\"')::json; -- OK\nSELECT ('\"'||repeat('.', 12)||'abc\\n\"')::json; -- OK, legal escapes\n\n-- Test various lengths of strings to validate SIMD processing to escape\n-- special chars in the JSON.\nSELECT row_to_json(j)::jsonb FROM (\n SELECT left(E'abcdefghijklmnopqrstuv\"\\twxyz012345678', a) AS a\n FROM generate_series(0,37) a\n) j;\n\n-- see json_encoding test for input with unicode escapes\n\n-- Numbers.\nSELECT '1'::json;\t\t\t\t-- OK\nSELECT '0'::json;\t\t\t\t-- OK\nSELECT '01'::json;\t\t\t\t-- ERROR, not valid according to JSON spec\nSELECT '0.1'::json;\t\t\t\t-- OK\nSELECT '9223372036854775808'::json;\t-- OK, even though it's too large for int8\nSELECT '1e100'::json;\t\t\t-- OK\nSELECT '1.3e100'::json;\t\t\t-- OK\nSELECT '1f2'::json;\t\t\t\t-- ERROR\nSELECT '0.x1'::json;\t\t\t-- ERROR\nSELECT '1.3ex100'::json;\t\t-- ERROR\n\n-- Arrays.\nSELECT '[]'::json;\t\t\t\t-- OK\nSELECT '[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]'::json; -- OK\nSELECT '[1,2]'::json;\t\t\t-- OK\nSELECT '[1,2,]'::json;\t\t\t-- ERROR, trailing comma\nSELECT '[1,2'::json;\t\t\t-- ERROR, no closing bracket\nSELECT '[1,[2]'::json;\t\t\t-- ERROR, no closing bracket\n\n-- Objects.\nSELECT '{}'::json;\t\t\t\t-- OK\nSELECT '{\"abc\"}'::json;\t\t\t-- ERROR, no value\nSELECT '{\"abc\":1}'::json;\t\t-- OK\nSELECT '{1:\"abc\"}'::json;\t\t-- ERROR, keys must be strings\nSELECT '{\"abc\",1}'::json;\t\t-- ERROR, wrong separator\nSELECT '{\"abc\"=1}'::json;\t\t-- ERROR, totally wrong separator\nSELECT '{\"abc\"::1}'::json;\t\t-- ERROR, another wrong separator\nSELECT '{\"abc\":1,\"def\":2,\"ghi\":[3,4],\"hij\":{\"klm\":5,\"nop\":[6]}}'::json; -- OK\nSELECT '{\"abc\":1:2}'::json;\t\t-- ERROR, colon in wrong spot\nSELECT '{\"abc\":1,3}'::json;\t\t-- ERROR, no value\n\n-- Recursion.\nSET max_stack_depth = '100kB';\nSELECT repeat('[', 10000)::json;\nSELECT repeat('{\"a\":', 10000)::json;\nRESET max_stack_depth;\n\n-- Miscellaneous stuff.\nSELECT 'true'::json;\t\t\t-- OK\nSELECT 'false'::json;\t\t\t-- OK\nSELECT 'null'::json;\t\t\t-- OK\nSELECT ' true '::json;\t\t\t-- OK, even with extra whitespace\nSELECT 'true false'::json;\t\t-- ERROR, too many values\nSELECT 'true, false'::json;\t\t-- ERROR, too many values\nSELECT 'truf'::json;\t\t\t-- ERROR, not a keyword\nSELECT 'trues'::json;\t\t\t-- ERROR, not a keyword\nSELECT ''::json;\t\t\t\t-- ERROR, no value\nSELECT ' '::json;\t\t\t-- ERROR, no value\n\n-- Multi-line JSON input to check ERROR reporting\nSELECT '{\n\t\t\"one\": 1,\n\t\t\"two\":\"two\",\n\t\t\"three\":\n\t\ttrue}'::json; -- OK\nSELECT '{\n\t\t\"one\": 1,\n\t\t\"two\":,\"two\", -- ERROR extraneous comma before field \"two\"\n\t\t\"three\":\n\t\ttrue}'::json;\nSELECT '{\n\t\t\"one\": 1,\n\t\t\"two\":\"two\",\n\t\t\"averyveryveryveryveryveryveryveryveryverylongfieldname\":}'::json;\n-- ERROR missing value for last field\n\n-- test non-error-throwing input\nselect pg_input_is_valid('{\"a\":true}', 'json');\nselect pg_input_is_valid('{\"a\":true', 'json');\nselect * from pg_input_error_info('{\"a\":true', 'json');\n\n--constructors\n-- array_to_json\n\nSELECT array_to_json(array(select 1 as a));\nSELECT array_to_json(array_agg(q),false) from (select x as b, x * 2 as c from generate_series(1,3) x) q;\nSELECT array_to_json(array_agg(q),true) from (select x as b, x * 2 as c from generate_series(1,3) x) q;\nSELECT array_to_json(array_agg(q),false)\n FROM ( SELECT $$a$$ || x AS b, y AS c,\n ARRAY[ROW(x.*,ARRAY[1,2,3]),\n ROW(y.*,ARRAY[4,5,6])] AS z\n FROM generate_series(1,2) x,\n generate_series(4,5) y) q;", "explanation": "Regression test for Json in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT '\"\"'::json;\t\t\t\t-- OK.\nSELECT $$''$$::json;\t\t\t-- ERROR, single quotes are not allowed\nSELECT '\"abc\"'::json;\t\t\t-- OK\nSELECT '\"abc'::json;\t\t\t-- ERROR, quotes not closed\nSELECT '\"abc\ndef\"'::json;\t\t\t\t\t-- ERROR, unescaped newline in string constant\nSELECT '\"\\n\\\"\\\\\"'::json;\t\t-- OK, legal escapes\nSELECT '\"\\v\"'::json;\t\t\t-- ERROR, not a valid JSON escape\n\n-- Check fast path for longer strings (at least 16 bytes long)\nSELECT ('\"'||repeat('.', 12)||'abc\"')::json; -- OK\nSELECT ('\"'||repeat('.', 12)||'abc\\n\"')::json; -- OK, legal escapes\n\n-- Test various lengths of strings to validate SIMD processing to escape\n-- special chars in the JSON.\nSELECT row_to_json(j)::jsonb FROM (\n SELECT left(E'abcdefghijklmnopqrstuv\"\\twxyz012345678', a) AS a\n FROM generate_series(0,37) a\n) j;\n\n-- see json_encoding test for input with unicode escapes\n\n-- Numbers.\nSELECT '1'::json;\t\t\t\t-- OK\nSELECT '0'::json;\t\t\t\t-- OK\nSELECT '01'::json;\t\t\t\t-- ERROR, not valid according to JSON spec\nSELECT '0.1'::json;\t\t\t\t-- OK\nSELECT '9223372036854775808'::json;\t-- OK, even though it's too large for int8\nSELECT '1e100'::json;\t\t\t-- OK\nSELECT '1.3e100'::json;\t\t\t-- OK\nSELECT '1f2'::json;\t\t\t\t-- ERROR\nSELECT '0.x1'::json;\t\t\t-- ERROR\nSELECT '1.3ex100'::json;\t\t-- ERROR\n\n-- Arrays.\nSELECT '[]'::json;\t\t\t\t-- OK\nSELECT '[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]'::json; -- OK\nSELECT '[1,2]'::json;\t\t\t-- OK\nSELECT '[1,2,]'::json;\t\t\t-- ERROR, trailing comma\nSELECT '[1,2'::json;\t\t\t-- ERROR, no closing bracket\nSELECT '[1,[2]'::json;\t\t\t-- ERROR, no closing bracket\n\n-- Objects.\nSELECT '{}'::json;\t\t\t\t-- OK\nSELECT '{\"abc\"}'::json;\t\t\t-- ERROR, no value\nSELECT '{\"abc\":1}'::json;\t\t-- OK\nSELECT '{1:\"abc\"}'::json;\t\t-- ERROR, keys must be strings\nSELECT '{\"abc\",1}'::json;\t\t-- ERROR, wrong separator\nSELECT '{\"abc\"=1}'::json;\t\t-- ERROR, totally wrong separator\nSELECT '{\"abc\"::1}'::json;\t\t-- ERROR, another wrong separator\nSELECT '{\"abc\":1,\"def\":2,\"ghi\":[3,4],\"hij\":{\"klm\":5,\"nop\":[6]}}'::json; -- OK\nSELECT '{\"abc\":1:2}'::json;\t\t-- ERROR, colon in wrong spot\nSELECT '{\"abc\":1,3}'::json;\t\t-- ERROR, no value\n\n-- Recursion.\nSET max_stack_depth = '100kB';\nSELECT repeat('[', 10000)::json;\nSELECT repeat('{\"a\":', 10000)::json;\nRESET max_stack_depth;\n\n-- Miscellaneous stuff.\nSELECT 'true'::json;\t\t\t-- OK\nSELECT 'false'::json;\t\t\t-- OK\nSELECT 'null'::json;\t\t\t-- OK\nSELECT ' true '::json;\t\t\t-- OK, even with extra whitespace\nSELECT 'true false'::json;\t\t-- ERROR, too many values\nSELECT 'true, false'::json;\t\t-- ERROR, too many values\nSELECT 'truf'::json;\t\t\t-- ERROR, not a keyword\nSELECT 'trues'::json;\t\t\t-- ERROR, not a keyword\nSELECT ''::json;\t\t\t\t-- ERROR, no value\nSELECT ' '::json;\t\t\t-- ERROR, no value\n\n-- Multi-line JSON input to check ERROR reporting\nSELECT '{\n\t\t\"one\": 1,\n\t\t\"two\":\"two\",\n\t\t\"three\":\n\t\ttrue}'::json; -- OK\nSELECT '{\n\t\t\"one\": 1,\n\t\t\"two\":,\"two\", -- ERROR extraneous comma before field \"two\"\n\t\t\"three\":\n\t\ttrue}'::json;\nSELECT '{\n\t\t\"one\": 1,\n\t\t\"two\":\"two\",\n\t\t\"averyveryveryveryveryveryveryveryveryverylongfieldname\":}'::json;\n-- ERROR missing value for last field\n\n-- test non-error-throwing input\nselect pg_input_is_valid('{\"a\":true}', 'json');\nselect pg_input_is_valid('{\"a\":true', 'json');\nselect * from pg_input_error_info('{\"a\":true', 'json');\n\n--constructors\n-- array_to_json\n\nSELECT array_to_json(array(select 1 as a));\nSELECT array_to_json(array_agg(q),false) from (select x as b, x * 2 as c from generate_series(1,3) x) q;\nSELECT array_to_json(array_agg(q),true) from (select x as b, x * 2 as c from generate_series(1,3) x) q;\nSELECT array_to_json(array_agg(q),false)\n FROM ( SELECT $$a$$ || x AS b, y AS c,\n ARRAY[ROW(x.*,ARRAY[1,2,3]),\n ROW(y.*,ARRAY[4,5,6])] AS z\n FROM generate_series(1,2) x,\n generate_series(4,5) y) q) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": true, "sql_length": 3916, "num_statements": 60} {"question": "Write the DML statement from PostgreSQL regression test 'numeric' (example 380).", "schema": null, "sql": "INSERT INTO num_exp_add VALUES (9,2,'-59265296.260444467');", "explanation": "DML from PostgreSQL core regression test for Numeric.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 59, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the date of the premiere that had a 20.9 rating?", "schema": "CREATE TABLE table_name_38 (premiere VARCHAR, rating VARCHAR)", "sql": "SELECT premiere FROM table_name_38 WHERE rating = '20.9';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 57, "num_statements": 1} {"question": "Get the total energy consumption for each production site and the average energy consumption per chemical per site", "schema": "CREATE TABLE Production_Sites (site_id INT, site_name VARCHAR(50), energy_consumption INT); CREATE TABLE Production_Chemicals (site_id INT, chemical_id INT, energy_consumption INT);", "sql": "SELECT ps.site_name, SUM(ps.energy_consumption) as total_energy, AVG(pc.energy_consumption/cnt.chemical_count) as avg_energy_per_chemical FROM Production_Sites ps JOIN Production_Chemicals pc ON ps.site_id = pc.site_id JOIN (SELECT site_id, COUNT(DISTINCT chemical_id) as chemical_count FROM Production_Chemicals GROUP BY site_id) cnt ON ps.site_id = cnt.site_id GROUP BY ps.site_name;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 385, "num_statements": 1} {"question": "What is the average number of algorithmic fairness assessments conducted per month in India in 2022?", "schema": "CREATE TABLE fairness_assessments (assessment_id INT, assessment_date DATE, country TEXT); INSERT INTO fairness_assessments (assessment_id, assessment_date, country) VALUES (1, '2022-01-02', 'India'), (2, '2022-02-15', 'India'), (3, '2022-03-27', 'India');", "sql": "SELECT AVG(num_assessments) as avg_assessments_per_month FROM (SELECT COUNT(*) as num_assessments, EXTRACT(MONTH FROM assessment_date) as month FROM fairness_assessments WHERE country = 'India' AND assessment_date BETWEEN '2022-01-01' AND '2022-12-31' GROUP BY month) as subquery;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 280, "num_statements": 1} {"question": "List all climate mitigation initiatives in Africa in 2020 and their corresponding funding amounts.", "schema": "CREATE TABLE climate_mitigation (year INT, country VARCHAR(50), initiative VARCHAR(50), amount FLOAT); INSERT INTO climate_mitigation (year, country, initiative, amount) VALUES (2020, 'Country X', 'Mitigation Initiative 1', 200000);", "sql": "SELECT initiative, amount FROM climate_mitigation WHERE year = 2020 AND country = 'Africa';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 91, "num_statements": 1} {"question": "Update equipment status in 'equipment_maintenance' table", "schema": "CREATE TABLE equipment_maintenance (equipment_id INTEGER PRIMARY KEY, last_maintenance_date DATE, next_maintenance_date DATE, completed_maintenance BOOLEAN, status TEXT);", "sql": "UPDATE equipment_maintenance SET status = 'Operational' WHERE equipment_id = 1 AND completed_maintenance = TRUE;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 112, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What's the Bulgarian word for september?", "schema": "CREATE TABLE table_name_56 (bulgarian_name VARCHAR, english_name VARCHAR)", "sql": "SELECT bulgarian_name FROM table_name_56 WHERE english_name = 'september';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 74, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: when the population was 1599, what was the census ranking?", "schema": "CREATE TABLE table_170961_2 (census_ranking VARCHAR, population VARCHAR)", "sql": "SELECT census_ranking FROM table_170961_2 WHERE population = 1599;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "What is the percentage of donors who donated more than $1000 in the year 2021 compared to the total number of donors in that year?", "schema": "CREATE TABLE Donors (DonorID INT, DonorName TEXT, Amount DECIMAL(10,2), Year INT); INSERT INTO Donors (DonorID, DonorName, Amount, Year) VALUES (1, 'John Doe', 500.00, 2021), (2, 'Jane Smith', 1500.00, 2021), (3, 'Bob Johnson', 750.00, 2021);", "sql": "SELECT (COUNT(DonorID) * 100.00 / (SELECT COUNT(DonorID) FROM Donors WHERE Year = 2021)) FROM Donors WHERE Amount > 1000 AND Year = 2021;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 137, "num_statements": 1} {"question": "How many mobile devices were sold in each state of the United States, for the month of July, 2022?", "schema": "CREATE TABLE sales (sale_id INT, sale_date DATE, quantity INT, state VARCHAR(50));", "sql": "SELECT EXTRACT(MONTH FROM sale_date) AS month, state, SUM(quantity) AS total_sales FROM sales WHERE YEAR(sale_date) = 2022 AND MONTH(sale_date) = 7 GROUP BY month, state;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 170, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Votes of 27,391 has which quotient?", "schema": "CREATE TABLE table_name_10 (quotient VARCHAR, votes VARCHAR)", "sql": "SELECT quotient FROM table_name_10 WHERE votes = '27,391';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 58, "num_statements": 1} {"question": "What is the maximum number of military bases in African countries?", "schema": "CREATE TABLE military_bases (country VARCHAR(50), num_bases INT); INSERT INTO military_bases (country, num_bases) VALUES ('Egypt', 12), ('Algeria', 15), ('South Africa', 10), ('Morocco', 14), ('Sudan', 11), ('Libya', 8);", "sql": "SELECT MAX(num_bases) FROM military_bases WHERE country IN ('Egypt', 'Algeria', 'South Africa', 'Morocco', 'Sudan', 'Libya');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 125, "num_statements": 1} {"question": "List all regions with network investments greater than $5000.", "schema": "CREATE TABLE network_investments (region TEXT, investment_amount FLOAT);", "sql": "SELECT region FROM network_investments WHERE investment_amount > 5000;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 70, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'intarray' (example 26).", "schema": null, "sql": "SELECT '{-1,3,1}'::int[] & '{1,2}';", "explanation": "Example query from the 'intarray' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 35, "num_statements": 1} {"question": "How many schools are there in 'CityF' as of 2021?", "schema": "CREATE TABLE Schools (City VARCHAR(20), Year INT, Number INT); INSERT INTO Schools (City, Year, Number) VALUES ('CityF', 2021, 30);", "sql": "SELECT Number FROM Schools WHERE City = 'CityF' AND Year = 2021;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 64, "num_statements": 1} {"question": "Which fashion brands have the highest and lowest diversity scores for their product sizes?", "schema": "CREATE TABLE DiversityScores (BrandID INT, DiversityScore INT);", "sql": "SELECT FB.BrandName, DS.DiversityScore FROM FashionBrands FB INNER JOIN DiversityScores DS ON FB.BrandID = DS.BrandID ORDER BY DS.DiversityScore ASC, FB.BrandName ASC;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 167, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: When did Kim Thompson win with 278 score?", "schema": "CREATE TABLE table_name_87 (year INTEGER, score VARCHAR, champion VARCHAR)", "sql": "SELECT SUM(year) FROM table_name_87 WHERE score = '278' AND champion = 'kim thompson';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 86, "num_statements": 1} {"question": "Delete all records from the 'surveys' table where 'survey_date' is in the year 2005", "schema": "CREATE TABLE surveys (id INT PRIMARY KEY, mine_id VARCHAR(10), survey_date DATE); INSERT INTO surveys (id, mine_id, survey_date) VALUES (1, 'Mine_001', '2005-01-01'); INSERT INTO surveys (id, mine_id, survey_date) VALUES (2, 'Mine_002', '2006-01-01'); INSERT INTO surveys (id, mine_id, survey_date) VALUES (3, 'Mine_001', '2004-01-01');", "sql": "DELETE FROM surveys WHERE YEAR(survey_date) = 2005;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_delete", "is_postgresql_specific": false, "sql_length": 51, "num_statements": 1} {"question": "Show a SQL definition from the citus project (multi_sql_function, item 17).", "schema": null, "sql": "-- test router executor parameterized sql functions\nCREATE TABLE temp_table (\n\tkey int,\n\tvalue int\n);", "explanation": "SQL definition from the open-source citus PostgreSQL project.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_table", "is_postgresql_specific": false, "sql_length": 101, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: When November is 153 what is the March number?", "schema": "CREATE TABLE table_25235489_2 (march_27_29 VARCHAR, november_3 VARCHAR)", "sql": "SELECT march_27_29 FROM table_25235489_2 WHERE november_3 = '153';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "How many size 30 jeans were sold in the last month?", "schema": "CREATE TABLE sales_data (sale_id INT, jean_size INT, sale_date DATE); INSERT INTO sales_data (sale_id, jean_size, sale_date) VALUES (1, 28, '2022-06-03'), (2, 30, '2022-06-15'), (3, 32, '2022-06-27'), (4, 26, '2022-07-08'), (5, 30, '2022-07-15');", "sql": "SELECT COUNT(*) FROM sales_data WHERE jean_size = 30 AND sale_date >= DATE_SUB(CURRENT_DATE, INTERVAL 1 MONTH);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 111, "num_statements": 1} {"question": "What is the total number of emergency calls and crimes reported during natural disasters?", "schema": "CREATE TABLE disasters (disaster_id INT, disaster_name VARCHAR(255), disaster_date DATE);CREATE TABLE emergency_calls (id INT, disaster_id INT, call_type VARCHAR(255), call_date DATE);CREATE TABLE crimes (id INT, disaster_id INT, crime_type VARCHAR(255), crime_date DATE);", "sql": "SELECT 'Total emergency calls' AS metric, COUNT(ec.id) AS count FROM emergency_calls ec JOIN disasters d ON ec.disaster_id = d.disaster_id UNION ALL SELECT 'Total crimes' AS metric, COUNT(c.id) AS count FROM crimes c JOIN disasters d ON c.disaster_id = d.disaster_id;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 267, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many Lanes have a Country of jamaica, and a Name of kerron stewart, and a Time larger than 22?", "schema": "CREATE TABLE table_name_46 (lane INTEGER, time VARCHAR, country VARCHAR, name VARCHAR)", "sql": "SELECT SUM(lane) FROM table_name_46 WHERE country = 'jamaica' AND name = 'kerron stewart' AND time > 22;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 104, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the released for pat mills", "schema": "CREATE TABLE table_1620397_2 (released VARCHAR, author VARCHAR)", "sql": "SELECT released FROM table_1620397_2 WHERE author = 'Pat Mills';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 64, "num_statements": 1} {"question": "Show an example of PostgreSQL ALTER TABLESPACE (example 2).", "schema": null, "sql": "ALTER TABLESPACE index_space OWNER TO mary;", "explanation": "PostgreSQL ALTER TABLESPACE command.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "other", "is_postgresql_specific": true, "sql_length": 43, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many viewers are the when the draw is 3?", "schema": "CREATE TABLE table_27994983_8 (viewers INTEGER, draw VARCHAR)", "sql": "SELECT MAX(viewers) FROM table_27994983_8 WHERE draw = 3;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 57, "num_statements": 1} {"question": "List the ports that have been visited by vessels with a maximum cargo capacity of over 15000 tons.", "schema": "CREATE TABLE Port_Visits (id INT, vessel VARCHAR(255), capacity INT, port VARCHAR(255), time DATETIME); INSERT INTO Port_Visits (id, vessel, capacity, port, time) VALUES (1, 'Sea Titan', 20000, 'Singapore', '2020-01-01 10:00:00'), (2, 'Ocean Wave', 12000, 'Sydney', '2020-02-15 15:30:00');", "sql": "SELECT DISTINCT port FROM Port_Visits PV JOIN (SELECT vessel, capacity FROM Vessels WHERE capacity > 15000) V ON PV.vessel = V.vessel;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 134, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What city was the game held in when the opponent was Towson?", "schema": "CREATE TABLE table_name_93 (city VARCHAR, opponent VARCHAR)", "sql": "SELECT city FROM table_name_93 WHERE opponent = 'towson';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 57, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which player is from Tampa, Florida?", "schema": "CREATE TABLE table_name_68 (player VARCHAR, hometown VARCHAR)", "sql": "SELECT player FROM table_name_68 WHERE hometown = 'tampa, florida';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 67, "num_statements": 1} {"question": "What is the total number of natural ingredients sourced from South America?", "schema": "CREATE TABLE Ingredient_Sourcing (SupplierID INT, ProductID INT, Natural BOOLEAN, Region VARCHAR(50)); INSERT INTO Ingredient_Sourcing (SupplierID, ProductID, Natural, Region) VALUES (3001, 101, TRUE, 'South America'), (3002, 102, FALSE, 'South America'), (3003, 101, TRUE, 'South America'), (3004, 103, FALSE, 'South America'), (3005, 102, TRUE, 'South America');", "sql": "SELECT SUM(Natural) as TotalNatural FROM Ingredient_Sourcing WHERE Region = 'South America';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 92, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the Name of the Space Telescope Terminated on March 1993?", "schema": "CREATE TABLE table_name_31 (name VARCHAR, terminated VARCHAR)", "sql": "SELECT name FROM table_name_31 WHERE terminated = 'march 1993';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 63, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the frequency of the station located in polangui, albay?", "schema": "CREATE TABLE table_27588823_2 (frequency VARCHAR, location VARCHAR)", "sql": "SELECT frequency FROM table_27588823_2 WHERE location = 'Polangui, Albay';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 74, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which bullet has a Shoulder of 10.92 (.430)?", "schema": "CREATE TABLE table_name_25 (bullet VARCHAR, shoulder VARCHAR)", "sql": "SELECT bullet FROM table_name_25 WHERE shoulder = '10.92 (.430)';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 65, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which Lost has Games larger than 7?", "schema": "CREATE TABLE table_name_54 (lost INTEGER, games INTEGER)", "sql": "SELECT AVG(lost) FROM table_name_54 WHERE games > 7;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 52, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What date was parliament assembled when John rudhale was first member?", "schema": "CREATE TABLE table_15451122_2 (assembled VARCHAR, first_member VARCHAR)", "sql": "SELECT assembled FROM table_15451122_2 WHERE first_member = 'John Rudhale';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 75, "num_statements": 1} {"question": "What are the bioprocess engineering companies that have received funding?", "schema": "CREATE TABLE company (id INT, name VARCHAR(50), industry VARCHAR(50), location VARCHAR(50)); INSERT INTO company (id, name, industry, location) VALUES (1, 'GenTech', 'Genetic Research', 'San Francisco'); INSERT INTO company (id, name, industry, location) VALUES (2, 'BioEngineer', 'Bioprocess Engineering', 'Boston'); INSERT INTO company (id, name, industry, location) VALUES (3, 'BioSolutions', 'Bioprocess Engineering', 'Seattle'); CREATE TABLE funding (company_id INT, round VARCHAR(50), amount FLOAT); INSERT INTO funding (company_id, round, amount) VALUES (2, 'Seed', 3000000); INSERT INTO funding (company_id, round, amount) VALUES (3, 'Series A', 8000000);", "sql": "SELECT c.name FROM company c JOIN funding f ON c.id = f.company_id WHERE c.industry = 'Bioprocess Engineering';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 111, "num_statements": 1} {"question": "Show a SQL definition from the citus project (multi_behavioral_analytics_create_table, item 43).", "schema": null, "sql": "CREATE INDEX is_index3 ON users_table(value_1);", "explanation": "SQL definition from the open-source citus PostgreSQL project.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "ddl_index", "is_postgresql_specific": false, "sql_length": 47, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'returning' (example 90).", "schema": null, "sql": "DELETE FROM foo WHERE f1 = 5\n RETURNING (SELECT max(old.f4 + x) FROM generate_series(1, 10) x) old_max,\n (SELECT max(new.f4 + x) FROM generate_series(1, 10) x) new_max;", "explanation": "DML from PostgreSQL core regression test for Returning.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_delete", "is_postgresql_specific": true, "sql_length": 180, "num_statements": 1} {"question": "Show the number of patients treated by 'Dr. Jane' and their respective conditions.", "schema": "CREATE TABLE treatment (treatment_id INT, patient_id INT, condition VARCHAR(50), provider VARCHAR(50), date DATE); INSERT INTO treatment (treatment_id, patient_id, condition, provider, date) VALUES (1, 1, 'Anxiety Disorder', 'Dr. Jane', '2021-01-01'); INSERT INTO treatment (treatment_id, patient_id, condition, provider, date) VALUES (2, 1, 'PTSD', 'Dr. Bob', '2021-02-01');", "sql": "SELECT COUNT(patient_id), condition FROM treatment WHERE provider = 'Dr. Jane' GROUP BY condition;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 98, "num_statements": 1} {"question": "What is the average number of military personnel in humanitarian assistance operations by country, for countries with more than 200 personnel?", "schema": "CREATE TABLE HumanitarianAssistanceOperations (id INT, country VARCHAR(50), military_personnel INT);", "sql": "SELECT country, AVG(military_personnel) FROM HumanitarianAssistanceOperations GROUP BY country HAVING COUNT(*) > 200;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 117, "num_statements": 1} {"question": "Show a SQL definition from the timescaledb project (bookend, item 6).", "schema": null, "sql": "CREATE OR REPLACE FUNCTION _timescaledb_functions.bookend_serializefunc(internal)\nRETURNS bytea\nAS '@MODULE_PATHNAME@', 'ts_bookend_serializefunc'\nLANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;", "explanation": "SQL definition from the open-source timescaledb PostgreSQL project.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "plpgsql_function", "is_postgresql_specific": true, "sql_length": 189, "num_statements": 1} {"question": "Find the average price of garments in each category in the garments table", "schema": "CREATE TABLE garments (id INT, name VARCHAR(100), price DECIMAL(5,2), category VARCHAR(50));", "sql": "SELECT category, AVG(price) as avg_price FROM garments GROUP BY category;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 73, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the sum of Points, when Year is after 1959, and when Class is 250cc?", "schema": "CREATE TABLE table_name_42 (points INTEGER, year VARCHAR, class VARCHAR)", "sql": "SELECT SUM(points) FROM table_name_42 WHERE year > 1959 AND class = '250cc';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 76, "num_statements": 1} {"question": "Delete records of donations and investments from the 'donations' and 'investments' tables with a value over 100000 for the 'amount' column.", "schema": "CREATE TABLE donations (donation_id INT, donor_id INT, cause_id INT, amount DECIMAL(10, 2)); CREATE TABLE investments (investment_id INT, investor_id INT, sector_id INT, amount DECIMAL(10, 2));", "sql": "DELETE FROM donations WHERE amount > 100000; DELETE FROM investments WHERE amount > 100000;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_delete", "is_postgresql_specific": false, "sql_length": 91, "num_statements": 2} {"question": "What are the unique technology accessibility concerns for people with visual impairments in the accessibility table?", "schema": "CREATE TABLE accessibility (id INT, disability VARCHAR(255), concern VARCHAR(255));", "sql": "SELECT DISTINCT concern FROM accessibility WHERE disability = 'people with visual impairments';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 95, "num_statements": 1} {"question": "What is the maximum conservation status level for marine species in the Pacific Ocean?", "schema": "CREATE TABLE Pacific_Species (species_name TEXT, location TEXT, conservation_status TEXT); INSERT INTO Pacific_Species (species_name, location, conservation_status) VALUES ('Hawaiian Monk Seal', 'Pacific Ocean', 'Endangered'), ('Green Sea Turtle', 'Pacific Ocean', 'Threatened');", "sql": "SELECT MAX(conservation_status) FROM Pacific_Species;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 53, "num_statements": 1} {"question": "What is the maximum ESG score for investments in the education sector, broken down by year?", "schema": "CREATE TABLE investments (investment_id INT, sector VARCHAR(50), esg_score INT, investment_date DATE); INSERT INTO investments (investment_id, sector, esg_score, investment_date) VALUES (1, 'Education', 4, '2022-01-01'), (2, 'Education', 5, '2022-02-01'), (3, 'Education', 3, '2022-03-01'), (4, 'Education', 6, '2022-04-01'), (5, 'Education', 2, '2022-05-01'), (6, 'Education', 7, '2023-01-01'), (7, 'Education', 8, '2023-02-01'), (8, 'Education', 9, '2023-03-01'), (9, 'Education', 10, '2023-04-01'), (10, 'Education', 1, '2023-05-01');", "sql": "SELECT EXTRACT(YEAR FROM investment_date) as year, MAX(esg_score) as max_esg_score FROM investments WHERE sector = 'Education' GROUP BY year ORDER BY year ASC;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 159, "num_statements": 1} {"question": "Show a SQL definition from the postgrest project (jwt, item 8).", "schema": null, "sql": "CREATE OR REPLACE FUNCTION jwt.algorithm_sign(signables text, secret text, algorithm text)\nRETURNS text LANGUAGE sql AS $$\nWITH\n alg AS (\n SELECT CASE\n WHEN algorithm = 'HS256' THEN 'sha256'\n WHEN algorithm = 'HS384' THEN 'sha384'\n WHEN algorithm = 'HS512' THEN 'sha512'\n ELSE '' END) -- hmac throws error\nSELECT jwt.url_encode(public.hmac(signables, secret, (select * FROM alg)));\n$$;", "explanation": "SQL definition from the open-source postgrest PostgreSQL project.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "plpgsql_function", "is_postgresql_specific": true, "sql_length": 410, "num_statements": 2} {"question": "Generate PostgreSQL SQL for: Which Country has a Year(s) won of 1977?", "schema": "CREATE TABLE table_name_60 (country VARCHAR, year_s__won VARCHAR)", "sql": "SELECT country FROM table_name_60 WHERE year_s__won = '1977';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 61, "num_statements": 1} {"question": "How many animals are in the 'conservation_program' table?", "schema": "CREATE TABLE conservation_program (id INT PRIMARY KEY, animal_name VARCHAR, num_animals INT);", "sql": "SELECT SUM(num_animals) FROM conservation_program;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 50, "num_statements": 1} {"question": "How many halal-certified makeup products were sold in the last quarter?", "schema": "CREATE TABLE Sales (id INT, product_id INT, sale_date DATE); CREATE TABLE Products (id INT, category TEXT, is_halal_certified BOOLEAN); INSERT INTO Sales (id, product_id, sale_date) VALUES (1, 1, '2022-01-05'), (2, 2, '2022-04-17'); INSERT INTO Products (id, category, is_halal_certified) VALUES (1, 'Makeup', true), (2, 'Skincare', false);", "sql": "SELECT COUNT(*) FROM Sales JOIN Products ON Sales.product_id = Products.id WHERE is_halal_certified = true AND category = 'Makeup' AND sale_date >= '2022-01-01' AND sale_date <= '2022-03-31';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 191, "num_statements": 1} {"question": "What is the total number of electric vehicles sold in California and New York in 2020?", "schema": "CREATE TABLE EVSales (Id INT PRIMARY KEY, Model VARCHAR(100), Year INT, State VARCHAR(2), UnitsSold INT); INSERT INTO EVSales (Id, Model, Year, State, UnitsSold) VALUES (1, 'Tesla Model 3', 2020, 'CA', 50000), (2, 'Tesla Model Y', 2020, 'CA', 45000), (3, 'Tesla Model S', 2020, 'CA', 15000), (4, 'Tesla Model X', 2020, 'CA', 12000), (5, 'Chevrolet Bolt', 2020, 'CA', 8000), (6, 'Nissan Leaf', 2020, 'CA', 7000), (7, 'Tesla Model 3', 2020, 'NY', 25000), (8, 'Tesla Model Y', 2020, 'NY', 20000), (9, 'Tesla Model S', 2020, 'NY', 5000), (10, 'Tesla Model X', 2020, 'NY', 4000), (11, 'Chevrolet Bolt', 2020, 'NY', 3000), (12, 'Nissan Leaf', 2020, 'NY', 2000);", "sql": "SELECT SUM(UnitsSold) FROM EVSales WHERE Year = 2020 AND State IN ('CA', 'NY');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 79, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'update' (example 47).", "schema": null, "sql": "CREATE TABLE upsert_test_1 PARTITION OF upsert_test FOR VALUES IN (1);", "explanation": "DDL from PostgreSQL core regression test for Update.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_table", "is_postgresql_specific": false, "sql_length": 70, "num_statements": 1} {"question": "What is the maximum number of games played by any player from South America?", "schema": "CREATE TABLE Players (PlayerID INT, PlayerName VARCHAR(50), Age INT, Country VARCHAR(50), GamesPlayed INT); INSERT INTO Players (PlayerID, PlayerName, Age, Country, GamesPlayed) VALUES (1, 'John Doe', 25, 'USA', 100); INSERT INTO Players (PlayerID, PlayerName, Age, Country, GamesPlayed) VALUES (2, 'Jane Smith', 30, 'Canada', 200); INSERT INTO Players (PlayerID, PlayerName, Age, Country, GamesPlayed) VALUES (3, 'Pedro Alvarez', 35, 'Brazil', 300);", "sql": "SELECT MAX(GamesPlayed) FROM Players WHERE Country IN ('Brazil', 'Argentina', 'Colombia');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 90, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What round was a player from Indiana picked?", "schema": "CREATE TABLE table_name_71 (round VARCHAR, college VARCHAR)", "sql": "SELECT round FROM table_name_71 WHERE college = 'indiana';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 58, "num_statements": 1} {"question": "Show a SQL definition from the citus project (multi_partitioning_utils, item 26).", "schema": null, "sql": "-- now create the partitions\nCREATE TABLE date_partition_2006 PARTITION OF date_partitioned_table FOR VALUES FROM ('2006-01-01') TO ('2007-01-01');", "explanation": "SQL definition from the open-source citus PostgreSQL project.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_table", "is_postgresql_specific": false, "sql_length": 147, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'updatable_views' (example 3).", "schema": null, "sql": "INSERT INTO base_tbl SELECT i, 'Row ' || i FROM generate_series(-2, 2) g(i);", "explanation": "DML from PostgreSQL core regression test for Updatable Views.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 76, "num_statements": 1} {"question": "What is the average claim amount for policyholders living in California?", "schema": "CREATE TABLE Policyholders (ID INT, Name VARCHAR(50), Age INT, Gender VARCHAR(10), City VARCHAR(50), State VARCHAR(20), ZipCode VARCHAR(10)); CREATE TABLE Claims (ID INT, PolicyholderID INT, ClaimAmount DECIMAL(10,2), ClaimDate DATE);", "sql": "SELECT AVG(Claims.ClaimAmount) FROM Claims JOIN Policyholders ON Claims.PolicyholderID = Policyholders.ID WHERE Policyholders.State = 'California';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 147, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who was the away team on 10 January 1979 who had a tie no of 17?", "schema": "CREATE TABLE table_name_82 (away_team VARCHAR, date VARCHAR, tie_no VARCHAR)", "sql": "SELECT away_team FROM table_name_82 WHERE date = '10 january 1979' AND tie_no = '17';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 85, "num_statements": 1} {"question": "What is the total length of highways in India?", "schema": "CREATE TABLE Highway (id INT, name VARCHAR(50), length FLOAT, country VARCHAR(50)); INSERT INTO Highway (id, name, length, country) VALUES (1, 'Golden Quadrilateral', 5846, 'India');", "sql": "SELECT SUM(length) FROM Highway WHERE country = 'India';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 56, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the Rank of Qi Hui?", "schema": "CREATE TABLE table_name_52 (rank VARCHAR, name VARCHAR)", "sql": "SELECT COUNT(rank) FROM table_name_52 WHERE name = 'qi hui';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 60, "num_statements": 1} {"question": "Which teacher professional development courses were taken by teachers in district 101?", "schema": "CREATE TABLE teacher_pd_courses (teacher_id INT, course_id INT, district_id INT); INSERT INTO teacher_pd_courses (teacher_id, course_id, district_id) VALUES (1, 1001, 101), (2, 1002, 101), (3, 1003, 101), (4, 1004, 102), (5, 1005, 103); CREATE TABLE courses (course_id INT, course_name VARCHAR(255)); INSERT INTO courses (course_id, course_name) VALUES (1001, 'Course 1'), (1002, 'Course 2'), (1003, 'Course 3'), (1004, 'Course 4'), (1005, 'Course 5');", "sql": "SELECT c.course_name FROM teacher_pd_courses t JOIN courses c ON t.course_id = c.course_id WHERE t.district_id = 101;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 117, "num_statements": 1} {"question": "Update the gender of multiple players", "schema": "CREATE TABLE Players (PlayerID INT PRIMARY KEY, Name VARCHAR(50), Age INT, Gender VARCHAR(10)); INSERT INTO Players (PlayerID, Name, Age, Gender) VALUES (1, 'John Doe', 25, 'Male'); INSERT INTO Players (PlayerID, Name, Age, Gender) VALUES (2, 'Jane Doe', 30, 'Female'); INSERT INTO Players (PlayerID, Name, Age, Gender) VALUES (3, 'Alex Smith', 22, 'Male');", "sql": "UPDATE Players SET Gender = 'Non-binary' WHERE PlayerID IN (1, 3);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "What is the total number of comments on posts in the 'beauty' category on Instagram?", "schema": "CREATE TABLE post_data (post_id INT, category VARCHAR(50), platform VARCHAR(20)); INSERT INTO post_data (post_id, category, platform) VALUES (1, 'beauty', 'Instagram'), (2, 'fashion', 'Instagram'); CREATE TABLE post_comments (comment_id INT, post_id INT, platform VARCHAR(20)); INSERT INTO post_comments (comment_id, post_id, platform) VALUES (1, 1, 'Instagram'), (2, 1, 'Instagram'), (3, 2, 'Instagram');", "sql": "SELECT SUM(comment_id) FROM post_comments INNER JOIN post_data ON post_comments.post_id = post_data.post_id WHERE post_data.category = 'beauty' AND post_data.platform = 'Instagram';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 181, "num_statements": 1} {"question": "Find the number of unique artifact types per excavation site?", "schema": "CREATE TABLE Sites (SiteID INT, SiteName TEXT); INSERT INTO Sites (SiteID, SiteName) VALUES (1, 'Site-A'), (2, 'Site-B'), (3, 'Site-C'); CREATE TABLE Artifacts (ArtifactID INT, ArtifactName TEXT, SiteID INT, ArtifactType TEXT); INSERT INTO Artifacts (ArtifactID, ArtifactName, SiteID, ArtifactType) VALUES (1, 'Pottery Shard', 1, 'Ceramic'), (2, 'Bronze Arrowhead', 2, 'Metal'), (3, 'Flint Tool', 3, 'Stone'), (4, 'Ancient Coin', 1, 'Metal'), (5, 'Stone Hammer', 2, 'Stone');", "sql": "SELECT Sites.SiteName, COUNT(DISTINCT Artifacts.ArtifactType) AS UniqueArtifactTypes FROM Sites INNER JOIN Artifacts ON Sites.SiteID = Artifacts.SiteID GROUP BY Sites.SiteName;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 176, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the result of the December 15, 2002 game?", "schema": "CREATE TABLE table_name_66 (result VARCHAR, date VARCHAR)", "sql": "SELECT result FROM table_name_66 WHERE date = 'december 15, 2002';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which Date has a Surface of clay, and a Score of 6–3, 5–7, 2–6?", "schema": "CREATE TABLE table_name_89 (date VARCHAR, surface VARCHAR, score VARCHAR)", "sql": "SELECT date FROM table_name_89 WHERE surface = 'clay' AND score = '6–3, 5–7, 2–6';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 82, "num_statements": 1} {"question": "PostgreSQL regression test 'subselect': Write the SELECT query (example 1).", "schema": null, "sql": "SELECT 1 AS one WHERE 1 IN (SELECT 1);", "explanation": "Regression test for Subselect in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT 1 AS one WHERE 1 IN (SELECT 1)) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 38, "num_statements": 1} {"question": "Write the SQL definition from PostgreSQL contrib extension 'contrib' (item 193).", "schema": null, "sql": "-- Create the operator class\nCREATE OPERATOR CLASS gist_bytea_ops\nDEFAULT FOR TYPE bytea USING gist\nAS\n\tOPERATOR\t1\t< ,\n\tOPERATOR\t2\t<= ,\n\tOPERATOR\t3\t= ,\n\tOPERATOR\t4\t>= ,\n\tOPERATOR\t5\t> ,\n\tOPERATOR\t6\t<> ,\n\tFUNCTION\t1\tgbt_bytea_consistent (internal, bytea, int2, oid, internal),\n\tFUNCTION\t2\tgbt_bytea_union (internal, internal),\n\tFUNCTION\t3\tgbt_bytea_compress (internal),\n\tFUNCTION\t4\tgbt_var_decompress (internal),\n\tFUNCTION\t5\tgbt_bytea_penalty (internal, internal, internal),\n\tFUNCTION\t6\tgbt_bytea_picksplit (internal, internal),\n\tFUNCTION\t7\tgbt_bytea_same (gbtreekey_var, gbtreekey_var, internal),\n\tFUNCTION\t9\tgbt_var_fetch (internal),\n\tFUNCTION\t11\tgbt_bytea_sortsupport (internal),\n\tFUNCTION\t12 (\"any\", \"any\") gist_translate_cmptype_btree (int),\n\tSTORAGE\t\t\tgbtreekey_var;", "explanation": "SQL definition from the 'contrib' PostgreSQL contrib extension.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "ddl_advanced", "is_postgresql_specific": true, "sql_length": 773, "num_statements": 1} {"question": "What is the maximum ticket price for an art exhibit in Paris?", "schema": "CREATE TABLE Exhibits (exhibit_id INT, city VARCHAR(50), price DECIMAL(5,2)); INSERT INTO Exhibits (exhibit_id, city, price) VALUES (1, 'Paris', 45.99), (2, 'London', 32.49), (3, 'Berlin', 50.00);", "sql": "SELECT MAX(price) FROM Exhibits WHERE city = 'Paris';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 53, "num_statements": 1} {"question": "Show a SQL definition from the timescaledb project (plan_hypertable_inline, item 1).", "schema": null, "sql": "-- test hypertable classification when query is in an inlineable function\n\n\\set PREFIX 'EXPLAIN (buffers off, costs off)'\n\nCREATE TABLE test (a int, b bigint NOT NULL);", "explanation": "SQL definition from the open-source timescaledb PostgreSQL project.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_table", "is_postgresql_specific": false, "sql_length": 168, "num_statements": 1} {"question": "Which cetacean species have been sighted in the same location more than once in the Survey table?", "schema": "CREATE TABLE Survey ( id INT PRIMARY KEY, species_id INT, location VARCHAR(255), sighted_date DATE); CREATE TABLE Species ( id INT PRIMARY KEY, name VARCHAR(255), family VARCHAR(255), population INT);", "sql": "SELECT Survey.location, Species.name FROM Survey JOIN Species ON Survey.species_id = Species.id GROUP BY Survey.location, Species.name HAVING COUNT(Survey.id) > 1 WHERE Species.family = 'Cetacean';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 197, "num_statements": 1} {"question": "What was the average monthly CO2 emission for each manufacturing facility in 2021?", "schema": "CREATE TABLE co2_emissions (facility_id INT, facility_name VARCHAR(255), emission_date DATE, co2_emission DECIMAL(10,2)); INSERT INTO co2_emissions (facility_id, facility_name, emission_date, co2_emission) VALUES (1, 'Facility A', '2021-01-01', 500.00), (2, 'Facility B', '2021-02-01', 700.00), (3, 'Facility C', '2021-03-01', 800.00);", "sql": "SELECT facility_name, AVG(co2_emission) as avg_monthly_emission FROM co2_emissions WHERE emission_date BETWEEN '2021-01-01' AND '2021-12-31' GROUP BY facility_name;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 164, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: I want the tyres for hans binder", "schema": "CREATE TABLE table_name_46 (tyres VARCHAR, driver VARCHAR)", "sql": "SELECT tyres FROM table_name_46 WHERE driver = 'hans binder';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 61, "num_statements": 1} {"question": "How many 'safety_incidents' were reported in 'Site B' in the 'SafetyIncidents' table for '2022'?", "schema": "CREATE TABLE SafetyIncidents (id INT, site VARCHAR(255), year INT, safety_incidents INT); INSERT INTO SafetyIncidents (id, site, year, safety_incidents) VALUES (1, 'Site A', 2021, 2), (2, 'Site B', 2022, 3), (3, 'Site A', 2022, 1);", "sql": "SELECT SUM(safety_incidents) FROM SafetyIncidents WHERE site = 'Site B' AND year = 2022;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 88, "num_statements": 1} {"question": "Provide the total area of plantations in the Northern region.", "schema": "CREATE TABLE plantations (id INT, name VARCHAR(255), acres FLOAT, region VARCHAR(255)); INSERT INTO plantations (id, name, acres, region) VALUES (1, 'Maplewood', 500.3, 'North'); INSERT INTO plantations (id, name, acres, region) VALUES (2, 'Pinehill', 700.5, 'South');", "sql": "SELECT SUM(acres) FROM plantations WHERE region = 'North';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 58, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the Date for the Away team University?", "schema": "CREATE TABLE table_name_73 (date VARCHAR, away_team VARCHAR)", "sql": "SELECT date FROM table_name_73 WHERE away_team = 'university';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 62, "num_statements": 1} {"question": "PostgreSQL regression test 'interval': Write the SELECT query (example 265).", "schema": null, "sql": "select interval '-9223372036854775808 microseconds -0.1 days';", "explanation": "Regression test for Interval in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select interval '-9223372036854775808 microseconds -0.1 days') AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 62, "num_statements": 1} {"question": "List the names and launch dates of satellites manufactured by Japanese companies.", "schema": "CREATE TABLE Manufacturer (name VARCHAR(50), country VARCHAR(50), domain VARCHAR(20)); INSERT INTO Manufacturer (name, country, domain) VALUES ('Mitsubishi Heavy Industries', 'Japan', 'Aerospace'); INSERT INTO Manufacturer (name, country, domain) VALUES ('Nissan Space Agency', 'Japan', 'Aerospace');", "sql": "SELECT s.name, s.launch_date FROM Satellite s INNER JOIN Manufacturer m ON s.manufacturer = m.name WHERE m.country = 'Japan';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 125, "num_statements": 1} {"question": "How many walruses are in the Arctic Ocean?", "schema": "CREATE TABLE Animals (name VARCHAR(50), species VARCHAR(50), location VARCHAR(50)); INSERT INTO Animals (name, species, location) VALUES ('Seal 1', 'Seal', 'Arctic Ocean'), ('Seal 2', 'Seal', 'Arctic Ocean'), ('Walrus 1', 'Walrus', 'Arctic Ocean');", "sql": "SELECT COUNT(*) FROM Animals WHERE species = 'Walrus' AND location = 'Arctic Ocean';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 84, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: what's the report with rnd being 4", "schema": "CREATE TABLE table_1140074_2 (report VARCHAR, rnd VARCHAR)", "sql": "SELECT report FROM table_1140074_2 WHERE rnd = 4;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 49, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: which countries did participated in both Friendly and Tournament type competitions.", "schema": "CREATE TABLE competition (country VARCHAR, competition_type VARCHAR)", "sql": "SELECT country FROM competition WHERE competition_type = 'Friendly' INTERSECT SELECT country FROM competition WHERE competition_type = 'Tournament';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 148, "num_statements": 1} {"question": "Delete the existing high-speed train route from Rome to Milan", "schema": "CREATE TABLE high_speed_rail_routes (id INT PRIMARY KEY, route_name VARCHAR(255), departure_city VARCHAR(255), destination_city VARCHAR(255), distance INT, avg_speed INT);", "sql": "DELETE FROM high_speed_rail_routes WHERE route_name = 'Rome-Milan Express';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_delete", "is_postgresql_specific": false, "sql_length": 75, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which event has Philadelphia, PA as the location and 41 days held?", "schema": "CREATE TABLE table_name_39 (event VARCHAR, location VARCHAR, days_held VARCHAR)", "sql": "SELECT event FROM table_name_39 WHERE location = 'philadelphia, pa' AND days_held = 41;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 87, "num_statements": 1} {"question": "PL/pgSQL test: Plpython Error (example 27).", "schema": null, "sql": "/* AttributeError at toplevel used to give segfaults with the traceback\n*/\nCREATE FUNCTION toplevel_attribute_error() RETURNS void AS\n$$\nplpy.nonexistent\n$$ LANGUAGE plpython3u;", "explanation": "PL/pgSQL example from PostgreSQL source test for Plpython Error.", "validation_query": null, "source": "plpgsql_source", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 177, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the top attendance for weeks past 2 on october 29, 1961?", "schema": "CREATE TABLE table_name_57 (attendance INTEGER, week VARCHAR, date VARCHAR)", "sql": "SELECT MAX(attendance) FROM table_name_57 WHERE week > 2 AND date = 'october 29, 1961';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 87, "num_statements": 1} {"question": "What is the maximum speed reached by a shared bike in New York City?", "schema": "CREATE TABLE shared_bikes (bike_id INT, max_speed FLOAT, registration_date TIMESTAMP, city VARCHAR(50)); INSERT INTO shared_bikes (bike_id, max_speed, registration_date, city) VALUES (1, 30.0, '2021-01-01', 'New York City'), (2, 35.0, '2021-01-02', 'New York City');", "sql": "SELECT MAX(max_speed) FROM shared_bikes WHERE city = 'New York City';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 69, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: I want the control trailers for 1931 with builder of grcw", "schema": "CREATE TABLE table_name_45 (Control VARCHAR, year VARCHAR, builder VARCHAR)", "sql": "SELECT Control AS trailers FROM table_name_45 WHERE year = '1931' AND builder = 'grcw';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 87, "num_statements": 1} {"question": "Compare the CO2 emissions of the transportation sector in Japan and South Africa.", "schema": "CREATE TABLE co2_emissions (country VARCHAR(20), sector VARCHAR(20), co2_emissions INT); INSERT INTO co2_emissions (country, sector, co2_emissions) VALUES ('Japan', 'transportation', 240000), ('South Africa', 'transportation', 180000);", "sql": "SELECT co2_emissions FROM co2_emissions WHERE country = 'Japan' INTERSECT SELECT co2_emissions FROM co2_emissions WHERE country = 'South Africa';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 145, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What position did Garion Weller hold in 2012?", "schema": "CREATE TABLE table_name_23 (position VARCHAR)", "sql": "SELECT position FROM table_name_23 WHERE 2012 = 'garion weller';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 64, "num_statements": 1} {"question": "What was the average speed of Vessel A?", "schema": "CREATE TABLE Vessels (ID VARCHAR(10), Name VARCHAR(20), Type VARCHAR(20), Max_Speed FLOAT); INSERT INTO Vessels (ID, Name, Type, Max_Speed) VALUES ('1', 'Vessel A', 'Cargo', 20.5);", "sql": "SELECT AVG(Max_Speed) FROM Vessels WHERE Name = 'Vessel A';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 59, "num_statements": 1} {"question": "Delete all routes associated with vessels that have a license_number of a captain who is under 30 years old.", "schema": "CREATE TABLE Vessel (id INT, name VARCHAR(50), type VARCHAR(50), length FLOAT); CREATE TABLE Captain (id INT, name VARCHAR(50), age INT, license_number VARCHAR(50), VesselId INT); CREATE TABLE Route (id INT, departure_port VARCHAR(50), arrival_port VARCHAR(50), distance FLOAT, VesselId INT);", "sql": "DELETE FROM Route WHERE VesselId IN (SELECT VesselId FROM Captain WHERE age < 30);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "dml_delete", "is_postgresql_specific": false, "sql_length": 82, "num_statements": 1} {"question": "How many lawyers are in the 'lawyers' table?", "schema": "CREATE TABLE lawyers (id INT, name VARCHAR(50), is_pro_bono BOOLEAN); INSERT INTO lawyers (id, name, is_pro_bono) VALUES (1, 'John Smith', FALSE), (2, 'Jane Doe', TRUE), (3, 'Michael Lee', FALSE);", "sql": "SELECT COUNT(*) FROM lawyers;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 29, "num_statements": 1} {"question": "Create a table for health equity metrics by race", "schema": "CREATE TABLE health_equity_race (id INT PRIMARY KEY, state VARCHAR(2), year INT, race VARCHAR(20), disparity_rate FLOAT);", "sql": "CREATE TABLE if not exists health_equity_race_new AS SELECT * FROM health_equity_race WHERE FALSE;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "ddl_table", "is_postgresql_specific": false, "sql_length": 98, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: List the total number of years that have a score of 3–6, 4–6, 2–6.", "schema": "CREATE TABLE table_2201724_1 (year VARCHAR, score_in_the_final VARCHAR)", "sql": "SELECT COUNT(year) FROM table_2201724_1 WHERE score_in_the_final = '3–6, 4–6, 2–6';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 83, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many fatalities does Italy have?", "schema": "CREATE TABLE table_name_50 (fatalities VARCHAR, country VARCHAR)", "sql": "SELECT fatalities FROM table_name_50 WHERE country = 'italy';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 61, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: I want the average pages for ISBN of 978-0-9766580-5-4", "schema": "CREATE TABLE table_name_77 (pages INTEGER, isbn VARCHAR)", "sql": "SELECT AVG(pages) FROM table_name_77 WHERE isbn = '978-0-9766580-5-4';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 70, "num_statements": 1} {"question": "Which investment strategies have had a total transaction value of over 50000 in the past month in an asset management firm?", "schema": "CREATE TABLE investment_strategies (strategy_id INT, name VARCHAR(255)); CREATE TABLE asset_management_transactions (transaction_id INT, strategy_id INT, amount DECIMAL(10,2), trans_date DATE);", "sql": "SELECT investment_strategies.name FROM investment_strategies INNER JOIN asset_management_transactions ON investment_strategies.strategy_id = asset_management_transactions.strategy_id WHERE asset_management_transactions.trans_date >= NOW() - INTERVAL '1 month' GROUP BY investment_strategies.name HAVING SUM(asset_management_transactions.amount) > 50000;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 353, "num_statements": 1} {"question": "Which energy efficiency projects have type 'Insulation'?", "schema": "CREATE TABLE efficiency_type (name TEXT, location TEXT, type TEXT); INSERT INTO efficiency_type (name, location, type) VALUES ('Project A', 'Country O', 'Lighting'), ('Project B', 'Country P', 'Insulation'), ('Project C', 'Country Q', 'HVAC');", "sql": "SELECT * FROM efficiency_type WHERE type = 'Insulation';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 56, "num_statements": 1} {"question": "PostgreSQL regression test 'strings': Write the SELECT query (example 252).", "schema": null, "sql": "SELECT 'hawkeye' NOT LIKE 'H%' AS \"true\";", "explanation": "Regression test for Strings in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT 'hawkeye' NOT LIKE 'H%' AS \"true\") AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 41, "num_statements": 1} {"question": "What is the total production of indigenous food systems in each continent?", "schema": "CREATE TABLE indigenous_production (continent VARCHAR(255), production INT); INSERT INTO indigenous_production (continent, production) VALUES ('Continent1', 2500), ('Continent2', 3200), ('Continent3', 1800); CREATE VIEW indigenous_systems_view AS SELECT * FROM indigenous_production WHERE production > 1500;", "sql": "SELECT continent FROM indigenous_systems_view;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 46, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the name of the country with less than 9.033 for swimsuit, 8.611 for interview and preliminary is less than 8.87?", "schema": "CREATE TABLE table_name_50 (country VARCHAR, preliminary VARCHAR, swimsuit VARCHAR, interview VARCHAR)", "sql": "SELECT country FROM table_name_50 WHERE swimsuit < 9.033 AND interview = 8.611 AND preliminary < 8.87;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 102, "num_statements": 1} {"question": "What is the number of articles about immigration published in \"El País\" in the first quarter of 2021?", "schema": "CREATE TABLE articles (id INT, title TEXT, publication TEXT, year INT, month INT, day INT, topic TEXT); INSERT INTO articles (id, title, publication, year, month, day, topic) VALUES (1, 'Article 1', 'El País', 2021, 1, 1, 'Immigration'); INSERT INTO articles (id, title, publication, year, month, day, topic) VALUES (2, 'Article 2', 'El País', 2021, 1, 2, 'Immigration'); INSERT INTO articles (id, title, publication, year, month, day, topic) VALUES (3, 'Article 3', 'El País', 2021, 2, 1, 'Politics');", "sql": "SELECT COUNT(*) FROM articles WHERE publication = 'El País' AND topic = 'Immigration' AND year = 2021 AND month BETWEEN 1 AND 3;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 128, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Can you tell me the Out of that has the Source of united nations, and the Rank larger than 47, and the Year smaller than 2003?", "schema": "CREATE TABLE table_name_31 (out_of VARCHAR, year VARCHAR, source VARCHAR, rank VARCHAR)", "sql": "SELECT out_of FROM table_name_31 WHERE source = 'united nations' AND rank > 47 AND year < 2003;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 95, "num_statements": 1} {"question": "What is the total production volume of Samarium in Canada and the USA combined?", "schema": "CREATE TABLE Samarium_Production (id INT, year INT, country VARCHAR(20), production_volume INT);", "sql": "SELECT SUM(production_volume) FROM Samarium_Production WHERE country IN ('Canada', 'USA');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 90, "num_statements": 1} {"question": "Write the SQL definition from PostgreSQL contrib extension 'contrib' (item 27).", "schema": null, "sql": "CREATE OPERATOR <= (\n LEFTARG = CITEXT,\n RIGHTARG = CITEXT,\n NEGATOR = >,\n COMMUTATOR = >=,\n PROCEDURE = citext_le,\n RESTRICT = scalarltsel,\n JOIN = scalarltjoinsel\n);", "explanation": "SQL definition from the 'contrib' PostgreSQL contrib extension.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "ddl_advanced", "is_postgresql_specific": true, "sql_length": 205, "num_statements": 1} {"question": "Delete all records from the 'weapons' table where the 'origin' is 'Russia'", "schema": "CREATE TABLE weapons (id INT PRIMARY KEY, name VARCHAR(255), origin VARCHAR(255)); INSERT INTO weapons (id, name, origin) VALUES (1, 'AK-47', 'Russia'), (2, 'RPG-7', 'Russia'), (3, 'Mig-29', 'Russia');", "sql": "DELETE FROM weapons WHERE origin = 'Russia';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_delete", "is_postgresql_specific": false, "sql_length": 44, "num_statements": 1} {"question": "What is the minimum water consumption in the public sector in Alberta?", "schema": "CREATE TABLE public_sector (id INT, province VARCHAR(20), water_consumption FLOAT); INSERT INTO public_sector (id, province, water_consumption) VALUES (1, 'Alberta', 100000000), (2, 'Alberta', 90000000), (3, 'Alberta', 80000000);", "sql": "SELECT MIN(water_consumption) FROM public_sector WHERE province = 'Alberta';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 76, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the Weight of the player with a Block larger than 328, Spike greater than 375 and Height larger than 207?", "schema": "CREATE TABLE table_name_28 (weight VARCHAR, spike VARCHAR, block VARCHAR, height VARCHAR)", "sql": "SELECT COUNT(weight) FROM table_name_28 WHERE block > 328 AND height > 207 AND spike > 375;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 91, "num_statements": 1} {"question": "What is the mortality rate for each infectious disease in Africa?", "schema": "CREATE TABLE MortalityRates (Disease VARCHAR(255), Continent VARCHAR(255), MortalityRate FLOAT); INSERT INTO MortalityRates (Disease, Continent, MortalityRate) VALUES ('Malaria', 'Africa', 0.6), ('Tuberculosis', 'Africa', 0.3), ('HIV/AIDS', 'Africa', 0.7), ('Hepatitis', 'Africa', 0.1), ('Influenza', 'Africa', 0.01);", "sql": "SELECT Disease, MortalityRate FROM MortalityRates WHERE Continent = 'Africa';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 77, "num_statements": 1} {"question": "Write the PL/pgSQL object from PostgreSQL regression test 'updatable_views' (example 161).", "schema": null, "sql": "DROP FUNCTION rw_view1_trig_fn();", "explanation": "PL/pgSQL object from PostgreSQL core test for Updatable Views.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 33, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Show different carriers of phones together with the number of phones with each carrier.", "schema": "CREATE TABLE phone (Carrier VARCHAR)", "sql": "SELECT Carrier, COUNT(*) FROM phone GROUP BY Carrier;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 53, "num_statements": 1} {"question": "PL/pgSQL test: Plpython Trigger (example 75).", "schema": null, "sql": "-- Unicode variant\n\nCREATE FUNCTION stupid7u() RETURNS trigger\nAS $$\n TD[\"new\"] = {'v': 'foo', 'a': 'bar'}\n return \"MODIFY\"\n$$ LANGUAGE plpython3u;", "explanation": "PL/pgSQL example from PostgreSQL source test for Plpython Trigger.", "validation_query": null, "source": "plpgsql_source", "difficulty": "basic", "category": "plpgsql_trigger", "is_postgresql_specific": false, "sql_length": 153, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the Catalog Number is for the Japan region?", "schema": "CREATE TABLE table_name_59 (catalog VARCHAR, region VARCHAR)", "sql": "SELECT catalog FROM table_name_59 WHERE region = 'japan';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 57, "num_statements": 1} {"question": "List the employees who have a higher salary than the average salary in the Sales department.", "schema": "CREATE TABLE Employees (EmployeeID INT, Salary DECIMAL(10,2), Department VARCHAR(50)); INSERT INTO Employees (EmployeeID, Salary, Department) VALUES (1, 90000.00, 'Sales'), (2, 80000.00, 'Sales');", "sql": "SELECT * FROM Employees WHERE Salary > (SELECT AVG(Salary) FROM Employees WHERE Department = 'Sales');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 102, "num_statements": 1} {"question": "Update the location of a heritage site in the 'heritage_sites' table", "schema": "CREATE TABLE heritage_sites (id INT PRIMARY KEY, name VARCHAR(255), location VARCHAR(255), year INT);", "sql": "UPDATE heritage_sites SET location = 'Scotland' WHERE id = 1;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 61, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: how many for christer tornell where the total is 30?", "schema": "CREATE TABLE table_28677723_11 (christer_tornell VARCHAR, total VARCHAR)", "sql": "SELECT COUNT(christer_tornell) FROM table_28677723_11 WHERE total = 30;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 71, "num_statements": 1} {"question": "What is the total number of research stations in the Arctic region?", "schema": "CREATE TABLE ResearchStations (station_name varchar(50), region varchar(50), operational_status varchar(50));", "sql": "SELECT COUNT(DISTINCT station_name) AS total_stations FROM ResearchStations WHERE region LIKE '%Arctic%';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 105, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the result of the International Friendly competition on May 15, 1966?", "schema": "CREATE TABLE table_name_89 (result VARCHAR, competition VARCHAR, date VARCHAR)", "sql": "SELECT result FROM table_name_89 WHERE competition = 'international friendly' AND date = 'may 15, 1966';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 104, "num_statements": 1} {"question": "Identify the economic diversification efforts in West Africa with an investment of over 5 million USD.", "schema": "CREATE TABLE DiversificationEfforts (id INT, effort_name TEXT, location TEXT, investment FLOAT); INSERT INTO DiversificationEfforts (id, effort_name, location, investment) VALUES (1, 'Renewable Energy West Africa', 'West Africa', 5500000); INSERT INTO DiversificationEfforts (id, effort_name, location, investment) VALUES (2, 'Tourism Development West Africa', 'West Africa', 4000000); INSERT INTO DiversificationEfforts (id, effort_name, location, investment) VALUES (3, 'Mining West Africa', 'West Africa', 3500000);", "sql": "SELECT effort_name, location FROM DiversificationEfforts WHERE investment > 5000000 AND location LIKE '%West Africa%';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 118, "num_statements": 1} {"question": "List the number of community development projects in 'rural_development' database, grouped by region and project type.", "schema": "CREATE TABLE projects (id INT, region TEXT, project_type TEXT, start_date DATE, end_date DATE);", "sql": "SELECT region, project_type, COUNT(*) FROM projects GROUP BY region, project_type;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 82, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: where was the abbas family and the pickering family located", "schema": "CREATE TABLE table_19897294_9 (location_s_ VARCHAR, family_families VARCHAR)", "sql": "SELECT location_s_ FROM table_19897294_9 WHERE family_families = 'The Abbas Family and The Pickering Family';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 109, "num_statements": 1} {"question": "Delete size records for customers not fitting standard sizes", "schema": "CREATE TABLE CustomerSizes (CustomerID INT, Size TEXT); INSERT INTO CustomerSizes (CustomerID, Size) VALUES (1, 'XS'), (2, 'S'), (3, 'M');", "sql": "DELETE FROM CustomerSizes WHERE Size NOT IN ('XS', 'S', 'M');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_delete", "is_postgresql_specific": false, "sql_length": 61, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What are McCain's Percent when Obama has 36.47%?", "schema": "CREATE TABLE table_20688030_1 (mccain_percentage VARCHAR, obama_percentage VARCHAR)", "sql": "SELECT mccain_percentage FROM table_20688030_1 WHERE obama_percentage = '36.47%';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 81, "num_statements": 1} {"question": "pgTAP test for Runtests (assertion 7).", "schema": null, "sql": "CREATE OR REPLACE FUNCTION whatever.setup() RETURNS SETOF TEXT AS $$\n SELECT collect_tap(ARRAY[\n pass('setup'),\n (SELECT is( MAX(id), NULL, 'Should be nothing in the test table') FROM whatever.foo)\n ]);\n$$ LANGUAGE SQL;", "explanation": "SQL assertion from pgTAP test suite for Runtests.", "validation_query": null, "source": "pgtap_tests", "difficulty": "advanced", "category": "plpgsql_function", "is_postgresql_specific": true, "sql_length": 239, "num_statements": 2} {"question": "What is the number of customers with financial capability in South America?", "schema": "CREATE TABLE financial_capability_south_america (id INT, customer_id INT, country VARCHAR(255), capable BOOLEAN); INSERT INTO financial_capability_south_america (id, customer_id, country, capable) VALUES (1, 6001, 'Brazil', true), (2, 6002, 'Argentina', false), (3, 6003, 'Colombia', true);", "sql": "SELECT COUNT(*) FROM financial_capability_south_america WHERE capable = true AND country IN ('Brazil', 'Argentina', 'Colombia');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 128, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which country is dated October 20, 1976?", "schema": "CREATE TABLE table_name_83 (country VARCHAR, date VARCHAR)", "sql": "SELECT country FROM table_name_83 WHERE date = 'october 20, 1976';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "Calculate the average food safety inspection score for restaurants in a particular city in 2020.", "schema": "CREATE TABLE restaurant (restaurant_id INT, name TEXT, city TEXT, inspection_score INT); INSERT INTO restaurant (restaurant_id, name, city, inspection_score) VALUES (1, 'Restaurant A', 'San Francisco', 90), (2, 'Restaurant B', 'New York', 85), (3, 'Restaurant C', 'San Francisco', 80), (4, 'Restaurant D', 'Chicago', 95), (5, 'Restaurant E', 'New York', 88);", "sql": "SELECT city, AVG(inspection_score) AS avg_inspection_score FROM restaurant WHERE city = 'San Francisco' AND EXTRACT(YEAR FROM last_inspection_date) = 2020 GROUP BY city;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 169, "num_statements": 1} {"question": "Calculate the landfill capacity utilization for the city of Los Angeles in 2020", "schema": "CREATE TABLE landfill_capacity (city VARCHAR(20), year INT, landfill_capacity FLOAT, waste_generated FLOAT);INSERT INTO landfill_capacity (city, year, landfill_capacity, waste_generated) VALUES ('Los Angeles', 2019, 5000000, 3000000), ('Los Angeles', 2020, 5000000, 3200000), ('Los Angeles', 2021, 5000000, 3400000), ('San Diego', 2019, 4000000, 2500000), ('San Diego', 2020, 4000000, 2700000), ('San Diego', 2021, 4000000, 2900000);", "sql": "SELECT (waste_generated / landfill_capacity) * 100 FROM landfill_capacity WHERE city = 'Los Angeles' AND year = 2020;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 117, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: I want the time/retired for Grid of 13", "schema": "CREATE TABLE table_name_59 (time_retired VARCHAR, grid VARCHAR)", "sql": "SELECT time_retired FROM table_name_59 WHERE grid = 13;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 55, "num_statements": 1} {"question": "What is the average revenue per day for a specific restaurant?", "schema": "CREATE TABLE daily_revenue (restaurant_id INT, revenue FLOAT, date DATE); INSERT INTO daily_revenue (restaurant_id, revenue, date) VALUES (1, 5000.00, '2022-01-01'), (1, 6000.00, '2022-01-02'), (1, 4000.00, '2022-01-03');", "sql": "SELECT AVG(revenue) as avg_daily_revenue FROM daily_revenue WHERE restaurant_id = 1;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 84, "num_statements": 1} {"question": "PostgreSQL regression test 'interval': Write the SELECT query (example 164).", "schema": null, "sql": "select interval '1 year 2 mons 3 days 04:05:06.699999';", "explanation": "Regression test for Interval in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select interval '1 year 2 mons 3 days 04:05:06.699999') AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 55, "num_statements": 1} {"question": "pgTAP test for Inheritance (assertion 90).", "schema": null, "sql": "-- test is_descendent_of\nSELECT * FROM check_test(\n is_descendent_of( 'hide', 'h_child1', 'hide', 'h_parent', 1, 'Lookie' ),\n true,\n 'is_descendent_of(csch, ctab, psch, ptab, 1, desc)',\n 'Lookie',\n ''\n);", "explanation": "SQL assertion from pgTAP test suite for Inheritance.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 218, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the location when control is private and founded is 1870", "schema": "CREATE TABLE table_15851155_1 (location VARCHAR, control VARCHAR, founded VARCHAR)", "sql": "SELECT location FROM table_15851155_1 WHERE control = 'Private' AND founded = 1870;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 83, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the least week for l 26–42", "schema": "CREATE TABLE table_26401898_2 (week INTEGER, final_score VARCHAR)", "sql": "SELECT MIN(week) FROM table_26401898_2 WHERE final_score = 'L 26–42';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 69, "num_statements": 1} {"question": "Which community development initiatives were completed in '2021'?", "schema": "CREATE TABLE community_development (id INT, initiative_name VARCHAR(50), sector VARCHAR(50), start_date DATE, end_date DATE, budget FLOAT); INSERT INTO community_development (id, initiative_name, sector, start_date, end_date, budget) VALUES (1, 'Youth Empowerment Program', 'Community Development', '2019-01-01', '2020-12-31', 250000);", "sql": "SELECT initiative_name FROM community_development WHERE YEAR(end_date) = 2021;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 78, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the location that led to a record of 9-0-0?", "schema": "CREATE TABLE table_name_95 (location VARCHAR, record VARCHAR)", "sql": "SELECT location FROM table_name_95 WHERE record = '9-0-0';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 58, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What date has 0-6700-30838-2-9 for a catalog?", "schema": "CREATE TABLE table_name_88 (date VARCHAR, catalog VARCHAR)", "sql": "SELECT date FROM table_name_88 WHERE catalog = '0-6700-30838-2-9';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the high capacity that has an average under 489 and a highest over 778?", "schema": "CREATE TABLE table_name_90 (capacity INTEGER, average VARCHAR, highest VARCHAR)", "sql": "SELECT MAX(capacity) FROM table_name_90 WHERE average < 489 AND highest > 778;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 78, "num_statements": 1} {"question": "What is the average delivery time for shipments to 'Africa' after the 15th of each month?", "schema": "CREATE TABLE shipments (id INT, delivery_date DATE, destination VARCHAR(20), delivery_time INT); INSERT INTO shipments (id, delivery_date, destination, delivery_time) VALUES (1, '2022-02-20', 'Africa', 4), (2, '2022-03-07', 'Europe', 6), (3, '2022-02-25', 'Africa', 5);", "sql": "SELECT AVG(delivery_time) FROM shipments WHERE delivery_date >= DATEADD(day, 15, EOMONTH(delivery_date)) AND destination = 'Africa';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 132, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: List the names of all routes in alphabetic order.", "schema": "CREATE TABLE Delivery_Routes (route_name VARCHAR)", "sql": "SELECT route_name FROM Delivery_Routes ORDER BY route_name;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 59, "num_statements": 1} {"question": "How many volunteers signed up in 2021 from Australia?", "schema": "CREATE TABLE volunteers (id INT, name TEXT, country TEXT, signup_date DATE); INSERT INTO volunteers (id, name, country, signup_date) VALUES (1, 'Alex Johnson', 'Australia', '2021-02-12'), (2, 'Bruce Lee', 'USA', '2020-05-23');", "sql": "SELECT COUNT(*) FROM volunteers WHERE country = 'Australia' AND YEAR(signup_date) = 2021;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 89, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'without_overlaps' (example 438).", "schema": null, "sql": "UPDATE temporal_rng\n SET valid_at = CASE WHEN lower(valid_at) = '2018-01-01' THEN daterange('2018-01-01', '2018-01-05')\n WHEN lower(valid_at) = '2018-02-01' THEN daterange('2018-01-05', '2018-03-01') END\n WHERE id = '[6,7)';", "explanation": "DML from PostgreSQL core regression test for Without Overlaps.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 247, "num_statements": 1} {"question": "Which users have participated in 'Zumba' workouts in the 'workout_data' table?", "schema": "CREATE TABLE workout_data (user_id INT, workout_type VARCHAR(20), duration INT); INSERT INTO workout_data (user_id, workout_type, duration) VALUES (1, 'Running', 30), (1, 'Cycling', 60), (2, 'Yoga', 45), (3, 'Pilates', 50), (6, 'Zumba', 75), (7, 'Zumba', 90);", "sql": "SELECT DISTINCT user_id FROM workout_data WHERE workout_type = 'Zumba';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 71, "num_statements": 1} {"question": "Find the maximum price of sustainable footwear", "schema": "CREATE TABLE footwear (id INT, category VARCHAR(50), subcategory VARCHAR(50), is_sustainable BOOLEAN, price DECIMAL(5,2)); INSERT INTO footwear (id, category, subcategory, is_sustainable, price) VALUES (1, 'Footwear', 'Sneakers', TRUE, 120.00), (2, 'Footwear', 'Sneakers', TRUE, 150.00), (3, 'Footwear', 'Sandals', FALSE, 60.00), (4, 'Footwear', 'Boots', TRUE, 200.00), (5, 'Footwear', 'Loafers', FALSE, 90.00);", "sql": "SELECT MAX(price) FROM footwear WHERE subcategory = 'Sneakers' AND is_sustainable = TRUE;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 89, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which club is associated with Hafiz Abu Sujad?", "schema": "CREATE TABLE table_name_80 (club VARCHAR, name VARCHAR)", "sql": "SELECT club FROM table_name_80 WHERE name = 'hafiz abu sujad';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 62, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the average height for hewitt class, with prom less than 86, and a Peak of gragareth?", "schema": "CREATE TABLE table_name_45 (height__m_ INTEGER, peak VARCHAR, class VARCHAR, prom__m_ VARCHAR)", "sql": "SELECT AVG(height__m_) FROM table_name_45 WHERE class = 'hewitt' AND prom__m_ < 86 AND peak = 'gragareth';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 106, "num_statements": 1} {"question": "What is the total number of defense diplomacy events in each region?", "schema": "CREATE TABLE Defense_Diplomacy (Event_ID INT, Event_Name VARCHAR(50), Start_Date DATE, Region VARCHAR(50)); INSERT INTO Defense_Diplomacy (Event_ID, Event_Name, Start_Date, Region) VALUES (1, 'Defense Summit', '2000-01-01', 'North America');", "sql": "SELECT Region, COUNT(*) as Total_Events FROM Defense_Diplomacy GROUP BY Region;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 79, "num_statements": 1} {"question": "What is the average distance from rural healthcare facilities to the nearest urban center in South Africa and Kenya?", "schema": "CREATE TABLE facilities (name TEXT, location TEXT, distance INTEGER); INSERT INTO facilities (name, location, distance) VALUES ('Facility X', 'Rural South Africa', 100), ('Facility Y', 'Rural Kenya', 80);", "sql": "SELECT AVG(distance) FROM facilities WHERE location LIKE 'Rural%';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the mccain % for dukes", "schema": "CREATE TABLE table_15929156_3 (mccain__percentage VARCHAR, county VARCHAR)", "sql": "SELECT mccain__percentage FROM table_15929156_3 WHERE county = 'Dukes';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 71, "num_statements": 1} {"question": "What is the total number of articles published in the \"articles\" table that have more than 500 words?", "schema": "CREATE TABLE articles (id INT, title VARCHAR(100), word_count INT, publication_date DATE, category VARCHAR(50));", "sql": "SELECT COUNT(*) FROM articles WHERE word_count > 500;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 53, "num_statements": 1} {"question": "PostgreSQL regression test 'jsonb_jsonpath': Write the SELECT query (example 267).", "schema": null, "sql": "select jsonb_path_query('{}', '$.double()');", "explanation": "Regression test for Jsonb Jsonpath in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select jsonb_path_query('{}', '$.double()')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "advanced", "category": "query_select", "is_postgresql_specific": false, "sql_length": 44, "num_statements": 1} {"question": "Find the total number of games and unique genres for each platform, excluding PC games.", "schema": "CREATE TABLE Games (GameID INT, GameName VARCHAR(50), Platform VARCHAR(10), GameGenre VARCHAR(20));", "sql": "SELECT Platform, COUNT(DISTINCT GameGenre) AS Unique_Genres, COUNT(*) AS Total_Games FROM Games WHERE Platform != 'PC' GROUP BY Platform;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 137, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the season record when the location attendance was Air Canada Centre 19,800?", "schema": "CREATE TABLE table_name_26 (record VARCHAR, location_attendance VARCHAR)", "sql": "SELECT record FROM table_name_26 WHERE location_attendance = 'air canada centre 19,800';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 88, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the biggest draws, for wins under 4, and points of 20-18?", "schema": "CREATE TABLE table_name_29 (draws INTEGER, points VARCHAR, wins VARCHAR)", "sql": "SELECT MAX(draws) FROM table_name_29 WHERE points = '20-18' AND wins < 4;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 73, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the sum of qualifying scores when the final score is 16.625?", "schema": "CREATE TABLE table_name_7 (score_qualifying VARCHAR, score_final VARCHAR)", "sql": "SELECT COUNT(score_qualifying) FROM table_name_7 WHERE score_final = 16.625;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 76, "num_statements": 1} {"question": "What is the average number of vaccinations administered per day, before January 15, 2021?", "schema": "CREATE TABLE vaccinations (id INT PRIMARY KEY, patient_id INT, vaccine VARCHAR(255), date DATE); INSERT INTO vaccinations (id, patient_id, vaccine, date) VALUES (1, 12345, 'Pfizer-BioNTech', '2021-01-05'); INSERT INTO vaccinations (id, patient_id, vaccine, date) VALUES (2, 67890, 'Moderna', '2021-01-10'); INSERT INTO vaccinations (id, patient_id, vaccine, date) VALUES (3, 12345, 'Pfizer-BioNTech', '2021-01-15');", "sql": "SELECT AVG(vaccinations_per_day) FROM (SELECT COUNT(*) / (DATEDIFF('2021-01-15', date)) AS vaccinations_per_day FROM vaccinations WHERE date < '2021-01-15') AS subquery;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 169, "num_statements": 1} {"question": "What is the average word count of articles by gender and region in 'news_articles' table?", "schema": "CREATE TABLE news_articles (id INT, title VARCHAR(100), publication_date DATE, author VARCHAR(50), word_count INT, gender VARCHAR(10), region VARCHAR(50)); INSERT INTO news_articles (id, title, publication_date, author, word_count, gender, region) VALUES (1, 'Article 1', '2022-01-01', 'John Doe', 500, 'Male', 'North America'), (2, 'Article 2', '2022-01-02', 'Jane Smith', 700, 'Female', 'Europe');", "sql": "SELECT gender, region, AVG(word_count) as avg_word_count FROM news_articles GROUP BY gender, region;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 100, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the viewers where the rating is 5.3?", "schema": "CREATE TABLE table_11178271_1 (viewers__m_ VARCHAR, rating VARCHAR)", "sql": "SELECT viewers__m_ FROM table_11178271_1 WHERE rating = '5.3';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 62, "num_statements": 1} {"question": "PostgreSQL regression test 'text': Write the SELECT query (example 18).", "schema": null, "sql": "select quote_literal('abc''');", "explanation": "Regression test for Text in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select quote_literal('abc''')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 30, "num_statements": 1} {"question": "PostgreSQL regression test 'jsonpath': Write the SELECT query (example 133).", "schema": null, "sql": "select '$ ? (@.a < +1e-1)'::jsonpath;", "explanation": "Regression test for Jsonpath in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select '$ ? (@.a < +1e-1)'::jsonpath) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 37, "num_statements": 1} {"question": "Find the number of unique volunteers for each program?", "schema": "CREATE TABLE Volunteers (VolunteerID INT, ProgramID INT); INSERT INTO Volunteers (VolunteerID, ProgramID) VALUES (1, 1), (2, 1), (3, 2), (4, 3), (5, 3);", "sql": "SELECT ProgramID, COUNT(DISTINCT VolunteerID) FROM Volunteers GROUP BY ProgramID;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 81, "num_statements": 1} {"question": "What is the most popular color of sustainable fashion products?", "schema": "CREATE TABLE sales (id INT, product_type VARCHAR(20), color VARCHAR(20), revenue DECIMAL); INSERT INTO sales (id, product_type, color, revenue) VALUES (1, 'sustainable', 'green', 100.00), (2, 'regular', 'blue', 200.00), (3, 'sustainable', 'green', 300.00), (4, 'regular', 'red', 400.00), (5, 'sustainable', 'blue', 500.00);", "sql": "SELECT color, COUNT(*) FROM sales WHERE product_type = 'sustainable' GROUP BY color ORDER BY COUNT(*) DESC LIMIT 1;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 115, "num_statements": 1} {"question": "Find the difference between the average response times for 'Fire' and 'Medical' calls in each neighborhood.", "schema": "CREATE TABLE ResponseTimes (call_id INT, call_type VARCHAR(10), neighborhood VARCHAR(20), response_time INT); INSERT INTO ResponseTimes VALUES (1, 'Fire', 'Parkside', 5), (2, 'Medical', 'Parkside', 7), (3, 'Fire', 'Downtown', 6), (4, 'Medical', 'Downtown', 8);", "sql": "SELECT neighborhood, AVG(response_time) - (SELECT AVG(response_time) FROM ResponseTimes r2 WHERE r2.neighborhood = r1.neighborhood AND r2.call_type = 'Medical') AS difference FROM ResponseTimes r1 WHERE r1.call_type = 'Fire' GROUP BY neighborhood;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 247, "num_statements": 1} {"question": "How many traffic violations were issued in the state of New York, broken down by county, in the month of July 2022?", "schema": "CREATE TABLE traffic_violations (county VARCHAR(20), violation_count INT, violation_date DATE); INSERT INTO traffic_violations (county, violation_count, violation_date) VALUES ('New York County', 500, '2022-07-01');", "sql": "SELECT county, SUM(violation_count) FROM traffic_violations WHERE violation_date BETWEEN '2022-07-01' AND '2022-07-31' GROUP BY county;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 135, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Find the name of companies that do not make DVD drive.", "schema": "CREATE TABLE manufacturers (name VARCHAR, code VARCHAR); CREATE TABLE products (Manufacturer VARCHAR, name VARCHAR); CREATE TABLE manufacturers (name VARCHAR)", "sql": "SELECT name FROM manufacturers EXCEPT SELECT T2.name FROM products AS T1 JOIN manufacturers AS T2 ON T1.Manufacturer = T2.code WHERE T1.name = 'DVD drive';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 155, "num_statements": 1} {"question": "PostgreSQL regression test 'jsonb': Write the SELECT query (example 924).", "schema": null, "sql": "select jsonb_to_tsvector('english', '{\"a\": \"aaa in bbb\", \"b\": 123, \"c\": 456, \"d\": true, \"f\": false, \"g\": null}'::jsonb, '\"numeric\"');", "explanation": "Regression test for Jsonb in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select jsonb_to_tsvector('english', '{\"a\": \"aaa in bbb\", \"b\": 123, \"c\": 456, \"d\": true, \"f\": false, \"g\": null}'::jsonb, '\"numeric\"')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "advanced", "category": "query_select", "is_postgresql_specific": true, "sql_length": 133, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the record for cotton bowl classic", "schema": "CREATE TABLE table_2517159_1 (record VARCHAR, name_of_bowl VARCHAR)", "sql": "SELECT record FROM table_2517159_1 WHERE name_of_bowl = 'Cotton Bowl Classic';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 78, "num_statements": 1} {"question": "What is the average orbital height of satellites launched by Japan?", "schema": "CREATE TABLE satellites_orbital (id INT, name VARCHAR(255), country VARCHAR(255), orbital_height FLOAT);", "sql": "SELECT AVG(orbital_height) FROM satellites_orbital WHERE country = 'Japan';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 75, "num_statements": 1} {"question": "What is the average temperature and humidity for Farm 3?", "schema": "CREATE TABLE weather_data (id INT, location VARCHAR(50), temperature FLOAT, humidity FLOAT, time TIMESTAMP); INSERT INTO weather_data (id, location, temperature, humidity, time) VALUES (1, 'Farm 3', 25.0, 60.0, '2021-01-01 10:00:00');", "sql": "SELECT AVG(temperature), AVG(humidity) FROM weather_data WHERE location = 'Farm 3';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 83, "num_statements": 1} {"question": "What was the total cost of spacecraft manufactured by 'AeroCorp'?", "schema": "CREATE TABLE SpacecraftManufacturing(company VARCHAR(20), cost INT); INSERT INTO SpacecraftManufacturing(company, cost) VALUES('AeroCorp', 5000000), ('GalacticTech', 7000000), ('SpaceEagle', 6000000);", "sql": "SELECT SUM(cost) FROM SpacecraftManufacturing WHERE company = 'AeroCorp';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 73, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which tournament has an Outcome of runner-up, and an Opponent of maša zec peškirič?", "schema": "CREATE TABLE table_name_93 (tournament VARCHAR, outcome VARCHAR, opponent VARCHAR)", "sql": "SELECT tournament FROM table_name_93 WHERE outcome = 'runner-up' AND opponent = 'maša zec peškirič';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 100, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What venue listed is dated February 22, 2003?", "schema": "CREATE TABLE table_name_4 (venue VARCHAR, date VARCHAR)", "sql": "SELECT venue FROM table_name_4 WHERE date = 'february 22, 2003';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 64, "num_statements": 1} {"question": "What is the total revenue generated from digital music sales in the United States?", "schema": "CREATE TABLE digital_sales (sale_id INT, platform VARCHAR(20), genre VARCHAR(20), units_sold INT, sale_price DECIMAL(5,2)); CREATE TABLE countries (country_code CHAR(2), country_name VARCHAR(50)); INSERT INTO countries (country_code, country_name) VALUES ('US', 'United States');", "sql": "SELECT SUM(d.sale_price * d.units_sold) as total_revenue FROM digital_sales d INNER JOIN countries c ON d.platform = c.country_name WHERE c.country_code = 'US';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 160, "num_statements": 1} {"question": "What is the difference in funding amount between the latest and earliest round for each company?", "schema": "CREATE TABLE funding_rounds (company_id INT, round_number INT, funding_amount INT); INSERT INTO funding_rounds (company_id, round_number, funding_amount) VALUES (1, 1, 5000000), (1, 2, 7000000), (1, 3, 10000000), (2, 1, 3000000), (2, 2, 4000000), (3, 1, 9000000), (3, 2, 11000000);", "sql": "SELECT company_id, MAX(funding_amount) - MIN(funding_amount) AS funding_difference FROM funding_rounds GROUP BY company_id;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 123, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'varchar' (example 3).", "schema": null, "sql": "INSERT INTO VARCHAR_TBL (f1) VALUES ('A');", "explanation": "DML from PostgreSQL core regression test for Varchar.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 42, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'rules' (example 23).", "schema": null, "sql": "create table rtest_empmass (ename char(20), salary numeric);", "explanation": "DDL from PostgreSQL core regression test for Rules.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_table", "is_postgresql_specific": false, "sql_length": 60, "num_statements": 1} {"question": "What is the average donation amount for donors from Oceania?", "schema": "CREATE TABLE Donors (DonorID int, DonorName varchar(50), Country varchar(50)); INSERT INTO Donors (DonorID, DonorName, Country) VALUES (1, 'John Doe', 'United States'); INSERT INTO Donors (DonorID, DonorName, Country) VALUES (2, 'Jane Smith', 'Australia'); INSERT INTO Donors (DonorID, DonorName, Country) VALUES (3, 'Alice Johnson', 'Japan'); CREATE TABLE Donations (DonationID int, DonorID int, DonationAmount decimal(10, 2)); INSERT INTO Donations (DonationID, DonorID, DonationAmount) VALUES (1, 1, 5000); INSERT INTO Donations (DonationID, DonorID, DonationAmount) VALUES (2, 2, 1000); INSERT INTO Donations (DonationID, DonorID, DonationAmount) VALUES (3, 2, 1500); INSERT INTO Donations (DonationID, DonorID, DonationAmount) VALUES (4, 3, 2000);", "sql": "SELECT AVG(Donations.DonationAmount) as AverageDonationAmount FROM Donors INNER JOIN Donations ON Donations.DonorID = Donors.DonorID WHERE Donors.Country IN ('Australia');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 171, "num_statements": 1} {"question": "What is the average temperature change in Africa and North America from 2015 to 2020?", "schema": "CREATE TABLE africa_temperature (year INT, avg_temp FLOAT); INSERT INTO africa_temperature (year, avg_temp) VALUES (2015, 20.1), (2016, 20.5), (2017, 21.2), (2018, 20.8), (2019, 21.0), (2020, 21.5); CREATE TABLE north_america_temperature (year INT, avg_temp FLOAT); INSERT INTO north_america_temperature (year, avg_temp) VALUES (2015, 10.1), (2016, 10.5), (2017, 11.2), (2018, 10.8), (2019, 11.0), (2020, 11.5);", "sql": "SELECT AVG(africa_temperature.avg_temp) AS africa_avg_temp, AVG(north_america_temperature.avg_temp) AS north_america_avg_temp FROM africa_temperature, north_america_temperature WHERE africa_temperature.year = north_america_temperature.year AND africa_temperature.year BETWEEN 2015 AND 2020;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 290, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many violent catagories are listed for the year forcible rapes were 1156?", "schema": "CREATE TABLE table_25271777_1 (violent VARCHAR, forcible_rape VARCHAR)", "sql": "SELECT COUNT(violent) FROM table_25271777_1 WHERE forcible_rape = 1156;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 71, "num_statements": 1} {"question": "What is the minimum serving size for organic juice?", "schema": "CREATE TABLE Beverages (id INT, is_organic BOOLEAN, category VARCHAR(20), serving_size INT); INSERT INTO Beverages (id, is_organic, category, serving_size) VALUES (1, true, 'juice', 10), (2, false, 'juice', 15), (3, true, 'smoothie', 12);", "sql": "SELECT MIN(serving_size) FROM Beverages WHERE is_organic = true AND category = 'juice';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 87, "num_statements": 1} {"question": "What is the average cost of military equipment maintenance per incident for each type of equipment in the last year?", "schema": "CREATE TABLE equipment_maintenance (id INT, incident_number VARCHAR(255), equipment_type VARCHAR(255), state VARCHAR(255), cost DECIMAL(10,2));", "sql": "SELECT equipment_type, AVG(cost) FROM equipment_maintenance WHERE date >= DATE_SUB(CURRENT_DATE, INTERVAL 12 MONTH) GROUP BY equipment_type;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 140, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many Points has Wins larger than 0?", "schema": "CREATE TABLE table_name_24 (points VARCHAR, wins INTEGER)", "sql": "SELECT COUNT(points) FROM table_name_24 WHERE wins > 0;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 55, "num_statements": 1} {"question": "What is the total square footage of all properties in sustainable urban areas?", "schema": "CREATE TABLE urban_areas (id INT, area VARCHAR(20), sustainable BOOLEAN); INSERT INTO urban_areas (id, area, sustainable) VALUES (1, 'City A', true), (2, 'City B', false), (3, 'City C', true); CREATE TABLE properties (id INT, area VARCHAR(20), size INT); INSERT INTO properties (id, area, size) VALUES (1, 'City A', 1500), (2, 'City B', 2000), (3, 'City A', 1000);", "sql": "SELECT SUM(size) FROM properties JOIN urban_areas ON properties.area = urban_areas.area WHERE urban_areas.sustainable = true;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 125, "num_statements": 1} {"question": "PostgreSQL regression test 'numeric': Write the SELECT query (example 749).", "schema": null, "sql": "SELECT to_number('MCCM', 'RN');", "explanation": "Regression test for Numeric in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT to_number('MCCM', 'RN')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 31, "num_statements": 1} {"question": "Identify the total number of fish in the North Pacific Ocean, grouped by species, for those species with a population greater than 1000000?", "schema": "CREATE TABLE FishPopulation (Species TEXT, Ocean TEXT, Population INT); INSERT INTO FishPopulation (Species, Ocean, Population) VALUES ('Salmon', 'North Pacific', 800000), ('Tuna', 'North Pacific', 1200000), ('Shark', 'North Pacific', 500000);", "sql": "SELECT Species, SUM(Population) FROM FishPopulation WHERE Ocean = 'North Pacific' GROUP BY Species HAVING SUM(Population) > 1000000;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 132, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is Team, when High Rebounds is \"Carl Landry (11)\"?", "schema": "CREATE TABLE table_name_78 (team VARCHAR, high_rebounds VARCHAR)", "sql": "SELECT team FROM table_name_78 WHERE high_rebounds = 'carl landry (11)';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 72, "num_statements": 1} {"question": "What is the maximum ESG score in the healthcare sector?", "schema": "CREATE TABLE companies (id INT, name VARCHAR(255), sector VARCHAR(255), ESG_score FLOAT); INSERT INTO companies (id, name, sector, ESG_score) VALUES (1, 'Pfizer', 'Healthcare', 90.0), (2, 'Johnson & Johnson', 'Healthcare', 92.5), (3, 'Merck', 'Healthcare', 87.5);", "sql": "SELECT MAX(ESG_score) FROM companies WHERE sector = 'Healthcare';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 65, "num_statements": 1} {"question": "What is the industry with the highest total funding?", "schema": "CREATE TABLE funding (id INT, company_id INT, amount DECIMAL(10, 2)); CREATE TABLE company (id INT, name VARCHAR(255), industry VARCHAR(255)); INSERT INTO company (id, name, industry) VALUES (1, 'Fintech Inc', 'finance'), (2, 'Startup Corp', 'tech'), (3, 'Green Inc', 'green'); INSERT INTO funding (id, company_id, amount) VALUES (1, 1, 500000.00), (2, 1, 750000.00), (3, 2, 250000.00), (4, 3, 1000000.00);", "sql": "SELECT industry, SUM(amount) AS total_funding FROM funding INNER JOIN company ON funding.company_id = company.id GROUP BY industry ORDER BY total_funding DESC LIMIT 1;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 167, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the best time of the team that had a qualifying 1 time of 1:01.043?", "schema": "CREATE TABLE table_name_54 (best VARCHAR, qual_1 VARCHAR)", "sql": "SELECT best FROM table_name_54 WHERE qual_1 = '1:01.043';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 57, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: The Le Mans Porsche team Joest Racing is in which class?", "schema": "CREATE TABLE table_name_53 (class VARCHAR, team VARCHAR)", "sql": "SELECT class FROM table_name_53 WHERE team = 'le mans porsche team joest racing';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 81, "num_statements": 1} {"question": "Update the salary of 'Bob Johnson' in the 'Assembly' department to $65000.", "schema": "CREATE TABLE employees (id INT, name TEXT, department TEXT, salary INT); INSERT INTO employees (id, name, department, salary) VALUES (1, 'John Doe', 'Engineering', 70000), (2, 'Jane Smith', 'Management', 90000), (3, 'Bob Johnson', 'Assembly', 50000), (4, 'Alice Williams', 'Engineering', 75000), (5, 'Charlie Brown', 'Assembly', 55000), (6, 'Janet Doe', 'Quality', 60000), (7, 'Jim Smith', 'Management', 85000), (8, 'Jake Johnson', 'Assembly', 60000);", "sql": "UPDATE employees SET salary = 65000 WHERE id = 3;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 49, "num_statements": 1} {"question": "Insert a new record for a workplace in the state of Illinois without safety violations.", "schema": "CREATE TABLE workplaces (id INT, name TEXT, state TEXT, safety_violation BOOLEAN);", "sql": "INSERT INTO workplaces (id, name, state, safety_violation) VALUES (1, 'WXY Company', 'Illinois', false);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 104, "num_statements": 1} {"question": "What is the average budget for projects focused on AI ethics by year?", "schema": "CREATE TABLE project_budget (project_id INT, project_name VARCHAR(255), budget DECIMAL(10,2), year INT); INSERT INTO project_budget (project_id, project_name, budget, year) VALUES (1, 'Ethical AI Research', 500000, 2020), (2, 'AI Ethics Guidelines Development', 750000, 2021);", "sql": "SELECT AVG(budget) as avg_budget, year FROM project_budget WHERE project_name LIKE '%AI ethics%' GROUP BY year;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 111, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which Surface has an Opponent of kerry melville reid, and a Score of 6–3, 2–6, 3–6?", "schema": "CREATE TABLE table_name_53 (surface VARCHAR, opponent VARCHAR, score VARCHAR)", "sql": "SELECT surface FROM table_name_53 WHERE opponent = 'kerry melville reid' AND score = '6–3, 2–6, 3–6';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 101, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: On what date was the game where is was later than Week 4 of the season and the opponent was the New York Giants?", "schema": "CREATE TABLE table_name_54 (date VARCHAR, week VARCHAR, opponent VARCHAR)", "sql": "SELECT date FROM table_name_54 WHERE week > 4 AND opponent = 'new york giants';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 79, "num_statements": 1} {"question": "List the auto shows and number of electric vehicle models showcased in each", "schema": "CREATE TABLE AutoShow (id INT, name TEXT); CREATE TABLE ElectricVehicle (id INT, name TEXT, show_id INT); INSERT INTO AutoShow (id, name) VALUES (1, 'LA Auto Show'), (2, 'Detroit Auto Show'); INSERT INTO ElectricVehicle (id, name, show_id) VALUES (1, 'Model 3', 1), (2, 'Bolt', 1), (3, 'Tesla Semi', 2);", "sql": "SELECT AutoShow.name, COUNT(ElectricVehicle.id) FROM AutoShow INNER JOIN ElectricVehicle ON AutoShow.id = ElectricVehicle.show_id GROUP BY AutoShow.id;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 151, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the average finish that has 24 as the start, with a year after 1987?", "schema": "CREATE TABLE table_name_89 (finish INTEGER, start VARCHAR, year VARCHAR)", "sql": "SELECT AVG(finish) FROM table_name_89 WHERE start = 24 AND year > 1987;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 71, "num_statements": 1} {"question": "Show an example of PostgreSQL SELECT (example 3).", "schema": null, "sql": "SELECT * FROM (SELECT * FROM mytable FOR UPDATE) ss WHERE col1 = 5;", "explanation": "PostgreSQL SELECT command.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": true, "sql_length": 67, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'sepgsql' (example 15).", "schema": null, "sql": "SELECT MIN(o), AVG(o) FROM t1p_ones;", "explanation": "Example query from the 'sepgsql' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 36, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the highest number of top-25s for events with 0 wins?", "schema": "CREATE TABLE table_name_15 (top_25 INTEGER, wins INTEGER)", "sql": "SELECT MAX(top_25) FROM table_name_15 WHERE wins < 0;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 53, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who captains Portsmouth?", "schema": "CREATE TABLE table_name_9 (captain VARCHAR, team VARCHAR)", "sql": "SELECT captain FROM table_name_9 WHERE team = 'portsmouth';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 59, "num_statements": 1} {"question": "How many visitors with disabilities visited our museums in Germany last year?", "schema": "CREATE TABLE Visitors_With_Disabilities (id INT, country VARCHAR(255), year INT, number_of_visitors INT);", "sql": "SELECT SUM(number_of_visitors) FROM Visitors_With_Disabilities WHERE country = 'Germany' AND year = 2021;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 105, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What event did Diego Saraiva fight jorge gurgel?", "schema": "CREATE TABLE table_name_98 (event VARCHAR, opponent VARCHAR)", "sql": "SELECT event FROM table_name_98 WHERE opponent = 'jorge gurgel';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 64, "num_statements": 1} {"question": "What is the average depth of ocean floor mapping projects in the Pacific region?", "schema": "CREATE TABLE OceanFloorMapping (id INT, region VARCHAR(20), depth FLOAT); INSERT INTO OceanFloorMapping (id, region, depth) VALUES (1, 'Pacific', 4500.5), (2, 'Atlantic', 3200.2), (3, 'Indian', 5000.0);", "sql": "SELECT AVG(depth) FROM OceanFloorMapping WHERE region = 'Pacific';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the total number of ERP/Power W for frequency of 89.3 fm and facility ID less than 40430", "schema": "CREATE TABLE table_name_34 (erp___power_w VARCHAR, frequency VARCHAR, facility_id VARCHAR)", "sql": "SELECT COUNT(erp___power_w) FROM table_name_34 WHERE frequency = '89.3 fm' AND facility_id < 40430;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 99, "num_statements": 1} {"question": "Show an example of PostgreSQL COPY (example 1).", "schema": null, "sql": "COPY (SELECT * FROM country WHERE country_name LIKE 'A%') TO '/usr1/proj/bray/sql/a_list_countries.copy';", "explanation": "PostgreSQL COPY command.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": true, "sql_length": 105, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What's the total of number of FA Cup that has a League small than 29 with Play-offs less than 0?", "schema": "CREATE TABLE table_name_50 (fa_cup VARCHAR, league VARCHAR, play_offs VARCHAR)", "sql": "SELECT COUNT(fa_cup) FROM table_name_50 WHERE league < 29 AND play_offs < 0;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 76, "num_statements": 1} {"question": "What was the total cost of sustainable building projects in Texas in 2019?", "schema": "CREATE TABLE Sustainable_Projects (id INT, project_cost FLOAT, year INT, state VARCHAR(20)); INSERT INTO Sustainable_Projects (id, project_cost, year, state) VALUES (1, 7000000, 2019, 'Texas');", "sql": "SELECT SUM(project_cost) FROM Sustainable_Projects WHERE year = 2019 AND state = 'Texas';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 89, "num_statements": 1} {"question": "PostgreSQL Plpgsql: show example 53.", "schema": null, "sql": "CREATE TABLE test (col text); INSERT INTO test VALUES ('123'); CREATE FUNCTION reffunc(refcursor) RETURNS refcursor AS ' BEGIN OPEN $1 FOR SELECT col FROM test; RETURN $1; END; ' LANGUAGE plpgsql; BEGIN; SELECT reffunc('funccursor'); FETCH ALL IN funccursor; COMMIT;", "explanation": "Example from PostgreSQL documentation on Plpgsql.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "plpgsql_function", "is_postgresql_specific": true, "sql_length": 266, "num_statements": 10} {"question": "What's the total number of players who play action games on VR platforms?", "schema": "CREATE TABLE Players (PlayerID INT, Name VARCHAR(100), Age INT, FavoriteGenre VARCHAR(50), VRPossible BOOLEAN); INSERT INTO Players (PlayerID, Name, Age, FavoriteGenre, VRPossible) VALUES (1, 'John Doe', 25, 'Action', true), (2, 'Jane Smith', 28, 'Adventure', true), (3, 'James Johnson', 30, 'Simulation', true), (4, 'Emily Davis', 24, 'Strategy', false);", "sql": "SELECT COUNT(*) FROM Players WHERE FavoriteGenre = 'Action' AND VRPossible = true;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 82, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the total number of winners when the team classification leader was Kelme-Costa Blanca and the combativity award was won by Jacky Durand?", "schema": "CREATE TABLE table_2267345_2 (winner VARCHAR, team_classification VARCHAR, combativity_award VARCHAR)", "sql": "SELECT COUNT(winner) FROM table_2267345_2 WHERE team_classification = 'Kelme-Costa Blanca' AND combativity_award = 'Jacky Durand';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 130, "num_statements": 1} {"question": "Which teams have experienced a year-over-year increase in average ticket sales for home games?", "schema": "CREATE TABLE sales (sale_id INT, team_id INT, year INT, ticket_sales INT); INSERT INTO sales (sale_id, team_id, year, ticket_sales) VALUES (1, 1, 2021, 500); INSERT INTO sales (sale_id, team_id, year, ticket_sales) VALUES (2, 2, 2021, 600); INSERT INTO sales (sale_id, team_id, year, ticket_sales) VALUES (3, 1, 2020, 450); INSERT INTO sales (sale_id, team_id, year, ticket_sales) VALUES (4, 2, 2020, 550);", "sql": "SELECT teams.team_name, YEAR(sales.year) as year, AVG(sales.ticket_sales) as avg_ticket_sales FROM sales JOIN teams ON sales.team_id = teams.team_id GROUP BY teams.team_name, YEAR(sales.year) HAVING AVG(sales.ticket_sales) > (SELECT AVG(sales.ticket_sales) FROM sales WHERE sales.team_id = teams.team_id AND YEAR(sales.year) = year - 1) ORDER BY teams.team_name, year;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 368, "num_statements": 1} {"question": "Identify intersections with high rates of electric vehicle adoption and public transit usage in Chicago and Seattle.", "schema": "CREATE TABLE if not exists Intersections(location CHAR(10), ev_adoption FLOAT, transit_usage INT); INSERT INTO Intersections(location, ev_adoption, transit_usage) VALUES ('Chicago_1st', 0.25, 1200), ('Chicago_1st', 0.25, 1250), ('Chicago_2nd', 0.31, 1500), ('Chicago_2nd', 0.31, 1450), ('Seattle_1st', 0.28, 800), ('Seattle_1st', 0.28, 850), ('Seattle_2nd', 0.33, 1100), ('Seattle_2nd', 0.33, 1050);", "sql": "SELECT location, ev_adoption, transit_usage FROM Intersections WHERE location IN ('Chicago_1st', 'Chicago_2nd', 'Seattle_1st', 'Seattle_2nd') AND ev_adoption > 0.25 AND transit_usage > 1000;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 190, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the player number for the player from Centenary?", "schema": "CREATE TABLE table_name_66 (no_s_ VARCHAR, school_club_team_country VARCHAR)", "sql": "SELECT no_s_ FROM table_name_66 WHERE school_club_team_country = 'centenary';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 77, "num_statements": 1} {"question": "What is the average temperature recorded by IoT sensors for each crop type in the past month?", "schema": "CREATE TABLE sensor_data (sensor_id INT, crop_type VARCHAR(255), temperature DECIMAL(5,2), record_date DATE); INSERT INTO sensor_data (sensor_id, crop_type, temperature, record_date) VALUES (1, 'corn', 22.5, '2022-01-01'), (2, 'soybean', 24.3, '2022-01-02');", "sql": "SELECT s.crop_type, AVG(s.temperature) as avg_temperature FROM sensor_data s INNER JOIN (SELECT crop_type, MAX(record_date) as max_date FROM sensor_data WHERE record_date >= DATE_SUB(CURDATE(), INTERVAL 1 MONTH) GROUP BY crop_type) md ON s.crop_type = md.crop_type AND s.record_date = md.max_date GROUP BY s.crop_type;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 318, "num_statements": 1} {"question": "Find the total production quantity (in metric tons) of Europium for 2018 and 2019.", "schema": "CREATE TABLE production_data (year INT, element TEXT, production_quantity FLOAT); INSERT INTO production_data (year, element, production_quantity) VALUES (2018, 'Europium', 120); INSERT INTO production_data (year, element, production_quantity) VALUES (2019, 'Europium', 150); INSERT INTO production_data (year, element, production_quantity) VALUES (2020, 'Europium', 180);", "sql": "SELECT SUM(production_quantity) FROM production_data WHERE element = 'Europium' AND year IN (2018, 2019);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 105, "num_statements": 1} {"question": "What is the average safety score for each creative AI application?", "schema": "CREATE TABLE CreativeAI (id INT, application VARCHAR(255), safety_score DECIMAL(5,2)); INSERT INTO CreativeAI (id, application, safety_score) VALUES (1, 'Artistic Image Generation', 85.67), (2, 'Automated Journalism', 91.23), (3, 'Music Composition', 88.98);", "sql": "SELECT application, AVG(safety_score) as avg_safety_score FROM CreativeAI GROUP BY application;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 95, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the total number of runs where the average is less than 27.22?", "schema": "CREATE TABLE table_name_87 (runs VARCHAR, average INTEGER)", "sql": "SELECT COUNT(runs) FROM table_name_87 WHERE average < 27.22;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 60, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which Days of the week has a Time of broadcast in january–february, june 2002?", "schema": "CREATE TABLE table_name_31 (days_of_the_week VARCHAR, time_of_broadcast VARCHAR)", "sql": "SELECT days_of_the_week FROM table_name_31 WHERE time_of_broadcast = 'january–february, june 2002';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 99, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many 5+/inns have tyron henderson as the player, with wickets less than 21?", "schema": "CREATE TABLE table_name_40 (_inns VARCHAR, player VARCHAR, wickets VARCHAR)", "sql": "SELECT COUNT(5) + _inns FROM table_name_40 WHERE player = 'tyron henderson' AND wickets < 21;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 93, "num_statements": 1} {"question": "How many clients had their total assets increased by more than 10% in Q1 2022 compared to Q1 2021?", "schema": "CREATE TABLE clients (client_id INT, name VARCHAR(50), total_assets DECIMAL(10,2));CREATE TABLE transactions (transaction_id INT, client_id INT, transaction_date DATE, total_amount DECIMAL(10,2));", "sql": "SELECT COUNT(DISTINCT c.client_id) FROM clients c INNER JOIN (SELECT client_id, SUM(total_amount) as total_q1_2022 FROM transactions WHERE transaction_date BETWEEN '2022-01-01' AND '2022-03-31' GROUP BY client_id) t1 ON c.client_id = t1.client_id INNER JOIN (SELECT client_id, SUM(total_amount) as total_q1_2021 FROM transactions WHERE transaction_date BETWEEN '2021-01-01' AND '2021-03-31' GROUP BY client_id) t2 ON c.client_id = t2.client_id WHERE t1.total_q1_2022 > 1.1 * t2.total_q1_2021;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 492, "num_statements": 1} {"question": "Which ethical labor certifications are present in the Manufacturing table?", "schema": "CREATE TABLE Manufacturing (manufacturer_id INT, manufacturer_name TEXT, certification TEXT); INSERT INTO Manufacturing (manufacturer_id, manufacturer_name, certification) VALUES (101, 'Textile Co', 'SA8000'); INSERT INTO Manufacturing (manufacturer_id, manufacturer_name, certification) VALUES (102, 'Gadgets Inc', 'Fair Labor'); INSERT INTO Manufacturing (manufacturer_id, manufacturer_name, certification) VALUES (103, 'Eco Parts Ltd', 'ISO 14001');", "sql": "SELECT DISTINCT certification FROM Manufacturing;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 49, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the Athlete from Burbank High School?", "schema": "CREATE TABLE table_name_25 (athlete VARCHAR, school VARCHAR)", "sql": "SELECT athlete FROM table_name_25 WHERE school = 'burbank high school';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 71, "num_statements": 1} {"question": "How many male and female employees are there in the company?", "schema": "CREATE TABLE employees (id INT, name VARCHAR(50), gender VARCHAR(10), department VARCHAR(50)); INSERT INTO employees (id, name, gender, department) VALUES (1, 'John Doe', 'Male', 'Marketing'), (2, 'Jane Smith', 'Female', 'Marketing');", "sql": "SELECT gender, COUNT(*) FROM employees GROUP BY gender;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 55, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What's the Denomination listed for the Date of Issue 12 April 2005?", "schema": "CREATE TABLE table_name_88 (denomination VARCHAR, date_of_issue VARCHAR)", "sql": "SELECT denomination FROM table_name_88 WHERE date_of_issue = '12 april 2005';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 77, "num_statements": 1} {"question": "Delete all 'Monorail' routes", "schema": "CREATE TABLE monorail_routes (route_id INT PRIMARY KEY, start_location TEXT, end_location TEXT);", "sql": "DELETE FROM monorail_routes;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_delete", "is_postgresql_specific": false, "sql_length": 28, "num_statements": 1} {"question": "Show a SQL definition from the citus project (multi_alter_table_add_foreign_key_without_name, item 15).", "schema": null, "sql": "CREATE TABLE referencing_table(id int, ref_id int);", "explanation": "SQL definition from the open-source citus PostgreSQL project.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_table", "is_postgresql_specific": false, "sql_length": 51, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What's the maximum wickets taken by players named Bill Lockwood?", "schema": "CREATE TABLE table_2482547_5 (wickets_taken INTEGER, name VARCHAR)", "sql": "SELECT MAX(wickets_taken) FROM table_2482547_5 WHERE name = 'Bill Lockwood';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 76, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is Place, when Weapon is \"35mm fire\", and when Date is \"27 May 1982\"?", "schema": "CREATE TABLE table_name_15 (place VARCHAR, weapon VARCHAR, date VARCHAR)", "sql": "SELECT place FROM table_name_15 WHERE weapon = '35mm fire' AND date = '27 may 1982';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 84, "num_statements": 1} {"question": "What is the minimum explainability score for AI models in the energy sector in Oceania?", "schema": "CREATE TABLE explainability_scores_2 (id INT, model_name VARCHAR(50), sector VARCHAR(50), region VARCHAR(50), score FLOAT); INSERT INTO explainability_scores_2 VALUES (1, 'EnergyModel1', 'Energy', 'Oceania', 0.75), (2, 'EnergyModel2', 'Energy', 'Oceania', 0.82), (3, 'EnergyModel3', 'Energy', 'Europe', 0.93);", "sql": "SELECT MIN(score) FROM explainability_scores_2 WHERE sector = 'Energy' AND region = 'Oceania';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 94, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'partition_prune' (example 183).", "schema": null, "sql": "create table boolrangep_tf partition of boolrangep for values from ('true', 'false', 0) to ('true', 'false', 100);", "explanation": "DDL from PostgreSQL core regression test for Partition Prune.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_table", "is_postgresql_specific": false, "sql_length": 114, "num_statements": 1} {"question": "What are the names and locations of all Ytterbium mines in North America?", "schema": "CREATE TABLE ytterbium_mines (mine_name VARCHAR(50), country VARCHAR(20)); INSERT INTO ytterbium_mines (mine_name, country) VALUES ('Y1 Mine', 'USA'), ('Y2 Mine', 'Canada'), ('Y3 Mine', 'Mexico');", "sql": "SELECT mine_name, country FROM ytterbium_mines WHERE country IN ('USA', 'Canada', 'Mexico');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 92, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who is the away captain when the game resulted in [[|]] by 7 wickets?", "schema": "CREATE TABLE table_name_68 (away_captain VARCHAR, result VARCHAR)", "sql": "SELECT away_captain FROM table_name_68 WHERE result = '[[|]] by 7 wickets';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 75, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the date of the Italian Grand Prix?", "schema": "CREATE TABLE table_28925058_1 (date VARCHAR, grand_prix VARCHAR)", "sql": "SELECT date FROM table_28925058_1 WHERE grand_prix = 'Italian grand_prix';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 74, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the catalogue number for Brazil?", "schema": "CREATE TABLE table_name_8 (catalogue__number VARCHAR, country VARCHAR)", "sql": "SELECT catalogue__number FROM table_name_8 WHERE country = 'brazil';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 68, "num_statements": 1} {"question": "What is the total budget for agricultural innovation projects in Europe?", "schema": "CREATE TABLE agricultural_innovation (id INT, project_name VARCHAR(50), location VARCHAR(50), budget FLOAT); INSERT INTO agricultural_innovation (id, project_name, location, budget) VALUES (1, 'Precision Farming', 'France', 600000.00);", "sql": "SELECT SUM(budget) FROM agricultural_innovation WHERE location LIKE '%Europe%';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 79, "num_statements": 1} {"question": "What is the minimum efficiency achieved by each energy project in a given year?", "schema": "CREATE TABLE efficiency_stats (id INT PRIMARY KEY, project_id INT, year INT, efficiency FLOAT, FOREIGN KEY (project_id) REFERENCES projects(id)); INSERT INTO efficiency_stats (id, project_id, year, efficiency) VALUES (7, 1, 2020, 0.15), (8, 2, 2020, 0.28);", "sql": "SELECT project_id, MIN(efficiency) FROM efficiency_stats GROUP BY project_id;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 77, "num_statements": 1} {"question": "Write the PL/pgSQL object from PostgreSQL regression test 'merge' (example 423).", "schema": null, "sql": "$$;\nSELECT * FROM pa_target ORDER BY tid, val;\nROLLBACK;\n\n-- as above, but blocked by BEFORE INSERT ROW trigger\nBEGIN;\nCREATE FUNCTION trig_fn() RETURNS trigger LANGUAGE plpgsql AS\n $$ BEGIN RETURN NULL; END; $$;", "explanation": "PL/pgSQL object from PostgreSQL core test for Merge.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "plpgsql_trigger", "is_postgresql_specific": true, "sql_length": 213, "num_statements": 7} {"question": "Generate PostgreSQL SQL for: what is the max fs where the status is status and the method is method?", "schema": "CREATE TABLE table_17157367_1 (max_fs VARCHAR)", "sql": "SELECT max_fs FROM table_17157367_1 WHERE \"status\" = 'status' AND \"method\" = 'method';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 86, "num_statements": 1} {"question": "What is the total installed capacity of wind farms in the 'renewable_energy' schema?", "schema": "CREATE TABLE wind_farms (id INT, name VARCHAR(50), location VARCHAR(50), installed_capacity FLOAT); INSERT INTO wind_farms (id, name, location, installed_capacity) VALUES (1, 'Wind Farm 1', 'Location A', 100.5), (2, 'Wind Farm 2', 'Location B', 150.2);", "sql": "SELECT SUM(installed_capacity) FROM wind_farms;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 47, "num_statements": 1} {"question": "Add a column 'gender' to the 'farmers' table", "schema": "CREATE TABLE farmers (farmer_id INT PRIMARY KEY, name VARCHAR(255), age INT, location VARCHAR(255));", "sql": "ALTER TABLE farmers ADD COLUMN gender VARCHAR(10);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "ddl_alter", "is_postgresql_specific": false, "sql_length": 50, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What name has tour Apps larger than 7, tests larger than 4, and a Career caps of 20?", "schema": "CREATE TABLE table_name_22 (name VARCHAR, career_caps VARCHAR, tour_apps VARCHAR, tests VARCHAR)", "sql": "SELECT name FROM table_name_22 WHERE tour_apps > 7 AND tests > 4 AND career_caps = 20;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 86, "num_statements": 1} {"question": "Get the number of passengers per train for the 8 AM train on each route", "schema": "CREATE TABLE passengers (passenger_id INT, train_id INT, route_id INT, num_passengers INT);", "sql": "SELECT route_id, num_passengers FROM passengers WHERE train_id IN (SELECT train_id FROM train_schedule WHERE departure_time = '08:00:00') GROUP BY route_id;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 156, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the Record on Week 13 at the Hoosier Dome?", "schema": "CREATE TABLE table_name_97 (record VARCHAR, game_site VARCHAR, week VARCHAR)", "sql": "SELECT record FROM table_name_97 WHERE game_site = 'hoosier dome' AND week = 13;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 80, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which Season has a Score of 3 – 3 aet , 4–3 pen", "schema": "CREATE TABLE table_name_31 (season VARCHAR, score VARCHAR)", "sql": "SELECT season FROM table_name_31 WHERE score = '3 – 3 aet , 4–3 pen';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 69, "num_statements": 1} {"question": "PostgreSQL regression test 'numeric': Write the SELECT query (example 611).", "schema": null, "sql": "SELECT 'NaN'::numeric::int8;", "explanation": "Regression test for Numeric in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT 'NaN'::numeric::int8) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 28, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the Venue, when the Winner is iain pyman?", "schema": "CREATE TABLE table_name_19 (venue VARCHAR, winner VARCHAR)", "sql": "SELECT venue FROM table_name_19 WHERE winner = 'iain pyman';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 60, "num_statements": 1} {"question": "How many mobile customers have a postpaid subscription in the country of Australia?", "schema": "CREATE TABLE mobile_subscribers (subscriber_id INT, subscription_type VARCHAR(10), country VARCHAR(20)); INSERT INTO mobile_subscribers (subscriber_id, subscription_type, country) VALUES (1, 'postpaid', 'Australia'), (2, 'prepaid', 'Australia'), (3, 'postpaid', 'Australia');", "sql": "SELECT COUNT(*) FROM mobile_subscribers WHERE subscription_type = 'postpaid' AND country = 'Australia';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 103, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'numeric_big' (example 217).", "schema": null, "sql": "INSERT INTO num_exp_div VALUES (4,8,'627208063620965.397582272040628872773601055303353339700043792111288801181637510303989399395425313995651311362368773096988861977687484912995632130587762386590996099363383976320342247076516604162469063709298438133327434461462906199160715395064249299615054970359309619951777972710299484596875999967582794277241285253106817446259313281064844416249524876385699646393555435017820686376877981018047574348711991428666249794623006175739581915209218834701034964043360823844816042368184094857692062884223864639972005010863342567608351008172649209459933114800143792514183138995700133608613158857147417653998048890116531052767737435620558349226865105888201598712435680481803901906613772821370519525404423549161696526405320391828194356063547089626322474164332505209233143121068245585662919687001395119229263995765376465304715643388771609446');", "explanation": "DML from PostgreSQL core regression test for Numeric Big.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 866, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What Title has a Presentation of Credentials of October 4, 1988?", "schema": "CREATE TABLE table_name_90 (title VARCHAR, presentation_of_credentials VARCHAR)", "sql": "SELECT title FROM table_name_90 WHERE presentation_of_credentials = 'october 4, 1988';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 86, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What city is Mackey Arena in?", "schema": "CREATE TABLE table_name_46 (city VARCHAR, venue VARCHAR)", "sql": "SELECT city FROM table_name_46 WHERE venue = 'mackey arena';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 60, "num_statements": 1} {"question": "Write the PL/pgSQL object from PostgreSQL regression test 'misc' (example 44).", "schema": null, "sql": "--\n-- the next two queries demonstrate how functions generate bogus duplicates.\n-- this is a \"feature\" ..\n--\nSELECT DISTINCT hobbies_r.name, name(hobbies_r.equipment) FROM hobbies_r\n ORDER BY 1,2;", "explanation": "PL/pgSQL object from PostgreSQL core test for Misc.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 197, "num_statements": 1} {"question": "Delete all records from the table \"marine_protected_areas\" where country is 'Indonesia'", "schema": "CREATE TABLE marine_protected_areas (id INT, name VARCHAR(50), size FLOAT, country VARCHAR(50));", "sql": "DELETE FROM marine_protected_areas WHERE country = 'Indonesia';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "dml_delete", "is_postgresql_specific": false, "sql_length": 63, "num_statements": 1} {"question": "Which food supplier had the most safety violations in 2021?", "schema": "CREATE TABLE SupplierSafetyRecords (SupplierName VARCHAR(50), Year INT, SafetyViolations INT); INSERT INTO SupplierSafetyRecords (SupplierName, Year, SafetyViolations) VALUES ('ABC Foods', 2021, 5), ('XYZ Foods', 2021, 8), ('123 Foods', 2021, 3), ('FoodCo', 2021, 7), ('EcoFoods', 2021, 4);", "sql": "SELECT SupplierName, MAX(SafetyViolations) FROM SupplierSafetyRecords WHERE Year = 2021 GROUP BY SupplierName;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 110, "num_statements": 1} {"question": "How many songs were added to Deezer's Latin music library in the last month?", "schema": "CREATE TABLE DeezerSongs (SongID INT, AddedDate DATE, Genre VARCHAR(50)); INSERT INTO DeezerSongs (SongID, AddedDate, Genre) VALUES (1, '2022-02-15', 'Latin'), (2, '2022-02-16', 'Pop');", "sql": "SELECT COUNT(*) FROM DeezerSongs WHERE AddedDate >= DATEADD(MONTH, -1, GETDATE()) AND Genre = 'Latin';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 102, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'stats_ext' (example 450).", "schema": null, "sql": "INSERT INTO mcv_lists (a, b, c, ia, filler1)\n SELECT mod(i,100), mod(i,50), mod(i,25), array[mod(i,25)], i\n FROM generate_series(1,5000) s(i);", "explanation": "DML from PostgreSQL core regression test for Stats Ext.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": true, "sql_length": 152, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Show theme and year for all exhibitions with ticket prices lower than 15.", "schema": "CREATE TABLE exhibition (theme VARCHAR, YEAR VARCHAR, ticket_price INTEGER)", "sql": "SELECT theme, YEAR FROM exhibition WHERE ticket_price < 15;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 59, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the indoor track status for the school that has yes for tennis and no for golf?", "schema": "CREATE TABLE table_name_91 (indoor_track VARCHAR, tennis VARCHAR, golf VARCHAR)", "sql": "SELECT indoor_track FROM table_name_91 WHERE tennis = 'yes' AND golf = 'no';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 76, "num_statements": 1} {"question": "How many doctors are there in each hospital in 'rural_healthcare' schema?", "schema": "CREATE TABLE Doctors (id INT, name VARCHAR(100), hospital_id INT); INSERT INTO Doctors VALUES (1, 'Dr. Smith', 1), (2, 'Dr. Johnson', 1), (3, 'Dr. Brown', 2), (4, 'Dr. Davis', 3);", "sql": "SELECT h.name, COUNT(d.id) as doctor_count FROM Hospitals h JOIN Doctors d ON h.id = d.hospital_id GROUP BY h.name;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 115, "num_statements": 1} {"question": "What is the average number of employees for startups founded by individuals who identify as Two-Spirit?", "schema": "CREATE TABLE companies (id INT, name TEXT, founding_year INT, founder_identifies_as_two_spirit BOOLEAN, num_employees INT); INSERT INTO companies (id, name, founding_year, founder_identifies_as_two_spirit, num_employees) VALUES (1, 'Theta Labs', 2022, true, 50); INSERT INTO companies (id, name, founding_year, founder_identifies_as_two_spirit, num_employees) VALUES (2, 'Iota Inc', 2019, false, 30); INSERT INTO companies (id, name, founding_year, founder_identifies_as_two_spirit, num_employees) VALUES (3, 'Kappa Corp', 2020, true, 75);", "sql": "SELECT AVG(num_employees) FROM companies WHERE founder_identifies_as_two_spirit = true;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 87, "num_statements": 1} {"question": "What are the names and locations of indigenous communities in Greenland with a population over 10,000 that speak Greenlandic?", "schema": "CREATE TABLE Indigenous_Communities (id INT, name VARCHAR(100), population INT, location VARCHAR(100), language VARCHAR(100)); INSERT INTO Indigenous_Communities (id, name, population, location, language) VALUES (1, 'Inuit', 15000, 'Greenland', 'Greenlandic'); INSERT INTO Indigenous_Communities (id, name, population, location, language) VALUES (2, 'Kalaallit', 20000, 'Greenland', 'Greenlandic');", "sql": "SELECT name, location FROM Indigenous_Communities WHERE population > 10000 AND language = 'Greenlandic';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 104, "num_statements": 1} {"question": "What is the total fare collected for each route on January 1, 2022?", "schema": "CREATE TABLE route (route_id INT, route_name VARCHAR(255)); INSERT INTO route (route_id, route_name) VALUES (1, 'Route 1'), (2, 'Route 2'); CREATE TABLE fares (fare_id INT, route_id INT, fare_amount DECIMAL, fare_date DATE); INSERT INTO fares (fare_id, route_id, fare_amount, fare_date) VALUES (1, 1, 2.50, '2022-01-01'), (2, 1, 2.50, '2022-01-01'), (3, 2, 3.25, '2022-01-01'), (4, 2, 3.25, '2022-01-01');", "sql": "SELECT r.route_name, SUM(f.fare_amount) as total_fare FROM fares f JOIN route r ON f.route_id = r.route_id WHERE f.fare_date = '2022-01-01' GROUP BY r.route_name;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 162, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Where was the landing site when the duration on the lunar surface was 21:31?", "schema": "CREATE TABLE table_name_52 (lunar_landing_site VARCHAR, duration_on_lunar_surface VARCHAR)", "sql": "SELECT lunar_landing_site FROM table_name_52 WHERE duration_on_lunar_surface = '21:31';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 87, "num_statements": 1} {"question": "What is the total data usage for each mobile user?", "schema": "CREATE TABLE mobile_data (user_id INT, username VARCHAR(50), data_usage INT); INSERT INTO mobile_data VALUES (1, 'User 1', 200); INSERT INTO mobile_data VALUES (2, 'User 2', 300); INSERT INTO mobile_data VALUES (3, 'User 3', 150); INSERT INTO mobile_data VALUES (4, 'User 4', 400);", "sql": "SELECT username, SUM(data_usage) as total_data_usage FROM mobile_data GROUP BY username;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 88, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: On what date is Jerilyn Britz the runner-up?", "schema": "CREATE TABLE table_name_32 (date VARCHAR, runner_s__up VARCHAR)", "sql": "SELECT date FROM table_name_32 WHERE runner_s__up = 'jerilyn britz';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 68, "num_statements": 1} {"question": "What is the total number of co-owned properties in affordable neighborhoods in Paris?", "schema": "CREATE TABLE Paris_Neighborhoods (Neighborhood_Name TEXT, Affordability BOOLEAN); INSERT INTO Paris_Neighborhoods (Neighborhood_Name, Affordability) VALUES ('Marais', false), ('Saint Germain', false), ('Latin Quarter', true), ('Bastille', true), ('Montmartre', false); CREATE TABLE Paris_Properties (Neighborhood_Name TEXT, Co_Ownership BOOLEAN); INSERT INTO Paris_Properties (Neighborhood_Name, Co_Ownership) VALUES ('Marais', true), ('Saint Germain', false), ('Latin Quarter', true), ('Bastille', true), ('Montmartre', false);", "sql": "SELECT COUNT(Paris_Properties.Co_Ownership) FROM Paris_Properties INNER JOIN Paris_Neighborhoods ON Paris_Properties.Neighborhood_Name = Paris_Neighborhoods.Neighborhood_Name WHERE Paris_Neighborhoods.Affordability = true AND Paris_Properties.Co_Ownership = true;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 263, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many people attended the game when the game was won 4-2?", "schema": "CREATE TABLE table_name_44 (attendance VARCHAR, result VARCHAR)", "sql": "SELECT COUNT(attendance) FROM table_name_44 WHERE result = 'won 4-2';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 69, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'numeric_big' (example 367).", "schema": null, "sql": "INSERT INTO num_exp_sub VALUES (8,6,'8496986223.59370017133658391078540506786813528391482589743854926337571311247664927673026627333807725155622490761003765241409149793494330798800');", "explanation": "DML from PostgreSQL core regression test for Numeric Big.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 182, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: List the name of the phone model launched in year 2002 and with the highest RAM size.", "schema": "CREATE TABLE chip_model (Model_name VARCHAR, Launch_year VARCHAR, RAM_MiB VARCHAR); CREATE TABLE phone (Hardware_Model_name VARCHAR, chip_model VARCHAR)", "sql": "SELECT T2.Hardware_Model_name FROM chip_model AS T1 JOIN phone AS T2 ON T1.Model_name = T2.chip_model WHERE T1.Launch_year = 2002 ORDER BY T1.RAM_MiB DESC LIMIT 1;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 163, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the April 28 rank when the Mar 24 is 17?", "schema": "CREATE TABLE table_name_92 (april_28 VARCHAR, mar_24 VARCHAR)", "sql": "SELECT april_28 FROM table_name_92 WHERE mar_24 = '17';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 55, "num_statements": 1} {"question": "Which movies have the lowest IMDb rating per genre, sorted by release year?", "schema": "CREATE TABLE movie_ratings (id INT, title VARCHAR(255), release_year INT, genre VARCHAR(255), imdb_rating DECIMAL(3,2)); INSERT INTO movie_ratings (id, title, release_year, genre, imdb_rating) VALUES (1, 'Movie1', 2018, 'Action', 5.2), (2, 'Movie2', 2019, 'Comedy', 5.5), (3, 'Movie3', 2017, 'Drama', 6.0), (4, 'Movie4', 2018, 'Animation', 4.8), (5, 'Movie5', 2019, 'Documentary', 5.3);", "sql": "SELECT genre, title, release_year, MIN(imdb_rating) AS lowest_imdb_rating FROM movie_ratings GROUP BY genre ORDER BY release_year, lowest_imdb_rating;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 150, "num_statements": 1} {"question": "What is the total revenue for each game in the 'gaming' database?", "schema": "CREATE TABLE games (game_id INT, game_name VARCHAR(50), revenue FLOAT); INSERT INTO games (game_id, game_name, revenue) VALUES (1, 'GameA', 5000000), (2, 'GameB', 7000000), (3, 'GameC', 3000000);", "sql": "SELECT game_name, SUM(revenue) as total_revenue FROM games GROUP BY game_name;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 78, "num_statements": 1} {"question": "Insert new network infrastructure investment records for the Northeast region.", "schema": "CREATE TABLE network_investments (investment_id INT, amount FLOAT, region VARCHAR(20));", "sql": "INSERT INTO network_investments (investment_id, amount, region) VALUES (1, 50000.0, 'Northeast'), (2, 60000.0, 'Northeast'), (3, 45000.0, 'Northeast');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 151, "num_statements": 1} {"question": "What is the average age of male reporters in the 'news' table?", "schema": "CREATE TABLE news (id INT, name VARCHAR(50), gender VARCHAR(10), age INT); INSERT INTO news (id, name, gender, age) VALUES (1, 'John', 'Male', 35), (2, 'Alex', 'Male', 45);", "sql": "SELECT AVG(age) FROM news WHERE gender = 'Male';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 48, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'test_decoding' (example 56).", "schema": null, "sql": "SELECT 'init' FROM pg_create_physical_replication_slot('orig_slot2', false);", "explanation": "Example query from the 'test_decoding' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 76, "num_statements": 1} {"question": "What is the total revenue for each restaurant in 'Downtown'?", "schema": "CREATE TABLE sales (sale_id INT, restaurant_id INT, menu_category VARCHAR(255), revenue INT); INSERT INTO sales (sale_id, restaurant_id, menu_category, revenue) VALUES (1, 1, 'Appetizers', 500), (2, 1, 'Entrees', 700), (3, 2, 'Desserts', 600), (4, 2, 'Beverages', 800); CREATE TABLE restaurants (restaurant_id INT, name VARCHAR(255), area VARCHAR(255)); INSERT INTO restaurants (restaurant_id, name, area) VALUES (1, 'Gourmet Delight', 'Downtown'), (2, 'Spicy Express', 'Uptown');", "sql": "SELECT r.name, SUM(s.revenue) AS total_revenue FROM sales s JOIN restaurants r ON s.restaurant_id = r.restaurant_id WHERE r.area = 'Downtown' GROUP BY r.name;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 158, "num_statements": 1} {"question": "What is the total length of bridges in the transport division?", "schema": "CREATE TABLE Projects (id INT, division VARCHAR(10)); INSERT INTO Projects (id, division) VALUES (1, 'water'), (2, 'transport'), (3, 'energy'); CREATE TABLE TransportProjects (id INT, project_id INT, length DECIMAL(10,2)); INSERT INTO TransportProjects (id, project_id, length) VALUES (1, 2, 500), (2, 2, 550), (3, 3, 600);", "sql": "SELECT SUM(t.length) FROM TransportProjects t JOIN Projects p ON t.project_id = p.id WHERE p.division = 'transport';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 116, "num_statements": 1} {"question": "What is the number of public bicycles available in each city in the Netherlands?", "schema": "CREATE TABLE if not exists public_bicycles (id INT, city VARCHAR(255), bikes INT); INSERT INTO public_bicycles (id, city, bikes) VALUES (1, 'Amsterdam', 15000), (2, 'Utrecht', 12000), (3, 'Rotterdam', 8000), (4, 'The Hague', 6000);", "sql": "SELECT city, bikes FROM public_bicycles;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 40, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the margin of the Masters Tournament?", "schema": "CREATE TABLE table_13026799_1 (margin VARCHAR, championship VARCHAR)", "sql": "SELECT margin FROM table_13026799_1 WHERE championship = 'Masters Tournament';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 78, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: When did Ismail Qemali Bej's term start?", "schema": "CREATE TABLE table_name_61 (term_start VARCHAR, name VARCHAR)", "sql": "SELECT term_start FROM table_name_61 WHERE name = 'ismail qemali bej';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 70, "num_statements": 1} {"question": "PostgreSQL regression test 'tidrangescan': Write the SELECT query (example 29).", "schema": null, "sql": "SELECT ctid FROM tidrangescan WHERE ctid < '(0,0)' LIMIT 1;", "explanation": "Regression test for Tidrangescan in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT ctid FROM tidrangescan WHERE ctid < '(0,0)' LIMIT 1) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 59, "num_statements": 1} {"question": "What is the total number of volunteers for each program in the year 2020?", "schema": "CREATE TABLE programs (id INT, name VARCHAR(255)); INSERT INTO programs (id, name) VALUES (1, 'Education'), (2, 'Health'), (3, 'Environment'); CREATE TABLE volunteers (id INT, program_id INT, volunteer_date DATE); INSERT INTO volunteers (id, program_id, volunteer_date) VALUES (1, 1, '2020-01-01'), (2, 1, '2020-02-01'), (3, 2, '2020-03-01');", "sql": "SELECT v.program_id, COUNT(*) as total_volunteers FROM volunteers v WHERE YEAR(v.volunteer_date) = 2020 GROUP BY v.program_id;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 126, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the nationality of the player from round 5?", "schema": "CREATE TABLE table_name_31 (nationality VARCHAR, round VARCHAR)", "sql": "SELECT nationality FROM table_name_31 WHERE round = 5;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 54, "num_statements": 1} {"question": "PostgreSQL regression test 'float8': Write the SELECT query (example 110).", "schema": null, "sql": "SELECT cosh(float8 '1');", "explanation": "Regression test for Float8 in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT cosh(float8 '1')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 24, "num_statements": 1} {"question": "What is the most common type of incident in the 'safety_records' table?", "schema": "CREATE TABLE safety_records (id INT, incident_type VARCHAR(50), incident_date DATE, description VARCHAR(100));", "sql": "SELECT incident_type, COUNT(*) FROM safety_records GROUP BY incident_type ORDER BY COUNT(*) DESC LIMIT 1;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 105, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many pressure figures are given for the .380 acp cartridge?", "schema": "CREATE TABLE table_173103_1 (max_pressure VARCHAR, cartridge VARCHAR)", "sql": "SELECT COUNT(max_pressure) FROM table_173103_1 WHERE cartridge = '.380 ACP';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 76, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who built giancarlo fisichella's car?", "schema": "CREATE TABLE table_name_96 (constructor VARCHAR, driver VARCHAR)", "sql": "SELECT constructor FROM table_name_96 WHERE driver = 'giancarlo fisichella';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 76, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Madrid (Stuttgart) tournament with a 1996 of A has this for a Career SR?", "schema": "CREATE TABLE table_name_66 (career_sr VARCHAR, tournament VARCHAR)", "sql": "SELECT career_sr FROM table_name_66 WHERE 1996 = 'a' AND tournament = 'madrid (stuttgart)';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 91, "num_statements": 1} {"question": "What is the average income of residents in urban areas who have completed a bachelor's degree or higher, by state?", "schema": "CREATE TABLE residents (id INT PRIMARY KEY, state VARCHAR(2), education VARCHAR(50), income FLOAT, area VARCHAR(10)); INSERT INTO residents (id, state, education, income, area) VALUES (1, 'NY', 'Bachelor’s Degree', 60000, 'Urban');", "sql": "SELECT AVG(income) FROM residents WHERE education LIKE '%Bachelor’s Degree%' AND area = 'Urban' GROUP BY state;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 111, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'numeric_big' (example 140).", "schema": null, "sql": "INSERT INTO num_exp_mul VALUES (2,9,'-54582443595378013373024060492546032003692.4875677735896411267274323339692558458420972958075073392126734000341372096298914875892612108329218081214550050039133117695428196702128258481789017059073444323729583900855712795086447886053552786449313809589992185978097430132940882612817775035217244553616977182049775786664446683332098226841743818600819221587510039430478859412452506872131851471967577741190323481953867845129745440745526578327709351120432530702446916035797432129052518980799424635406993848916727957825620638983706180841278402925286540375225365057191075559133035');", "explanation": "DML from PostgreSQL core regression test for Numeric Big.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 609, "num_statements": 1} {"question": "What is the difference in average temperature between the hottest and coldest regions for each month, in a given year?", "schema": "CREATE TABLE temperature_data (region VARCHAR(255), temperature INT, date DATE); INSERT INTO temperature_data (region, temperature, date) VALUES ('North', 25, '2022-01-01'), ('South', 10, '2022-01-01'), ('East', 15, '2022-01-01'), ('West', 30, '2022-01-01');", "sql": "SELECT hottest.region, hottest.max_temp - coldest.min_temp as temp_diff FROM (SELECT region, MAX(temperature) as max_temp FROM temperature_data GROUP BY region) hottest INNER JOIN (SELECT region, MIN(temperature) as min_temp FROM temperature_data GROUP BY region) coldest ON hottest.region = coldest.region;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 307, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the total number of Wins, when Draws is less than 2?", "schema": "CREATE TABLE table_name_48 (wins VARCHAR, draws INTEGER)", "sql": "SELECT COUNT(wins) FROM table_name_48 WHERE draws < 2;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 54, "num_statements": 1} {"question": "PostgreSQL regression test 'horology': Write the SELECT query (example 111).", "schema": null, "sql": "SELECT date '1991-02-03' - time with time zone '04:05:06 UTC' AS \"Subtract Time UTC\";", "explanation": "Regression test for Horology in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT date '1991-02-03' - time with time zone '04:05:06 UTC' AS \"Subtract Time UTC\") AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 85, "num_statements": 1} {"question": "Delete policyholders who have not made any claims in the last 3 years.", "schema": "CREATE TABLE Policyholders (ID INT, Name VARCHAR(50), Age INT, Gender VARCHAR(10), City VARCHAR(50), State VARCHAR(20), ZipCode VARCHAR(10)); CREATE TABLE Claims (ID INT, PolicyholderID INT, ClaimAmount DECIMAL(10,2), ClaimDate DATE);", "sql": "DELETE FROM Policyholders WHERE ID NOT IN (SELECT PolicyholderID FROM Claims WHERE ClaimDate > DATE_SUB(CURDATE(), INTERVAL 3 YEAR));", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "dml_delete", "is_postgresql_specific": false, "sql_length": 133, "num_statements": 1} {"question": "Display the usernames of users who have posted about mental health in the past month and have less than 1,000 followers, sorted by the number of posts related to mental health in descending order.", "schema": "CREATE TABLE users (user_id INT, user_name VARCHAR(50), join_date DATE, follower_count INT);CREATE TABLE posts (post_id INT, user_id INT, post_content TEXT, post_date DATE);INSERT INTO users (user_id, user_name, join_date, follower_count) VALUES (1, 'user1', '2021-01-01', 15000), (2, 'user2', '2021-02-01', 12000), (3, 'user3', '2021-03-01', 5000), (4, 'user4', '2021-03-01', 800), (5, 'user5', '2021-03-01', 150);", "sql": "SELECT u.user_name FROM users u JOIN posts p ON u.user_id = p.user_id WHERE p.post_content LIKE '%mental health%' AND p.post_date >= DATEADD(month, -1, GETDATE()) AND u.follower_count < 1000 GROUP BY u.user_name ORDER BY COUNT(p.post_id) DESC;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 243, "num_statements": 1} {"question": "Delete the record of the reader with the ID of 4 if it exists.", "schema": "CREATE TABLE readers (id INT, name VARCHAR(50), age INT, preference VARCHAR(50)); INSERT INTO readers (id, name, age, preference) VALUES (1, 'John Doe', 30, 'technology'), (2, 'Jane Smith', 45, 'sports'), (3, 'Bob Johnson', 28, 'politics'), (4, 'Alice Davis', 34, 'international');", "sql": "DELETE FROM readers WHERE id = 4;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_delete", "is_postgresql_specific": false, "sql_length": 33, "num_statements": 1} {"question": "What is the average age of female offenders in the justice_data schema's adult_offenders table?", "schema": "CREATE TABLE justice_data.adult_offenders (id INT, name VARCHAR(50), age INT, gender VARCHAR(10), offense VARCHAR(50));", "sql": "SELECT AVG(age) FROM justice_data.adult_offenders WHERE gender = 'female';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 74, "num_statements": 1} {"question": "Who is the oldest visitor from Sydney?", "schema": "CREATE TABLE Visitors (id INT, name VARCHAR(20), age INT, city VARCHAR(20)); INSERT INTO Visitors (id, name, age, city) VALUES (1, 'Alex', 45, 'Sydney'), (2, 'Beth', 30, 'Sydney');", "sql": "SELECT name, MAX(age) as max_age FROM Visitors WHERE city = 'Sydney';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 69, "num_statements": 1} {"question": "List all cases that have a billing amount greater than the average billing amount for all cases.", "schema": "CREATE TABLE cases (case_id INT, billing_amount INT); INSERT INTO cases (case_id, billing_amount) VALUES (1, 8000), (2, 6000), (3, 12000), (4, 4000), (5, 9000), (6, 5000);", "sql": "SELECT cases.case_id, cases.billing_amount FROM cases WHERE cases.billing_amount > (SELECT AVG(cases.billing_amount) FROM cases);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 129, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'time' (example 11).", "schema": null, "sql": "INSERT INTO TIME_TBL VALUES ('2003-07-07 15:36:39 America/New_York');", "explanation": "DML from PostgreSQL core regression test for Time.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 69, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: There is a building at 800 Boylston Street, how many floors does it have?", "schema": "CREATE TABLE table_name_81 (floors INTEGER, street_address VARCHAR)", "sql": "SELECT SUM(floors) FROM table_name_81 WHERE street_address = '800 boylston street';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 83, "num_statements": 1} {"question": "List the policyholders who have made at least one claim in descending order of claim amount.", "schema": "CREATE TABLE Policyholders (PolicyholderID INT, Name VARCHAR(50)); CREATE TABLE Claims (PolicyholderID INT, ClaimAmount DECIMAL(10,2)); INSERT INTO Policyholders VALUES (1, 'John Doe'); INSERT INTO Policyholders VALUES (2, 'Jane Smith'); INSERT INTO Claims VALUES (1, 2000); INSERT INTO Claims VALUES (2, 3000); INSERT INTO Claims VALUES (3, 1000);", "sql": "SELECT Policyholders.PolicyholderID, Policyholders.Name, Claims.ClaimAmount FROM Policyholders INNER JOIN Claims ON Policyholders.PolicyholderID = Claims.PolicyholderID WHERE Policyholders.PolicyholderID IN (SELECT PolicyholderID FROM Claims) ORDER BY ClaimAmount DESC;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 269, "num_statements": 1} {"question": "Write the SQL definition from PostgreSQL contrib extension 'contrib' (item 36).", "schema": null, "sql": "CREATE FUNCTION oid_dist(oid, oid)\nRETURNS oid\nAS 'MODULE_PATHNAME'\nLANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;", "explanation": "SQL definition from the 'contrib' PostgreSQL contrib extension.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 110, "num_statements": 1} {"question": "What is the total number of veteran job applications in California in the last year?", "schema": "CREATE TABLE veteran_jobs (id INT, state VARCHAR(50), application_date DATE); INSERT INTO veteran_jobs (id, state, application_date) VALUES (1, 'Texas', '2021-02-15'), (2, 'California', '2021-04-10'), (3, 'Texas', '2022-01-05'), (4, 'California', '2022-03-08');", "sql": "SELECT COUNT(*) FROM veteran_jobs WHERE state = 'California' AND application_date >= DATE_SUB(CURRENT_DATE, INTERVAL 1 YEAR);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 125, "num_statements": 1} {"question": "Find the average investment amount for carbon offset projects in the 'Africa' and 'Europe' regions, excluding oceanic projects.", "schema": "CREATE SCHEMA carbon_offsets; CREATE TABLE projects (project_name VARCHAR(255), region VARCHAR(255), investment_amount INT); INSERT INTO projects (project_name, region, investment_amount) VALUES ('Tropical Forest Conservation', 'Asia', 5000000), ('Wind Power Generation', 'Europe', 8000000), ('Soil Carbon Sequestration', 'Africa', 3000000), ('Oceanic Algae Farming', 'Oceania', 7000000);", "sql": "SELECT region, AVG(investment_amount) FROM carbon_offsets.projects WHERE region IN ('Africa', 'Europe') AND region != 'Oceania' GROUP BY region;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 144, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the club with date of appointment of 1 november 2007", "schema": "CREATE TABLE table_name_67 (club VARCHAR, date_of_appointment VARCHAR)", "sql": "SELECT club FROM table_name_67 WHERE date_of_appointment = '1 november 2007';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 77, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the average decile of Ruapehu college, which has a state authority?", "schema": "CREATE TABLE table_name_64 (decile INTEGER, authority VARCHAR, name VARCHAR)", "sql": "SELECT AVG(decile) FROM table_name_64 WHERE authority = 'state' AND name = 'ruapehu college';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 93, "num_statements": 1} {"question": "What is the regulatory status of digital assets by type?", "schema": "CREATE TABLE RegulatoryFrameworks (FrameworkID int, AssetType varchar(50), RegulatoryStatus varchar(50)); INSERT INTO RegulatoryFrameworks (FrameworkID, AssetType, RegulatoryStatus) VALUES (1, 'Cryptocurrency', 'Regulated'), (2, 'Security Token', 'Partially Regulated'), (3, 'Utility Token', 'Unregulated'), (4, 'Stablecoin', 'Partially Regulated');", "sql": "SELECT AssetType, RegulatoryStatus, COUNT(*) as Count FROM RegulatoryFrameworks GROUP BY AssetType, RegulatoryStatus;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 117, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: In language where Thursday is برس وار bres'var, what is Sunday?", "schema": "CREATE TABLE table_1277350_3 (sunday_surya__the_sun_ VARCHAR, thursday_guru__jupiter_ VARCHAR)", "sql": "SELECT sunday_surya__the_sun_ FROM table_1277350_3 WHERE thursday_guru__jupiter_ = 'برس وار Bres'var';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 102, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the score of the game before January 22 with a 30-7-7 record?", "schema": "CREATE TABLE table_name_54 (score VARCHAR, january VARCHAR, record VARCHAR)", "sql": "SELECT score FROM table_name_54 WHERE january < 22 AND record = '30-7-7';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 73, "num_statements": 1} {"question": "How many cruelty-free certifications were granted to each brand in the past 2 years?", "schema": "CREATE TABLE brands (brand_id INT, brand_name TEXT); CREATE TABLE cruelty_free_certifications (certification_id INT, brand_id INT, certification_date DATE);", "sql": "SELECT brand_name, COUNT(*) as certifications_in_past_2_years FROM brands JOIN cruelty_free_certifications ON brands.brand_id = cruelty_free_certifications.brand_id WHERE certification_date > DATEADD(year, -2, CURRENT_DATE) GROUP BY brand_name;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 244, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'alter_table' (example 163).", "schema": null, "sql": "CREATE TABLE like_constraint_rename_cache\n (LIKE constraint_rename_cache INCLUDING ALL);", "explanation": "DDL from PostgreSQL core regression test for Alter Table.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_table", "is_postgresql_specific": false, "sql_length": 89, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What's the Order with an Elector of Marino Bulcani?", "schema": "CREATE TABLE table_name_61 (order VARCHAR, elector VARCHAR)", "sql": "SELECT order FROM table_name_61 WHERE elector = 'marino bulcani';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 65, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the date of the game when North Melbourne was the away team?", "schema": "CREATE TABLE table_name_10 (date VARCHAR, away_team VARCHAR)", "sql": "SELECT date FROM table_name_10 WHERE away_team = 'north melbourne';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 67, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many female students (sex is F) whose age is below 25?", "schema": "CREATE TABLE student (sex VARCHAR, age VARCHAR)", "sql": "SELECT COUNT(*) FROM student WHERE sex = 'F' AND age < 25;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 58, "num_statements": 1} {"question": "What was the total weight of artifacts excavated per analyst each year?", "schema": "CREATE TABLE analysts (analyst_id INT, name VARCHAR(50), start_date DATE); INSERT INTO analysts (analyst_id, name, start_date) VALUES (1, 'John Doe', '2020-01-01'); CREATE TABLE artifact_analysis (analysis_id INT, artifact_id INT, analyst_id INT, analysis_date DATE, weight DECIMAL(5,2));", "sql": "SELECT a.name, EXTRACT(YEAR FROM aa.analysis_date) as analysis_year, SUM(aa.weight) as total_weight FROM analysts a JOIN artifact_analysis aa ON a.analyst_id = aa.analyst_id GROUP BY a.analyst_id, a.name, analysis_year ORDER BY analysis_year, total_weight DESC;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 261, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many incumbents were first elected in 1984?", "schema": "CREATE TABLE table_1341453_40 (incumbent VARCHAR, first_elected VARCHAR)", "sql": "SELECT COUNT(incumbent) FROM table_1341453_40 WHERE first_elected = '1984';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 75, "num_statements": 1} {"question": "What is the total duration of sunlight for each field in the past year?", "schema": "CREATE TABLE field (id INT, field_id INT, sunlight_duration INT, timestamp DATETIME);", "sql": "SELECT field_id, SUM(sunlight_duration) as total_sunlight_duration FROM field WHERE timestamp >= DATE_SUB(CURRENT_TIMESTAMP, INTERVAL 1 YEAR) GROUP BY field_id;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 160, "num_statements": 1} {"question": "How many algorithmic fairness assessments were conducted in Canada, Spain, and Germany in Q1 2022?", "schema": "CREATE TABLE fairness_assessments (assessment_id INT, assessment_date DATE, country TEXT); INSERT INTO fairness_assessments (assessment_id, assessment_date, country) VALUES (1, '2022-01-02', 'Canada'), (2, '2022-02-15', 'Spain'), (3, '2022-03-27', 'Germany');", "sql": "SELECT COUNT(*) as num_assessments FROM fairness_assessments WHERE country IN ('Canada', 'Spain', 'Germany') AND assessment_date BETWEEN '2022-01-01' AND '2022-03-31';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 167, "num_statements": 1} {"question": "List all compliance violations and corresponding dispensaries in Washington state.", "schema": "CREATE TABLE violations (id INT, dispensary_id INT, description VARCHAR(255)); INSERT INTO violations (id, dispensary_id, description) VALUES (1, 1, 'Incorrect labeling'), (2, 2, 'Missing security cameras');", "sql": "SELECT d.name, v.description FROM dispensaries d INNER JOIN violations v ON d.id = v.dispensary_id WHERE d.state = 'WA';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 120, "num_statements": 1} {"question": "Write the PL/pgSQL object from PostgreSQL regression test 'float8' (example 167).", "schema": null, "sql": "-- test exact cases for trigonometric functions in degrees\n\nSELECT x,\n sind(x),\n sind(x) IN (-1,-0.5,0,0.5,1) AS sind_exact\nFROM (VALUES (0), (30), (90), (150), (180),\n (210), (270), (330), (360)) AS t(x);", "explanation": "PL/pgSQL object from PostgreSQL core test for Float8.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 222, "num_statements": 1} {"question": "Who are the military technology asset manufacturers with the highest number of acquisitions by country?", "schema": "CREATE TABLE military_tech_manufacturers (manufacturer_id INT PRIMARY KEY, manufacturer_name VARCHAR(255), country VARCHAR(255), num_of_acquisitions INT); INSERT INTO military_tech_manufacturers (manufacturer_id, manufacturer_name, country, num_of_acquisitions) VALUES (1, 'Lockheed Martin', 'USA', 50), (2, 'BAE Systems', 'UK', 45), (3, 'Elbit Systems', 'Israel', 40), (4, 'Mitsubishi Heavy Industries', 'Japan', 35);", "sql": "SELECT country, manufacturer_name, num_of_acquisitions FROM military_tech_manufacturers ORDER BY num_of_acquisitions DESC, country ASC;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 135, "num_statements": 1} {"question": "Calculate the average revenue per album for all rock albums available on the 'desktop' platform.", "schema": "CREATE TABLE artists (id INT, name TEXT, genre TEXT); CREATE TABLE albums (id INT, title TEXT, artist_id INT, platform TEXT); CREATE TABLE sales (id INT, album_id INT, quantity INT, revenue DECIMAL); CREATE VIEW rock_desktop_albums AS SELECT a.id, a.title, ar.name FROM albums a JOIN artists ar ON a.artist_id = ar.id WHERE ar.genre = 'rock' AND a.platform = 'desktop'; CREATE VIEW rock_desktop_sales AS SELECT s.album_id, AVG(s.revenue) as avg_revenue FROM sales s JOIN rock_desktop_albums rda ON s.album_id = rda.id GROUP BY album_id;", "sql": "SELECT avg_revenue FROM rock_desktop_sales;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 43, "num_statements": 1} {"question": "What is the minimum maintenance cost for military helicopters in the last quarter?", "schema": "CREATE TABLE military_aircraft (aircraft_id INT, aircraft_model TEXT, age INT, maintenance_cost DECIMAL(10,2), aircraft_type TEXT);", "sql": "SELECT MIN(maintenance_cost) FROM military_aircraft WHERE aircraft_type = 'helicopter' AND maintenance_date >= DATE_SUB(CURRENT_DATE, INTERVAL 3 MONTH);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 152, "num_statements": 1} {"question": "pgTAP test for Functap (assertion 192).", "schema": null, "sql": "SELECT * FROM check_test(\n isnt_normal_function( 'oww', ARRAY['int', 'text'] ),\n false,\n 'isnt_normal_function(func, noargs)',\n 'Function oww(int, text) should not be a normal function',\n ''\n);", "explanation": "SQL assertion from pgTAP test suite for Functap.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 208, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'stats_ext' (example 278).", "schema": null, "sql": "CREATE INDEX fdeps_abc_idx ON functional_dependencies (a, b, c);", "explanation": "DDL from PostgreSQL core regression test for Stats Ext.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "ddl_index", "is_postgresql_specific": false, "sql_length": 64, "num_statements": 1} {"question": "What is the average age of patients with diabetes in rural areas, grouped by state?", "schema": "CREATE TABLE patients (patient_id INT, age INT, has_diabetes BOOLEAN, state VARCHAR); INSERT INTO patients (patient_id, age, has_diabetes, state) VALUES (1, 60, true, 'Ohio'); INSERT INTO patients (patient_id, age, has_diabetes, state) VALUES (2, 55, false, 'Ohio'); CREATE TABLE rural_areas (area_id INT, state VARCHAR); INSERT INTO rural_areas (area_id, state) VALUES (1, 'Ohio'); INSERT INTO rural_areas (area_id, state) VALUES (2, 'Texas');", "sql": "SELECT state, AVG(age) FROM patients JOIN rural_areas ON patients.state = rural_areas.state WHERE has_diabetes = true GROUP BY state;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 133, "num_statements": 1} {"question": "Calculate the moving average of donation amounts for the last three months for each donor, ordered by donation date.", "schema": "CREATE TABLE Donors (DonorID int, Name varchar(50)); CREATE TABLE Donations (DonationID int, DonorID int, Amount decimal(10,2), DonationDate date); INSERT INTO Donors (DonorID, Name) VALUES (1, 'James'), (2, 'Michelle'), (3, 'David'); INSERT INTO Donations (DonationID, DonorID, Amount, DonationDate) VALUES (1, 1, 500.00, '2022-01-15'), (2, 1, 300.00, '2022-02-01'), (3, 2, 200.00, '2022-03-01'), (4, 3, 1500.00, '2022-04-01'), (5, 1, 800.00, '2022-05-01'), (6, 2, 400.00, '2022-06-01');", "sql": "SELECT D.DonorID, D.Name, D.DonationDate, AVG(D.Amount) OVER (PARTITION BY D.DonorID ORDER BY D.DonationDate ROWS BETWEEN 2 PRECEDING AND CURRENT ROW) AS MovingAverage FROM Donors D JOIN Donations DD ON D.DonorID = DD.DonorID ORDER BY DD.DonationDate;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "query_window_function", "is_postgresql_specific": true, "sql_length": 251, "num_statements": 1} {"question": "Find the number of impact investments made by each investor in 2020.", "schema": "CREATE TABLE Investments (InvestmentID int, InvestorName varchar(50), InvestmentType varchar(50), Sector varchar(50), InvestmentAmount numeric(18,2), InvestmentDate date); INSERT INTO Investments (InvestmentID, InvestorName, InvestmentType, Sector, InvestmentAmount, InvestmentDate) VALUES (1, 'Investor1', 'Impact Investment', 'Technology', 10000, '2020-01-01'), (2, 'Investor2', 'Impact Investment', 'Finance', 15000, '2019-01-01'), (3, 'Investor1', 'Impact Investment', 'Renewable Energy', 12000, '2020-01-01'), (4, 'Investor3', 'Impact Investment', 'Healthcare', 14000, '2020-01-01');", "sql": "SELECT InvestorName, COUNT(*) FROM Investments WHERE YEAR(InvestmentDate) = 2020 AND InvestmentType = 'Impact Investment' GROUP BY InvestorName;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 144, "num_statements": 1} {"question": "What is the maximum number of flight hours for commercial airlines in the European Union in 2019?", "schema": "CREATE TABLE flight_safety (airline VARCHAR(50), region VARCHAR(50), flight_hours INT, year INT); INSERT INTO flight_safety (airline, region, flight_hours, year) VALUES ('Lufthansa', 'European Union', 120000, 2019), ('Ryanair', 'European Union', 150000, 2019);", "sql": "SELECT MAX(flight_hours) FROM flight_safety WHERE region = 'European Union' AND year = 2019;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 92, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many countries has a gdp (nominal) of $29.9 billion?", "schema": "CREATE TABLE table_11780179_1 (gdp_per_capita__nominal_ VARCHAR, gdp__nominal_ VARCHAR)", "sql": "SELECT COUNT(gdp_per_capita__nominal_) FROM table_11780179_1 WHERE gdp__nominal_ = '$29.9 billion';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 99, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'partition_prune' (example 158).", "schema": null, "sql": "create table iboolpart_t partition of iboolpart for values in ('false');", "explanation": "DDL from PostgreSQL core regression test for Partition Prune.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_table", "is_postgresql_specific": false, "sql_length": 72, "num_statements": 1} {"question": "PL/pgSQL test: Plpython Record (example 58).", "schema": null, "sql": "ALTER TYPE type_record DROP ATTRIBUTE second;", "explanation": "PL/pgSQL example from PostgreSQL source test for Plpython Record.", "validation_query": null, "source": "plpgsql_source", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 45, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many deaths occurred during Darby?", "schema": "CREATE TABLE table_name_64 (deaths VARCHAR, storm_name VARCHAR)", "sql": "SELECT deaths FROM table_name_64 WHERE storm_name = 'darby';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 60, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the airdate of 21 series number?", "schema": "CREATE TABLE table_11630008_3 (original_air_date VARCHAR, series_no VARCHAR)", "sql": "SELECT original_air_date FROM table_11630008_3 WHERE series_no = 21;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 68, "num_statements": 1} {"question": "Delete the investment with the given id.", "schema": "CREATE TABLE investments (id INT, sector VARCHAR(20), amount FLOAT); INSERT INTO investments (id, sector, amount) VALUES (1, 'Education', 150000.00), (2, 'Healthcare', 120000.00);", "sql": "DELETE FROM investments WHERE id = 2;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_delete", "is_postgresql_specific": false, "sql_length": 37, "num_statements": 1} {"question": "How many military vehicles were maintained in Texas in the last 6 months?", "schema": "CREATE TABLE military_equipment (equipment_id INT, equipment_type TEXT, last_maintenance_date DATE, state TEXT); INSERT INTO military_equipment (equipment_id, equipment_type, last_maintenance_date, state) VALUES (1, 'Tank', '2022-01-01', 'Texas');", "sql": "SELECT COUNT(*) FROM military_equipment WHERE last_maintenance_date >= DATE_SUB(CURRENT_DATE, INTERVAL 6 MONTH) AND state = 'Texas' AND equipment_type = 'Tank';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 160, "num_statements": 1} {"question": "pgTAP test for Functap (assertion 279).", "schema": null, "sql": "SELECT * FROM check_test(\n isnt_window( 'someschema', 'nada', ARRAY['int'] ),\n false,\n 'isnt_window(schema, nowin, arg)',\n 'Function someschema.nada(int) should not be a window function',\n ' Function someschema.nada(int) does not exist'\n);", "explanation": "SQL assertion from pgTAP test suite for Functap.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 257, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which france nationality has a lane larger than 3?", "schema": "CREATE TABLE table_name_19 (name VARCHAR, lane VARCHAR, nationality VARCHAR)", "sql": "SELECT name FROM table_name_19 WHERE lane > 3 AND nationality = 'france';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 73, "num_statements": 1} {"question": "How many spacecraft were deployed by the US and Europe by 2015-12-31?", "schema": "CREATE TABLE Spacecraft (ID INT, Name TEXT, Country TEXT, LaunchDate DATE); INSERT INTO Spacecraft (ID, Name, Country, LaunchDate) VALUES (1, 'GSAT-1', 'India', '2004-06-18'), (2, 'INSAT-3A', 'India', '2003-04-10'), (3, 'RS-1', 'Russia', '2012-06-17'), (4, 'Sentinel-1A', 'Europe', '2014-04-03'), (5, 'Juno', 'United States', '2011-08-05'), (6, 'Curiosity', 'United States', '2012-08-06');", "sql": "SELECT COUNT(*) AS TotalSpacecraft FROM Spacecraft WHERE Country IN ('United States', 'Europe') AND LaunchDate <= '2015-12-31';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 127, "num_statements": 1} {"question": "Delete the record with ID 2 from the oceanography_data table", "schema": "CREATE TABLE oceanography_data (id INT PRIMARY KEY, location VARCHAR(255), temperature DECIMAL(5,2), salinity DECIMAL(5,2), depth DECIMAL(5,2));", "sql": "WITH deleted_data AS (DELETE FROM oceanography_data WHERE id = 2 RETURNING *) SELECT * FROM deleted_data;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "dml_delete", "is_postgresql_specific": true, "sql_length": 105, "num_statements": 1} {"question": "What is the average sustainable urbanism score and total property price for properties in the \"SustainableCity\" schema, grouped by property type?", "schema": "CREATE TABLE Property (id INT, property_type VARCHAR(20), price FLOAT, sustainable_score INT, city VARCHAR(20)); INSERT INTO Property (id, property_type, price, sustainable_score, city) VALUES (1, 'Apartment', 500000, 85, 'SustainableCity'), (2, 'House', 700000, 70, 'SustainableCity'), (3, 'Condo', 300000, 90, 'SustainableCity');", "sql": "SELECT Property.property_type, AVG(Property.sustainable_score) AS avg_sustainable_score, SUM(Property.price) AS total_price FROM Property WHERE Property.city = 'SustainableCity' GROUP BY Property.property_type;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 210, "num_statements": 1} {"question": "What is the number of clinics in the 'rural_healthcare' table located in 'Forest Region'?", "schema": "CREATE TABLE rural_healthcare (name VARCHAR(255), type VARCHAR(255), location VARCHAR(255)); INSERT INTO rural_healthcare (name, type, location) VALUES ('Rural General Hospital', 'Hospital', 'Bushland'), ('Rural Community Hospital', 'Hospital', 'Forest Region'), ('Rural Mental Health Clinic', 'Clinic', 'Desert Region'), ('Rural Dental Clinic', 'Clinic', 'Forest Region');", "sql": "SELECT COUNT(*) FROM rural_healthcare WHERE type = 'Clinic' AND location = 'Forest Region';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 91, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: WHAT HOMETOWN DOES HE PLAY AS THE CATCHER FOR?", "schema": "CREATE TABLE table_11677100_11 (hometown VARCHAR, position VARCHAR)", "sql": "SELECT hometown FROM table_11677100_11 WHERE position = 'Catcher';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the average ERP W and call sign of w237br", "schema": "CREATE TABLE table_name_36 (erp_w INTEGER, call_sign VARCHAR)", "sql": "SELECT AVG(erp_w) FROM table_name_36 WHERE call_sign = 'w237br';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 64, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What's the English name for the month with พ.ย. abbreviation?", "schema": "CREATE TABLE table_180802_2 (english_name VARCHAR, abbr VARCHAR)", "sql": "SELECT english_name FROM table_180802_2 WHERE abbr = 'พ.ย.';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 60, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'generated_virtual' (example 22).", "schema": null, "sql": "CREATE TABLE gtest_err_7d (a int PRIMARY KEY, b int GENERATED ALWAYS AS (generate_series(1, a)) VIRTUAL);", "explanation": "DDL from PostgreSQL core regression test for Generated Virtual.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_table", "is_postgresql_specific": true, "sql_length": 105, "num_statements": 1} {"question": "Show an example of PostgreSQL CREATE SEQUENCE (example 3).", "schema": null, "sql": "SELECT nextval('serial'); nextval --------- 101;", "explanation": "PostgreSQL CREATE SEQUENCE command.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": true, "sql_length": 48, "num_statements": 2} {"question": "PostgreSQL regression test 'case': Write the SELECT query (example 31).", "schema": null, "sql": "SELECT NULLIF(a.i,b.i) AS \"NULLIF(a.i,b.i)\",\n NULLIF(b.i, 4) AS \"NULLIF(b.i,4)\"\n FROM CASE_TBL a, CASE2_TBL b;", "explanation": "Regression test for Case in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT NULLIF(a.i,b.i) AS \"NULLIF(a.i,b.i)\",\n NULLIF(b.i, 4) AS \"NULLIF(b.i,4)\"\n FROM CASE_TBL a, CASE2_TBL b) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 112, "num_statements": 1} {"question": "What was the total defense diplomacy spending for East European nations in 2017?", "schema": "CREATE TABLE DefenseDiplomacy (nation VARCHAR(50), year INT, spending FLOAT); INSERT INTO DefenseDiplomacy (nation, year, spending) VALUES ('Poland', 2017, 25000000), ('Romania', 2017, 30000000), ('Ukraine', 2017, 35000000), ('Hungary', 2017, 22000000), ('Czech Republic', 2017, 28000000);", "sql": "SELECT SUM(spending) FROM DefenseDiplomacy WHERE nation IN ('Poland', 'Romania', 'Ukraine', 'Hungary', 'Czech Republic') AND year = 2017;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 137, "num_statements": 1} {"question": "What is the total number of articles published in each state of the USA for the current year?", "schema": "CREATE TABLE articles (article_id INT, publication_date DATE, state VARCHAR(255)); INSERT INTO articles (article_id, publication_date, state) VALUES (1, '2022-01-01', 'California'), (2, '2022-01-02', 'Texas'), (3, '2022-01-03', 'Florida');", "sql": "SELECT state, COUNT(article_id) FROM articles WHERE YEAR(publication_date) = YEAR(GETDATE()) AND state IN ('California', 'Texas', 'Florida', 'New York', 'Pennsylvania') GROUP BY state;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 184, "num_statements": 1} {"question": "What is the total salary of community health workers by race?", "schema": "CREATE TABLE community_health_workers (id INT, name VARCHAR(50), race VARCHAR(50), salary DECIMAL(10,2)); INSERT INTO community_health_workers (id, name, race, salary) VALUES (1, 'John Doe', 'White', 60000.00), (2, 'Jane Smith', 'Black', 55000.00), (3, 'Jim Brown', 'Hispanic', 72000.00);", "sql": "SELECT race, SUM(salary) FROM community_health_workers GROUP BY race;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 69, "num_statements": 1} {"question": "Calculate the moving average of animal populations for each species over the last three records, if available.", "schema": "CREATE TABLE animal_population (species VARCHAR(255), year INT, population INT); INSERT INTO animal_population (species, year, population) VALUES ('Tiger', 2018, 63), ('Tiger', 2019, 65), ('Tiger', 2020, 68), ('Lion', 2018, 50), ('Lion', 2019, 52), ('Lion', 2020, 55);", "sql": "SELECT species, year, AVG(population) OVER (PARTITION BY species ORDER BY year ROWS BETWEEN 2 PRECEDING AND CURRENT ROW) AS moving_average FROM animal_population;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "query_window_function", "is_postgresql_specific": true, "sql_length": 162, "num_statements": 1} {"question": "What is the minimum humidity recorded for each sensor in 'farm2'?", "schema": "CREATE TABLE sensor (id INT, name VARCHAR(20), location VARCHAR(20), type VARCHAR(20)); INSERT INTO sensor (id, name, location, type) VALUES (1, 'sensor1', 'farm1', 'temperature'), (2, 'sensor2', 'farm2', 'humidity'), (3, 'sensor3', 'farm3', 'temperature'); CREATE TABLE humidity (id INT, sensor_id INT, timestamp DATETIME, value FLOAT); INSERT INTO humidity (id, sensor_id, timestamp, value) VALUES (1, 2, '2022-07-01 00:00:00', 60.3), (2, 2, '2022-07-01 12:00:00', 55.1), (3, 2, '2022-07-02 00:00:00', 58.9);", "sql": "SELECT sensor.name as sensor_name, MIN(value) as min_humidity FROM humidity JOIN sensor ON humidity.sensor_id = sensor.id WHERE sensor.location = 'farm2' GROUP BY sensor.name;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 175, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the average number of gold medals for clubs?", "schema": "CREATE TABLE club_rank (Gold INTEGER)", "sql": "SELECT AVG(Gold) FROM club_rank;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 32, "num_statements": 1} {"question": "List all the transactions from socially responsible lending with a transaction amount greater than $5000.", "schema": "CREATE TABLE socially_responsible_lending (transaction_id INT, client_id INT, transaction_amount DECIMAL(10,2)); INSERT INTO socially_responsible_lending (transaction_id, client_id, transaction_amount) VALUES (1, 4, 7000.00), (2, 5, 6000.00), (3, 6, 5500.00), (4, 7, 8000.00);", "sql": "SELECT * FROM socially_responsible_lending WHERE transaction_amount > 5000;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 75, "num_statements": 1} {"question": "What are the launch dates of space missions having the same launch country?", "schema": "CREATE TABLE space_missions (id INT, name VARCHAR(50), start_date DATE, launch_country VARCHAR(50)); INSERT INTO space_missions VALUES (1, 'Apollo 11', '1969-07-16', 'USA'), (2, 'Apollo 13', '1970-04-11', 'USA'), (3, 'Mars Pathfinder', '1996-12-04', 'USA'), (4, 'Cassini-Huygens', '1997-10-15', 'France');", "sql": "SELECT name, launch_country, start_date FROM space_missions WHERE launch_country IN (SELECT launch_country FROM space_missions GROUP BY launch_country HAVING COUNT(*) > 1);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 172, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: which train number arrives in lonavla at 17:45", "schema": "CREATE TABLE table_29301050_1 (train_number VARCHAR, arrival_lonavla VARCHAR)", "sql": "SELECT train_number FROM table_29301050_1 WHERE arrival_lonavla = '17:45';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 74, "num_statements": 1} {"question": "PostgreSQL regression test 'multirangetypes': Write the SELECT query (example 154).", "schema": null, "sql": "SELECT nummultirange(numrange(1,5), numrange(6,9)) @> numrange(6,7);", "explanation": "Regression test for Multirangetypes in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT nummultirange(numrange(1,5), numrange(6,9)) @> numrange(6,7)) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 68, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Advisor 1121 has how many students?", "schema": "CREATE TABLE Student (Advisor VARCHAR)", "sql": "SELECT COUNT(*) FROM Student WHERE Advisor = 1121;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 50, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Find the number of routes with destination airports in Italy.", "schema": "CREATE TABLE routes (dst_apid VARCHAR); CREATE TABLE airports (apid VARCHAR, country VARCHAR)", "sql": "SELECT COUNT(*) FROM routes AS T1 JOIN airports AS T2 ON T1.dst_apid = T2.apid WHERE T2.country = 'Italy';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 106, "num_statements": 1} {"question": "Update the contact information for the disability services office at the specified campus, including phone number and email address.", "schema": "CREATE TABLE campus (id INT, name VARCHAR(50), region VARCHAR(50)); CREATE TABLE disability_services_office (id INT, campus_id INT, phone VARCHAR(20), email VARCHAR(50));", "sql": "UPDATE disability_services_office dso SET phone = '555-123-4567', email = 'ds@campus.edu' WHERE campus_id = (SELECT id FROM campus WHERE name = 'Campus Name');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 159, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which game was played at the Forum and led to a series result of 0-1?", "schema": "CREATE TABLE table_name_85 (game VARCHAR, location_attendance VARCHAR, series VARCHAR)", "sql": "SELECT game FROM table_name_85 WHERE location_attendance = 'the forum' AND series = '0-1';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 90, "num_statements": 1} {"question": "How many clinical trials were conducted in Asia in 2019?", "schema": "CREATE TABLE clinical_trials (country TEXT, year INTEGER, trials INTEGER); INSERT INTO clinical_trials (country, year, trials) VALUES ('Japan', 2019, 500); INSERT INTO clinical_trials (country, year, trials) VALUES ('China', 2019, 800);", "sql": "SELECT SUM(trials) FROM clinical_trials WHERE country IN ('Japan', 'China') AND year = 2019;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 92, "num_statements": 1} {"question": "What is the percentage of fair trade products by category?", "schema": "CREATE TABLE FairTradeProducts (product_id INT, product_name VARCHAR(255), category VARCHAR(255), is_fair_trade BOOLEAN); INSERT INTO FairTradeProducts (product_id, product_name, category, is_fair_trade) VALUES (1, 'Organic Cotton T-Shirt', 'Tops', true), (2, 'Conventional Cotton Pants', 'Bottoms', false), (3, 'Fair Trade Coffee', 'Food', true), (4, 'Recycled Polyester Jacket', 'Outerwear', true), (5, 'Conventional Cotton Socks', 'Accessories', false);", "sql": "SELECT category, ROUND(COUNT(*) FILTER (WHERE is_fair_trade = true) * 100.0 / COUNT(*), 2) as fair_trade_percentage FROM FairTradeProducts GROUP BY category;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": true, "sql_length": 157, "num_statements": 1} {"question": "Write the SQL definition from PostgreSQL contrib extension 'contrib' (item 21).", "schema": null, "sql": "CREATE FUNCTION gtrgm_decompress(internal)\nRETURNS internal\nAS 'MODULE_PATHNAME'\nLANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;", "explanation": "SQL definition from the 'contrib' PostgreSQL contrib extension.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 123, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is Tournament, when Margin is Victory of 6 Strokes?", "schema": "CREATE TABLE table_name_30 (tournament VARCHAR, margin_of_victory VARCHAR)", "sql": "SELECT tournament FROM table_name_30 WHERE margin_of_victory = '6 strokes';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 75, "num_statements": 1} {"question": "PostgreSQL regression test 'join': Write the SELECT query (example 854).", "schema": null, "sql": "select * from j1\ninner join j2 on j1.id1 = j2.id1 and j1.id2 = j2.id2\nwhere j1.id1 % 1000 = 1 and j2.id1 % 1000 = 1 and j2.id1 >= any (array[1,5]);", "explanation": "Regression test for Join in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select * from j1\ninner join j2 on j1.id1 = j2.id1 and j1.id2 = j2.id2\nwhere j1.id1 % 1000 = 1 and j2.id1 % 1000 = 1 and j2.id1 >= any (array[1,5])) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": true, "sql_length": 147, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the status of the building for 2014 with 33 floors?", "schema": "CREATE TABLE table_name_54 (status VARCHAR, year VARCHAR, floors VARCHAR)", "sql": "SELECT status FROM table_name_54 WHERE year < 2014 AND floors = 33;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 67, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the away score for Carlton?", "schema": "CREATE TABLE table_name_41 (away_team VARCHAR)", "sql": "SELECT away_team AS score FROM table_name_41 WHERE away_team = 'carlton';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 73, "num_statements": 1} {"question": "PostgreSQL Queries: show example 59.", "schema": null, "sql": "=> SELECT * FROM t1 LEFT JOIN t2 ON t1.num = t2.num WHERE t2.value = 'xxx'; num | name | num | value -----+------+-----+------- 1 | a | 1 | xxx (1 row);", "explanation": "Example from PostgreSQL documentation on Queries.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 152, "num_statements": 2} {"question": "Generate PostgreSQL SQL for: What is the League Goals when the FA Cup Goals are 0, position is mf, League Cup Apps of 0, and name is Ben Thornley?", "schema": "CREATE TABLE table_name_98 (league_goals INTEGER, name VARCHAR, league_cup_apps VARCHAR, fa_cup_goals VARCHAR, position VARCHAR)", "sql": "SELECT AVG(league_goals) FROM table_name_98 WHERE fa_cup_goals = '0' AND position = 'mf' AND league_cup_apps = '0' AND name = 'ben thornley';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 141, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What are the original names of the districts where the population in the 2010 census was 210450?", "schema": "CREATE TABLE table_1104312_5 (original_name VARCHAR, population_at_2010_census VARCHAR)", "sql": "SELECT original_name FROM table_1104312_5 WHERE population_at_2010_census = 210450;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 83, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the sum of draws when the position is less than 17 and wins is less than 11", "schema": "CREATE TABLE table_name_82 (draws INTEGER, position VARCHAR, wins VARCHAR)", "sql": "SELECT SUM(draws) FROM table_name_82 WHERE position < 17 AND wins < 11;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 71, "num_statements": 1} {"question": "How many farms are there in each location with an area greater than 150 acres, and what is the total irrigated area in those farms?", "schema": "CREATE TABLE farms (id INT, name VARCHAR(50), location VARCHAR(50), acres FLOAT, irrigation BOOLEAN); INSERT INTO farms (id, name, location, acres, irrigation) VALUES (1, 'Smith Farm', 'Georgia', 200.5, TRUE); INSERT INTO farms (id, name, location, acres, irrigation) VALUES (2, 'Brown Farm', 'California', 180.0, FALSE); INSERT INTO farms (id, name, location, acres, irrigation) VALUES (3, 'Jones Farm', 'California', 250.0, TRUE);", "sql": "SELECT context.location AS location, COUNT(*) AS num_farms, SUM(CASE WHEN context.irrigation = TRUE THEN context.acres END) AS irrigated_acres FROM (SELECT location, acres, irrigation FROM farms WHERE acres > 150) AS context GROUP BY context.location;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 251, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the To par of payne stewart?", "schema": "CREATE TABLE table_name_4 (to_par VARCHAR, player VARCHAR)", "sql": "SELECT to_par FROM table_name_4 WHERE player = 'payne stewart';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 63, "num_statements": 1} {"question": "PostgreSQL regression test 'rangefuncs': Write the SELECT query (example 220).", "schema": null, "sql": "SELECT * FROM (VALUES (1),(2),(3)) v1(r1),\n LATERAL (SELECT r1, * FROM (VALUES (10),(20),(30)) v2(r2)\n LEFT JOIN generate_series(r2,r2+3) f(i) ON ((r2+i)<100) OFFSET 0) s1;", "explanation": "Regression test for Rangefuncs in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT * FROM (VALUES (1),(2),(3)) v1(r1),\n LATERAL (SELECT r1, * FROM (VALUES (10),(20),(30)) v2(r2)\n LEFT JOIN generate_series(r2,r2+3) f(i) ON ((r2+i)<100) OFFSET 0) s1) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": true, "sql_length": 225, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which country has the score 70-76-68-214?", "schema": "CREATE TABLE table_name_71 (country VARCHAR, score VARCHAR)", "sql": "SELECT country FROM table_name_71 WHERE score = '70-76-68-214';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 63, "num_statements": 1} {"question": "PostgreSQL regression test 'interval': Write the SELECT query (example 385).", "schema": null, "sql": "SELECT interval 'infinity' / 'nan';", "explanation": "Regression test for Interval in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT interval 'infinity' / 'nan') AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 35, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the frequency of the processor with an sSpec number of sl3bn(kc0)sl3e9(kc0)?", "schema": "CREATE TABLE table_name_79 (frequency VARCHAR, sspec_number VARCHAR)", "sql": "SELECT frequency FROM table_name_79 WHERE sspec_number = 'sl3bn(kc0)sl3e9(kc0)';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 80, "num_statements": 1} {"question": "What are the maximum and minimum temperature for each habitat type?", "schema": "CREATE TABLE fish_habitats (id INT, fish_id INT, habitat_type VARCHAR(20), temperature DECIMAL(5,2)); INSERT INTO fish_habitats (id, fish_id, habitat_type, temperature) VALUES (1, 1, 'tropical', 28.3); INSERT INTO fish_habitats (id, fish_id, habitat_type, temperature) VALUES (2, 2, 'temperate', 15.5);", "sql": "SELECT habitat_type, MAX(temperature) as max_temp, MIN(temperature) as min_temp FROM fish_habitats GROUP BY habitat_type;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 121, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which School/Club Team has a Pick of 198?", "schema": "CREATE TABLE table_name_7 (school_club_team VARCHAR, pick VARCHAR)", "sql": "SELECT school_club_team FROM table_name_7 WHERE pick = 198;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 59, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'fuzzystrmatch' (example 4).", "schema": null, "sql": "SELECT soundex('Anne'), soundex('Andrew'), difference('Anne', 'Andrew');", "explanation": "Example query from the 'fuzzystrmatch' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 72, "num_statements": 1} {"question": "How many disability support programs were added in 'Florida' since 2015?", "schema": "CREATE TABLE program_history (program_id INT, program_name VARCHAR(50), state VARCHAR(50), start_year INT); INSERT INTO program_history (program_id, program_name, state, start_year) VALUES (1, 'Accessible Transportation', 'Florida', 2015), (2, 'Sign Language Interpretation', 'Florida', 2016), (3, 'Adaptive Equipment', 'Florida', 2017);", "sql": "SELECT COUNT(*) FROM program_history WHERE state = 'Florida' AND start_year > 2015;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 83, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the average number of injuries caused each time?", "schema": "CREATE TABLE death (injured INTEGER)", "sql": "SELECT AVG(injured) FROM death;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 31, "num_statements": 1} {"question": "What is the percentage of hospitals in Los Angeles that offer free COVID-19 testing?", "schema": "CREATE TABLE Hospitals (HospitalID INT, Name VARCHAR(50), City VARCHAR(30), State VARCHAR(20), FreeTesting BOOLEAN); INSERT INTO Hospitals (HospitalID, Name, City, State, FreeTesting) VALUES (1, 'Cedars-Sinai', 'Los Angeles', 'California', TRUE); INSERT INTO Hospitals (HospitalID, Name, City, State, FreeTesting) VALUES (2, 'UCLA Medical Center', 'Los Angeles', 'California', FALSE);", "sql": "SELECT (COUNT(*) * 100.0 / (SELECT COUNT(*) FROM Hospitals WHERE City = 'Los Angeles')) FROM Hospitals WHERE City = 'Los Angeles' AND FreeTesting = TRUE;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 153, "num_statements": 1} {"question": "What is the number of circular economy initiatives for each province in China in 2020?", "schema": "CREATE TABLE circular_economy_china (province VARCHAR(50), year INT, initiatives INT); INSERT INTO circular_economy_china (province, year, initiatives) VALUES ('Anhui', 2020, 12), ('Beijing', 2020, 15), ('Chongqing', 2020, 18), ('Fujian', 2020, 10), ('Gansu', 2020, 20);", "sql": "SELECT province, initiatives FROM circular_economy_china WHERE year = 2020;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 75, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the most number of believers for ܓܘܝܠܢ", "schema": "CREATE TABLE table_24613895_1 (number_of_believers INTEGER, name_in_syriac VARCHAR)", "sql": "SELECT MAX(number_of_believers) FROM table_24613895_1 WHERE name_in_syriac = 'ܓܘܝܠܢ';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 85, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the number of population for врбас", "schema": "CREATE TABLE table_2562572_20 (population__2011_ VARCHAR, cyrillic_name_other_names VARCHAR)", "sql": "SELECT COUNT(population__2011_) FROM table_2562572_20 WHERE cyrillic_name_other_names = 'Врбас';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 96, "num_statements": 1} {"question": "Delete records of health centers in Northern Mariana Islands.", "schema": "CREATE TABLE health_centers (id INT, name TEXT, location TEXT); INSERT INTO health_centers (id, name, location) VALUES (1, 'Health Center A', 'Rural Alaska'); INSERT INTO health_centers (id, name, location) VALUES (5, 'Health Center E', 'Northern Mariana Islands');", "sql": "DELETE FROM health_centers WHERE location = 'Northern Mariana Islands';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_delete", "is_postgresql_specific": false, "sql_length": 71, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'btree_gin' (example 9).", "schema": null, "sql": "SELECT * FROM test_bit WHERE i>'100'::bit(3) ORDER BY i;", "explanation": "Example query from the 'btree_gin' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 56, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: what is the overall number of chosen ideas where the person is scott parker", "schema": "CREATE TABLE table_2840500_3 (pick VARCHAR, player VARCHAR)", "sql": "SELECT COUNT(pick) FROM table_2840500_3 WHERE player = 'Scott Parker';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 70, "num_statements": 1} {"question": "Show a SQL definition from the citus project (distributed_types_xact_add_enum_value, item 7).", "schema": null, "sql": "CREATE TABLE t1 (a int PRIMARY KEY, b xact_enum_edit);", "explanation": "SQL definition from the open-source citus PostgreSQL project.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_table", "is_postgresql_specific": false, "sql_length": 54, "num_statements": 1} {"question": "What is the total revenue for the 'Spring 2021' and 'Summer 2021' collections?", "schema": "CREATE TABLE sales (collection VARCHAR(20), revenue INT); INSERT INTO sales (collection, revenue) VALUES ('Spring 2021', 500000), ('Summer 2021', 600000);", "sql": "SELECT collection, SUM(revenue) FROM sales WHERE collection IN ('Spring 2021', 'Summer 2021') GROUP BY collection;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 114, "num_statements": 1} {"question": "Which supplier provided the most Praseodymium in 2017?", "schema": "CREATE TABLE praseodymium_supply (year INT, supplier VARCHAR(20), praseodymium_supply INT); INSERT INTO praseodymium_supply VALUES (2015, 'Supplier F', 22), (2016, 'Supplier G', 27), (2017, 'Supplier H', 32), (2018, 'Supplier I', 37), (2019, 'Supplier J', 42);", "sql": "SELECT supplier, MAX(praseodymium_supply) FROM praseodymium_supply WHERE year = 2017 GROUP BY supplier;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 103, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'sepgsql' (example 12).", "schema": null, "sql": "SELECT * FROM t1p_tens WHERE o > 50 AND p like '%64%';", "explanation": "Example query from the 'sepgsql' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 54, "num_statements": 1} {"question": "Delete properties with sustainable urbanism ratings below 70 from the view", "schema": "CREATE VIEW properties AS SELECT * FROM property;", "sql": "DELETE FROM properties WHERE rating < 70;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "dml_delete", "is_postgresql_specific": false, "sql_length": 41, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: In which venue is South Melbourne the home team?", "schema": "CREATE TABLE table_name_11 (venue VARCHAR, home_team VARCHAR)", "sql": "SELECT venue FROM table_name_11 WHERE home_team = 'south melbourne';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 68, "num_statements": 1} {"question": "Delete all warehouses in Germany", "schema": "CREATE TABLE warehouse (id INT, city VARCHAR(20), capacity INT); CREATE TABLE country (id INT, name VARCHAR(20)); INSERT INTO country (id, name) VALUES (1, 'India'), (2, 'USA'), (3, 'Germany'); CREATE VIEW warehouse_country AS SELECT * FROM warehouse INNER JOIN country ON warehouse.id = country.id; INSERT INTO warehouse (id, city, capacity) VALUES (10, 'Berlin', 3000), (11, 'Munich', 3500), (12, 'Frankfurt', 4000); INSERT INTO country (id, name) VALUES (4, 'Germany'); INSERT INTO warehouse_country (id, city, capacity, name) VALUES (10, 'Berlin', 3000, 'Germany'), (11, 'Munich', 3500, 'Germany'), (12, 'Frankfurt', 4000, 'Germany');", "sql": "DELETE FROM warehouse_country WHERE name = 'Germany'; DELETE FROM warehouse WHERE name = 'Germany';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "dml_delete", "is_postgresql_specific": false, "sql_length": 99, "num_statements": 2} {"question": "Update the type of 'Venus Express' in the 'space_missions' table to 'orbiter'", "schema": "CREATE TABLE space_missions (id INT PRIMARY KEY, name VARCHAR(50), type VARCHAR(50)); INSERT INTO space_missions (id, name, type) VALUES (1, 'Mars Pathfinder', 'rover'), (2, 'Spirit', 'rover'), (3, 'Opportunity', 'rover'), (4, 'Curiosity', 'rover'), (5, 'Venus Express', 'lander');", "sql": "UPDATE space_missions SET type = 'orbiter' WHERE name = 'Venus Express';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 72, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'publication' (example 159).", "schema": null, "sql": "CREATE PUBLICATION testpub6 FOR TABLE rf_bug WHERE (status = 'open') WITH (publish = 'insert');", "explanation": "DDL from PostgreSQL core regression test for Publication.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 95, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the team for oct 30, 1989", "schema": "CREATE TABLE table_1594772_2 (team__b_ VARCHAR, match_date VARCHAR)", "sql": "SELECT team__b_ FROM table_1594772_2 WHERE match_date = 'Oct 30, 1989';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 71, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which gender makes up the majority of the staff?", "schema": "CREATE TABLE staff (gender VARCHAR)", "sql": "SELECT gender FROM staff GROUP BY gender ORDER BY COUNT(*) DESC LIMIT 1;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 72, "num_statements": 1} {"question": "What is the maximum production cost for items made of recycled polyester?", "schema": "CREATE TABLE products (id INT, name TEXT, material TEXT, production_cost FLOAT); INSERT INTO products (id, name, material, production_cost) VALUES (1, 'Jacket', 'Recycled Polyester', 75.0), (2, 'Shoes', 'Recycled Polyester', 50.0);", "sql": "SELECT MAX(production_cost) FROM products WHERE material = 'Recycled Polyester';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 80, "num_statements": 1} {"question": "What is the minimum size (in square kilometers) of a habitat for animals in the 'habitats' table that are not mammals?", "schema": "CREATE TABLE habitats (id INT, animal_type VARCHAR(50), size_km FLOAT); INSERT INTO habitats (id, animal_type, size_km) VALUES (1, 'Mammal', 45.1), (2, 'Reptile', 25.1);", "sql": "SELECT MIN(size_km) FROM habitats WHERE animal_type != 'Mammal';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 64, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who was the broadcaster in 2003?", "schema": "CREATE TABLE table_name_24 (broadcaster VARCHAR, year VARCHAR)", "sql": "SELECT broadcaster FROM table_name_24 WHERE year = '2003';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 58, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the manner of departure for luis enrique", "schema": "CREATE TABLE table_27666856_3 (manner_of_departure VARCHAR, outgoing_manager VARCHAR)", "sql": "SELECT manner_of_departure FROM table_27666856_3 WHERE outgoing_manager = 'Luis Enrique';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 89, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the score of the game on February 26 with the Chicago black hawks as the home team and the New York Rangers as the visitor team?", "schema": "CREATE TABLE table_name_59 (score VARCHAR, date VARCHAR, home VARCHAR, visitor VARCHAR)", "sql": "SELECT score FROM table_name_59 WHERE home = 'chicago black hawks' AND visitor = 'new york rangers' AND date = 'february 26';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 125, "num_statements": 1} {"question": "How many customers have been served by Shariah-compliant financial institutions in the Middle East offering financial wellbeing products?", "schema": "CREATE TABLE Customers (CustomerID int, InstitutionID int, Location varchar(50)); INSERT INTO Customers (CustomerID, InstitutionID, Location) VALUES (1, 1, 'Middle East'); CREATE TABLE Institutions (InstitutionID int, Name varchar(50), ShariahCompliant bit, FinancialWellbeing bit); INSERT INTO Institutions (InstitutionID, Name, ShariahCompliant, FinancialWellbeing) VALUES (1, 'Institution A', 1, 1);", "sql": "SELECT COUNT(*) FROM Customers C INNER JOIN Institutions I ON C.InstitutionID = I.InstitutionID WHERE I.ShariahCompliant = 1 AND I.FinancialWellbeing = 1 AND C.Location = 'Middle East';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 185, "num_statements": 1} {"question": "What is the difference in mental health visits between consecutive patients, ordered by PatientID?", "schema": "CREATE TABLE MentalHealthParity (PatientID int, MentalHealthVisits int); INSERT INTO MentalHealthParity (PatientID, MentalHealthVisits) VALUES (1, 5), (2, 3), (3, 6), (4, 4), (5, 8), (6, 7), (7, 6);", "sql": "SELECT PatientID, MentalHealthVisits, LAG(MentalHealthVisits) OVER (ORDER BY PatientID) AS PreviousVisits, MentalHealthVisits - LAG(MentalHealthVisits) OVER (ORDER BY PatientID) AS VisitDifference FROM MentalHealthParity;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_window_function", "is_postgresql_specific": true, "sql_length": 221, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who drives for the sponsor w.h. bolin?", "schema": "CREATE TABLE table_name_82 (driver VARCHAR, sponsor VARCHAR)", "sql": "SELECT driver FROM table_name_82 WHERE sponsor = 'w.h. bolin';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 62, "num_statements": 1} {"question": "List all sports types", "schema": "CREATE TABLE sports (id INT PRIMARY KEY, sport_name VARCHAR(100));", "sql": "SELECT sport_name FROM sports;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 30, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the earliest quarterfinal week when the genre is dancing and the act is 32?", "schema": "CREATE TABLE table_27529608_21 (qtr_final__week_ INTEGER, genre VARCHAR, age_s_ VARCHAR)", "sql": "SELECT MIN(qtr_final__week_) FROM table_27529608_21 WHERE genre = 'Dancing' AND age_s_ = '32';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 94, "num_statements": 1} {"question": "What is the total value of paintings sold by female artists in Germany?", "schema": "CREATE TABLE Artists (id INT, name VARCHAR(255), gender VARCHAR(6)); CREATE TABLE ArtWork (id INT, title VARCHAR(255), artist_id INT, price DECIMAL(10,2), type VARCHAR(255)); INSERT INTO Artists (id, name, gender) VALUES (1, 'ArtistX', 'Female'); INSERT INTO ArtWork (id, title, artist_id, price, type) VALUES (1, 'Painting1', 1, 12000, 'Painting');", "sql": "SELECT SUM(ArtWork.price) FROM ArtWork INNER JOIN Artists ON ArtWork.artist_id = Artists.id WHERE ArtWork.type = 'Painting' AND Artists.gender = 'Female' AND Artists.name LIKE '%Germany%';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 188, "num_statements": 1} {"question": "Find the number of unique workout activities for members who joined in 2020.", "schema": "CREATE TABLE members_2020 (id INT, name VARCHAR(50), country VARCHAR(50), joined DATE); INSERT INTO members_2020 (id, name, country, joined) VALUES (6, 'Charlie Davis', 'Canada', '2020-05-10'); CREATE TABLE member_workout (member_id INT, activity VARCHAR(50)); INSERT INTO member_workout (member_id, activity) VALUES (1, 'Running'); INSERT INTO member_workout (member_id, activity) VALUES (1, 'Cycling'); INSERT INTO member_workout (member_id, activity) VALUES (6, 'Swimming'); INSERT INTO member_workout (member_id, activity) VALUES (6, 'Yoga');", "sql": "SELECT member_id, COUNT(DISTINCT activity) as unique_activities FROM member_workout GROUP BY member_id HAVING joined >= '2020-01-01' AND joined < '2021-01-01';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 159, "num_statements": 1} {"question": "Insert a new record with species_id 5, species_name 'reindeer', and biomass 180.0 in the 'species_data' table.", "schema": "CREATE TABLE species_data (species_id INT, species_name VARCHAR(255), biomass FLOAT);", "sql": "INSERT INTO species_data (species_id, species_name, biomass) VALUES (5, 'reindeer', 180.0);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 91, "num_statements": 1} {"question": "Which drugs had zero sales in '2019' but not in '2018' and '2020'?", "schema": "CREATE TABLE sales_yearly_2(year int, drug varchar(10), revenue int); INSERT INTO sales_yearly_2(year, drug, revenue) VALUES(2018, 'DrugM', 500), (2019, 'DrugM', 0), (2020, 'DrugM', 600);", "sql": "SELECT drug FROM sales_yearly_2 WHERE revenue = 0 AND year = 2019 INTERSECT SELECT drug FROM sales_yearly_2 WHERE year IN (2018, 2020) AND revenue > 0;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 151, "num_statements": 1} {"question": "Which cruelty-free certified products are most preferred by consumers in the USA?", "schema": "CREATE TABLE cosmetics_preferences (id INT, consumer_id INT, product_id INT, preference_score INT, country VARCHAR(255)); INSERT INTO cosmetics_preferences (id, consumer_id, product_id, preference_score, country) VALUES (1, 1, 1, 5, 'USA'); CREATE TABLE products (id INT, name VARCHAR(255), is_cruelty_free BOOLEAN); INSERT INTO products (id, name, is_cruelty_free) VALUES (1, 'Natural Glow Foundation', true);", "sql": "SELECT p.name, cp.preference_score FROM cosmetics_preferences cp INNER JOIN products p ON cp.product_id = p.id WHERE cp.country = 'USA' AND p.is_cruelty_free = true;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 165, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which Model has the GCM (kg) Technical Capacity of 42000?", "schema": "CREATE TABLE table_name_52 (model VARCHAR, gcm__kg__technical_capacity VARCHAR)", "sql": "SELECT model FROM table_name_52 WHERE gcm__kg__technical_capacity = '42000';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 76, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What party established in 1797 won an election in 2007?", "schema": "CREATE TABLE table_name_17 (party VARCHAR, established VARCHAR, election VARCHAR)", "sql": "SELECT party FROM table_name_17 WHERE established > 1797 AND election = 2007;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 77, "num_statements": 1} {"question": "What is the maximum depth of all deep-sea expeditions in the Pacific Ocean?", "schema": "CREATE TABLE deep_sea_expeditions (expedition_name VARCHAR(255), depth FLOAT, ocean VARCHAR(255));", "sql": "SELECT MAX(depth) FROM deep_sea_expeditions WHERE ocean = 'Pacific';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 68, "num_statements": 1} {"question": "What is the average billing amount for attorneys in the 'billing' table, grouped by their specialty?", "schema": "CREATE TABLE attorneys (attorney_id INT, specialty VARCHAR(255)); CREATE TABLE billing (bill_id INT, attorney_id INT, amount DECIMAL(10,2));", "sql": "SELECT a.specialty, AVG(b.amount) FROM attorneys a INNER JOIN billing b ON a.attorney_id = b.attorney_id GROUP BY a.specialty;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 126, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who's the opponent of the game with the record 64-51?", "schema": "CREATE TABLE table_name_60 (opponent VARCHAR, record VARCHAR)", "sql": "SELECT opponent FROM table_name_60 WHERE record = '64-51';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 58, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the number of titles written by adam i. lapidus", "schema": "CREATE TABLE table_12030612_9 (title VARCHAR, written_by VARCHAR)", "sql": "SELECT COUNT(title) FROM table_12030612_9 WHERE written_by = 'Adam I. Lapidus';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 79, "num_statements": 1} {"question": "What are the earliest artifacts in each excavation site?", "schema": "CREATE TABLE excavation_sites (site_id INT, site_name VARCHAR(255)); CREATE TABLE artifacts (artifact_id INT, site_id INT, artifact_type VARCHAR(255), date_found DATE); INSERT INTO excavation_sites (site_id, site_name) VALUES (1, 'site_a'), (2, 'site_b'), (3, 'site_c'); INSERT INTO artifacts (artifact_id, site_id, artifact_type, date_found) VALUES (1, 1, 'Pottery', '2020-01-01'), (2, 1, 'Bone Fragments', '2019-01-01'), (3, 2, 'Pottery', '2021-01-01'), (4, 2, 'Coins', '2020-01-01'), (5, 3, 'Bone Fragments', '2018-01-01'), (6, 3, 'Bronze Tools', '2019-01-01');", "sql": "SELECT site_name, MIN(date_found) as earliest_date FROM excavation_sites s JOIN artifacts a ON s.site_id = a.site_id GROUP BY site_name;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 136, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: what is the number of smallpox when typhoid fever is 293", "schema": "CREATE TABLE table_1007688_1 (smallpox INTEGER, typhoid_fever VARCHAR)", "sql": "SELECT MAX(smallpox) FROM table_1007688_1 WHERE typhoid_fever = 293;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 68, "num_statements": 1} {"question": "What is the maximum safety rating for algorithmic fairness models?", "schema": "CREATE TABLE fairness_models (model_name TEXT, safety_rating INTEGER); INSERT INTO fairness_models (model_name, safety_rating) VALUES ('Model X', 9), ('Model Y', 8), ('Model Z', 10);", "sql": "SELECT MAX(safety_rating) FROM fairness_models WHERE model_name LIKE '%fairness%';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 82, "num_statements": 1} {"question": "What is the total number of cybersecurity incidents and the average severity for each department, partitioned by month and ordered by total number of cybersecurity incidents in descending order?", "schema": "CREATE TABLE cybersecurity_incident (id INT, department_id INT, severity INT, incident_date DATE); INSERT INTO cybersecurity_incident (id, department_id, severity, incident_date) VALUES (1, 1, 8, '2021-03-15'); INSERT INTO cybersecurity_incident (id, department_id, severity, incident_date) VALUES (2, 2, 5, '2022-01-10'); CREATE TABLE department (id INT, name VARCHAR(255)); INSERT INTO department (id, name) VALUES (1, 'IT'); INSERT INTO department (id, name) VALUES (2, 'Security');", "sql": "SELECT d.name as department, DATEPART(YEAR, incident_date) as year, DATEPART(MONTH, incident_date) as month, COUNT(ci.id) as total_cybersecurity_incidents, AVG(ci.severity) as avg_severity, ROW_NUMBER() OVER (PARTITION BY d.name ORDER BY COUNT(ci.id) DESC) as rank FROM cybersecurity_incident ci JOIN department d ON ci.department_id = d.id GROUP BY d.name, DATEPART(YEAR, incident_date), DATEPART(MONTH, incident_date) ORDER BY total_cybersecurity_incidents DESC;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "query_window_function", "is_postgresql_specific": true, "sql_length": 464, "num_statements": 1} {"question": "What is the number of professional development events attended by teachers in each region, ordered by attendance?", "schema": "CREATE TABLE teacher_events (teacher_id INT, region VARCHAR(20), event_attended INT); INSERT INTO teacher_events (teacher_id, region, event_attended) VALUES (1, 'North', 2), (2, 'North', 1), (3, 'South', 3), (4, 'South', 0);", "sql": "SELECT region, SUM(event_attended) as total_events, ROW_NUMBER() OVER (ORDER BY SUM(event_attended) DESC) as rank FROM teacher_events GROUP BY region ORDER BY rank;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_window_function", "is_postgresql_specific": true, "sql_length": 164, "num_statements": 1} {"question": "What are the total R&D expenditures and sales amounts for each drug, unpivoted and with a total row?", "schema": "CREATE TABLE RnDExpenditures (drug_name VARCHAR(255), rnd_expenditure DECIMAL(10,2)); INSERT INTO RnDExpenditures (drug_name, rnd_expenditure) VALUES ('DrugD', 60000.00), ('DrugE', 80000.00), ('DrugF', 40000.00); CREATE TABLE SalesData (drug_name VARCHAR(255), sales_quantity INT, sales_amount DECIMAL(10,2)); INSERT INTO SalesData (drug_name, sales_quantity, sales_amount) VALUES ('DrugD', 120, 18000.00), ('DrugE', 150, 22500.00), ('DrugF', 75, 10500.00);", "sql": "SELECT drug_name, 'rnd_expenditure' as metric, SUM(rnd_expenditure) as value FROM RnDExpenditures GROUP BY drug_name UNION ALL SELECT drug_name, 'sales_amount' as metric, SUM(sales_amount) as value FROM SalesData GROUP BY drug_name UNION ALL SELECT 'Total', SUM(value) as value FROM (SELECT drug_name, 'rnd_expenditure' as metric, SUM(rnd_expenditure) as value FROM RnDExpenditures GROUP BY drug_name UNION ALL SELECT drug_name, 'sales_amount' as metric, SUM(sales_amount) as value FROM SalesData GROUP BY drug_name) sub;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 521, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the final score of Washington's home game?", "schema": "CREATE TABLE table_28298589_4 (result VARCHAR, home_team VARCHAR)", "sql": "SELECT result FROM table_28298589_4 WHERE home_team = 'Washington';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 67, "num_statements": 1} {"question": "What is the total budget allocated for public transportation in New York City?", "schema": "CREATE TABLE public_transportation (transport_id INT, transport_name TEXT, city TEXT, state TEXT, budget INT); INSERT INTO public_transportation (transport_id, transport_name, city, state, budget) VALUES (1, 'New York City Subway', 'New York', 'New York', 8000000000); INSERT INTO public_transportation (transport_id, transport_name, city, state, budget) VALUES (2, 'MTA Bus Company', 'New York', 'New York', 500000000); INSERT INTO public_transportation (transport_id, transport_name, city, state, budget) VALUES (3, 'Staten Island Ferry', 'New York', 'New York', 200000000);", "sql": "SELECT SUM(budget) FROM public_transportation WHERE city = 'New York' AND state = 'New York';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 93, "num_statements": 1} {"question": "What was the average sale price for all artworks in the 'Rococo' movement that were sold by the 'Victoria and Albert Museum'?", "schema": "CREATE TABLE Artworks (artwork_id INT, movement VARCHAR(255), sale_price DECIMAL(10, 2), museum_name VARCHAR(255));", "sql": "SELECT AVG(sale_price) FROM Artworks WHERE movement = 'Rococo' AND museum_name = 'Victoria and Albert Museum';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 110, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: what's the winner with tournament value of kroger senior classic", "schema": "CREATE TABLE table_11621915_1 (winner VARCHAR, tournament VARCHAR)", "sql": "SELECT winner FROM table_11621915_1 WHERE tournament = 'Kroger Senior Classic';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 79, "num_statements": 1} {"question": "Delete all records in the 'rural_infrastructure' table where the budget is less than 50000.", "schema": "CREATE TABLE rural_infrastructure (id INT, project_name VARCHAR(255), budget INT);", "sql": "DELETE FROM rural_infrastructure WHERE budget < 50000;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_delete", "is_postgresql_specific": false, "sql_length": 54, "num_statements": 1} {"question": "Find the number of autonomous shuttles in each city", "schema": "CREATE TABLE autonomous_vehicles (id INT PRIMARY KEY, make VARCHAR(255), model VARCHAR(255), year INT, city VARCHAR(255));", "sql": "CREATE VIEW autonmous_shuttles AS SELECT city, COUNT(*) as num_shuttles FROM autonomous_vehicles WHERE make = 'Wayve' AND model = 'Kamino' GROUP BY city;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "ddl_view", "is_postgresql_specific": false, "sql_length": 153, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Is it home or away when opponent is Pride with a W 11-10 result?", "schema": "CREATE TABLE table_name_49 (home_away VARCHAR, opponent VARCHAR, result VARCHAR)", "sql": "SELECT home_away FROM table_name_49 WHERE opponent = 'pride' AND result = 'w 11-10';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 84, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which athlete is ranked below 5 and is from China?", "schema": "CREATE TABLE table_name_62 (athletes VARCHAR, rank VARCHAR, country VARCHAR)", "sql": "SELECT athletes FROM table_name_62 WHERE rank < 5 AND country = 'china';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 72, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'numeric_big' (example 186).", "schema": null, "sql": "INSERT INTO num_exp_add VALUES (4,1,'5329378275943663322300488.64471790965256505869684245785528331091076155554650629138833809683459634328609777839510066435612911583108717191216693735823717997111970662575497378762952496582183738308720094529950793570383580785385569873278068217936841324404119828637880370718028782103860007754579779716996004352284614661690063919125301052941328989181561787543541920734755989452320799185700078241880935083616978140555713297241612718277766918005268951861880490889884082730841740604517529391011862694381726143520658746305661338923049035040974032671138430612839043269997482582763267536489504794826476836323549796385028155416935072959933315468068930689064483178204550825728947252440604703474049780550458442808479096492346910001692358508618202898514895453589357');", "explanation": "DML from PostgreSQL core regression test for Numeric Big.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 788, "num_statements": 1} {"question": "Who are the male actors with more than 5 movies acted?", "schema": "CREATE TABLE actors (name VARCHAR(255), gender VARCHAR(10), movies INTEGER); INSERT INTO actors (name, gender, movies) VALUES ('ActorA', 'Female', 3), ('ActorB', 'Male', 15), ('ActorC', 'Female', 2), ('ActorD', 'Male', 20), ('ActorE', 'Female', 8), ('ActorF', 'Male', 7), ('ActorG', 'Female', 6), ('ActorH', 'Male', 12);", "sql": "SELECT name FROM actors WHERE gender = 'Male' AND movies > 5;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 61, "num_statements": 1} {"question": "How many volunteer hours were recorded for environmental programs in California?", "schema": "CREATE TABLE VolunteerEvents (EventID INT, EventName TEXT, Location TEXT, EventType TEXT); INSERT INTO VolunteerEvents (EventID, EventName, Location, EventType) VALUES (1, 'Beach Cleanup', 'California', 'Environment'), (2, 'Tree Planting', 'New York', 'Environment');", "sql": "SELECT SUM(VolunteerHours) FROM VolunteerEvents JOIN VolunteerHours ON VolunteerEvents.EventID = VolunteerHours.EventID WHERE VolunteerEvents.Location = 'California' AND VolunteerEvents.EventType = 'Environment';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 212, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was Geelong's score when they were the away team?", "schema": "CREATE TABLE table_name_62 (away_team VARCHAR)", "sql": "SELECT away_team AS score FROM table_name_62 WHERE away_team = 'geelong';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 73, "num_statements": 1} {"question": "Find the client with the highest account balance in each region.", "schema": "CREATE TABLE clients (client_id INT, name VARCHAR(50), region VARCHAR(20), account_balance DECIMAL(10,2)); INSERT INTO clients (client_id, name, region, account_balance) VALUES (1, 'John Smith', 'West', 30000.00), (2, 'Jane Doe', 'Northeast', 22000.00), (3, 'Mike Johnson', 'West', 35000.00), (4, 'Sara Jones', 'Southeast', 12000.00), (5, 'William Brown', 'Northeast', 25000.00), (6, 'Emily Davis', 'Southeast', 40000.00);", "sql": "SELECT client_id, name, region, account_balance FROM (SELECT client_id, name, region, account_balance, DENSE_RANK() OVER (PARTITION BY region ORDER BY account_balance DESC) as rank FROM clients) AS ranked_clients WHERE rank = 1;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "query_window_function", "is_postgresql_specific": true, "sql_length": 228, "num_statements": 1} {"question": "Count the number of military equipment maintenance requests for each type of equipment in the state of New York", "schema": "CREATE TABLE military_equipment (equipment_id INT, name VARCHAR(255), type VARCHAR(255), maintenance_cost DECIMAL(10,2), state VARCHAR(2)); CREATE TABLE maintenance_requests (request_id INT, equipment_id INT, request_date DATE, branch VARCHAR(255));", "sql": "SELECT equipment_type, COUNT(*) as num_requests FROM military_equipment JOIN maintenance_requests ON military_equipment.equipment_id = maintenance_requests.equipment_id WHERE state = 'New York' GROUP BY equipment_type;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 218, "num_statements": 1} {"question": "What is the average word count of articles published on politics in the last month, grouped by week?", "schema": "CREATE TABLE articles (id INT, title VARCHAR(255), word_count INT, publish_date DATE, topic VARCHAR(255)); INSERT INTO articles (id, title, word_count, publish_date, topic) VALUES (1, 'Article 1', 500, '2022-01-01', 'politics'), (2, 'Article 2', 700, '2022-01-05', 'politics');", "sql": "SELECT AVG(word_count), WEEKOFYEAR(publish_date) AS Week FROM articles WHERE topic = 'politics' AND publish_date >= DATE_SUB(NOW(), INTERVAL 1 MONTH) GROUP BY Week;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 164, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who were the opponents that played on a hard surface on July 13, 2008?", "schema": "CREATE TABLE table_name_85 (opponents VARCHAR, surface VARCHAR, date VARCHAR)", "sql": "SELECT opponents FROM table_name_85 WHERE surface = 'hard' AND date = 'july 13, 2008';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 86, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many games were played on october 16?", "schema": "CREATE TABLE table_27756014_1 (high_points VARCHAR, date VARCHAR)", "sql": "SELECT COUNT(high_points) FROM table_27756014_1 WHERE date = 'October 16';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 74, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who was the outgoing manager replaced by Marian Bucurescu?", "schema": "CREATE TABLE table_17115950_2 (outgoing_manager VARCHAR, replaced_by VARCHAR)", "sql": "SELECT outgoing_manager FROM table_17115950_2 WHERE replaced_by = 'Marian Bucurescu';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 85, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: In the Western Oval venue, what is the average crowd?", "schema": "CREATE TABLE table_name_38 (crowd INTEGER, venue VARCHAR)", "sql": "SELECT AVG(crowd) FROM table_name_38 WHERE venue = 'western oval';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "Present the number of cases and their respective outcomes for a given region", "schema": "CREATE TABLE region_outcomes (region VARCHAR(50) PRIMARY KEY, cases_handled INT, win_rate DECIMAL(5,4));", "sql": "SELECT region, SUM(cases_handled) FROM region_outcomes GROUP BY region;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 71, "num_statements": 1} {"question": "Show a SQL definition from the zombodb project (test-xform, item 2).", "schema": null, "sql": "CREATE VIEW xform_test_view AS\n SELECT xform_test.*,\n users.url,\n users.login,\n users.avatar_url,\n users.gravatar_id,\n users.display_login\n FROM xform_test\n LEFT JOIN users ON xform_test.user_id = users.id;", "explanation": "SQL definition from the open-source zombodb PostgreSQL project.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "ddl_view", "is_postgresql_specific": false, "sql_length": 269, "num_statements": 1} {"question": "Count the number of accidents for each aircraft.", "schema": "CREATE TABLE FlightSafety (accident_id INT, aircraft_id INT, accident_date DATE);", "sql": "SELECT aircraft_id, COUNT(*) FROM FlightSafety GROUP BY aircraft_id;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 68, "num_statements": 1} {"question": "What is the monthly production volume of sustainable garments in each country?", "schema": "CREATE TABLE productions (id INT, garment VARCHAR(50), material VARCHAR(50), country VARCHAR(50), production_date DATE); INSERT INTO productions (id, garment, material, country, production_date) VALUES (1, 'T-Shirt', 'Organic Cotton', 'India', '2021-01-15'), (2, 'Hoodie', 'Bamboo Fabric', 'China', '2021-02-20'), (3, 'Jacket', 'Recycled Polyester', 'Bangladesh', '2021-03-10');", "sql": "SELECT m.country, EXTRACT(MONTH FROM production_date) as month, COUNT(*) as production_volume FROM productions p JOIN materials m ON p.country = m.country WHERE m.type IN ('Organic Cotton', 'Bamboo Fabric', 'Recycled Polyester') GROUP BY m.country, month ORDER BY month;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 270, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the To par of Jack Nicklaus?", "schema": "CREATE TABLE table_name_71 (to_par VARCHAR, player VARCHAR)", "sql": "SELECT to_par FROM table_name_71 WHERE player = 'jack nicklaus';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 64, "num_statements": 1} {"question": "What is the average production volume of chemical substances in the Europe region for the year 2019?", "schema": "CREATE TABLE ChemicalSubstances (SubstanceID INT, SubstanceName VARCHAR(50), ProductionVolume INT, Region VARCHAR(50), ProductionDate DATE); INSERT INTO ChemicalSubstances (SubstanceID, SubstanceName, ProductionVolume, Region, ProductionDate) VALUES (1, 'Ethylene', 13000, 'Europe', '2019-01-01'), (2, 'Propylene', 9000, 'Europe', '2019-02-01'), (3, 'Benzenene', 14000, 'Europe', '2019-03-01');", "sql": "SELECT AVG(ProductionVolume) FROM ChemicalSubstances WHERE Region = 'Europe' AND YEAR(ProductionDate) = 2019;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 109, "num_statements": 1} {"question": "PostgreSQL regression test 'money': Write the SELECT query (example 106).", "schema": null, "sql": "SELECT '1'::money / 0::int2;", "explanation": "Regression test for Money in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT '1'::money / 0::int2) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 28, "num_statements": 1} {"question": "What is the total cost of all road construction projects in Florida?", "schema": "CREATE TABLE road_construction (project_name TEXT, project_cost INT, project_state TEXT); INSERT INTO road_construction (project_name, project_cost, project_state) VALUES ('RPC1', 5000, 'Florida'), ('RPC2', 7000, 'Florida'), ('RPC3', 8000, 'Florida'), ('RPC4', 6000, 'Florida');", "sql": "SELECT SUM(project_cost) FROM road_construction WHERE project_state = 'Florida';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 80, "num_statements": 1} {"question": "List the case numbers, client addresses, and total billing amount for cases with the word 'divorce' in the case name, in the state of New York, ordered by the total billing amount in descending order.", "schema": "CREATE TABLE Cases (CaseID INT, CaseName VARCHAR(255), ClientAddress VARCHAR(255), AttorneyID INT); INSERT INTO Cases (CaseID, CaseName, ClientAddress, AttorneyID) VALUES (1, 'Smith v. Johnson - Divorce', '123 Main St, New York, NY', 1); CREATE TABLE Billing (BillingID INT, CaseID INT, Amount DECIMAL(10, 2));", "sql": "SELECT Cases.CaseID, Cases.ClientAddress, SUM(Billing.Amount) FROM Cases INNER JOIN Billing ON Cases.CaseID = Billing.CaseID WHERE Cases.CaseName LIKE '%divorce%' AND Cases.AttorneyID IN (SELECT AttorneyID FROM Attorneys WHERE State = 'New York') GROUP BY Cases.CaseID, Cases.ClientAddress ORDER BY SUM(Billing.Amount) DESC;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 324, "num_statements": 1} {"question": "Which countries have participated in the community_education program, and what was the total expenditure for each country?", "schema": "CREATE TABLE community_education (id INT, country VARCHAR(50), participation BOOLEAN, expenditure INT);", "sql": "SELECT country, SUM(expenditure) FROM community_education WHERE participation = TRUE GROUP BY country;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 102, "num_statements": 1} {"question": "List eSports teams with the lowest number of players", "schema": "CREATE TABLE teams (id INT, region VARCHAR(10), players INT); INSERT INTO teams (id, region, players) VALUES (1, 'Europe', 50); INSERT INTO teams (id, region, players) VALUES (2, 'Asia', 75); INSERT INTO teams (id, region, players) VALUES (3, 'America', 100); INSERT INTO teams (id, region, players) VALUES (4, 'Africa', 25);", "sql": "SELECT id, region, players FROM teams ORDER BY players ASC LIMIT 1;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 67, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'postgres_fdw' (example 299).", "schema": null, "sql": "select c2/2, sum(c2) * (c2/2) from ft1 group by c2/2 order by c2/2;", "explanation": "Example query from the 'postgres_fdw' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 67, "num_statements": 1} {"question": "What is the total installed capacity of solar plants in the 'solar_plants' table, and what is the average installed capacity of these solar plants, grouped by state?", "schema": "CREATE TABLE solar_plants (id INT, state VARCHAR(255), name VARCHAR(255), capacity FLOAT, start_date DATE, end_date DATE); INSERT INTO solar_plants (id, state, name, capacity, start_date, end_date) VALUES (6, 'California', 'Solar Plant D', 40.0, '2021-01-01', '2026-12-31'), (7, 'Nevada', 'Solar Plant E', 50.0, '2022-01-01', '2027-12-31');", "sql": "SELECT state, SUM(capacity) as total_capacity, AVG(capacity) as avg_capacity FROM solar_plants GROUP BY state;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 110, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the medal named fatuma roba category:articles with hcards for?", "schema": "CREATE TABLE table_name_73 (medal VARCHAR, name VARCHAR)", "sql": "SELECT medal FROM table_name_73 WHERE name = 'fatuma roba category:articles with hcards';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 89, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'without_overlaps' (example 586).", "schema": null, "sql": "UPDATE temporal_partitioned_fk_rng2rng SET id = '[4,5)' WHERE id = '[1,2)';", "explanation": "DML from PostgreSQL core regression test for Without Overlaps.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 75, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What's the series number of the episode with production code 329?", "schema": "CREATE TABLE table_18055005_1 (no_in_series VARCHAR, prod_code VARCHAR)", "sql": "SELECT no_in_series FROM table_18055005_1 WHERE prod_code = '329';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the address of the restaurant Subway?", "schema": "CREATE TABLE Restaurant (Address VARCHAR, ResName VARCHAR)", "sql": "SELECT Address FROM Restaurant WHERE ResName = 'Subway';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 56, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many laps did Ricardo Zonta have?", "schema": "CREATE TABLE table_name_69 (laps INTEGER, driver VARCHAR)", "sql": "SELECT SUM(laps) FROM table_name_69 WHERE driver = 'ricardo zonta';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 67, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the total number of goals scored of the club with more than 3 draws, more than 1 loses, and less than 26 points?", "schema": "CREATE TABLE table_name_15 (goals_scored VARCHAR, points VARCHAR, draws VARCHAR, loses VARCHAR)", "sql": "SELECT COUNT(goals_scored) FROM table_name_15 WHERE draws > 3 AND loses > 1 AND points < 26;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 92, "num_statements": 1} {"question": "How many community policing events were held in the last month in each district?", "schema": "CREATE TABLE community_policing (id INT, date DATE, district VARCHAR(20)); INSERT INTO community_policing (id, date, district) VALUES (1, '2022-01-01', 'district1'), (2, '2022-01-02', 'district2');", "sql": "SELECT district, COUNT(*) FROM community_policing WHERE date >= DATE_SUB(CURRENT_DATE, INTERVAL 1 MONTH) GROUP BY district;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 123, "num_statements": 1} {"question": "How many songs were released per month in 2020?", "schema": "CREATE TABLE song_release (id INT, title TEXT, release_month INT, release_year INT, genre TEXT); INSERT INTO song_release (id, title, release_month, release_year, genre) VALUES (1, 'Song4', 1, 2020, 'Pop'); INSERT INTO song_release (id, title, release_month, release_year, genre) VALUES (2, 'Song5', 3, 2020, 'Rock'); INSERT INTO song_release (id, title, release_month, release_year, genre) VALUES (3, 'Song6', 12, 2020, 'Jazz');", "sql": "SELECT release_month, COUNT(*) as songs_released FROM song_release WHERE release_year = 2020 GROUP BY release_month;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 116, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What did vijay singh score with a +5 to par?", "schema": "CREATE TABLE table_name_92 (score VARCHAR, to_par VARCHAR, player VARCHAR)", "sql": "SELECT score FROM table_name_92 WHERE to_par = '+5' AND player = 'vijay singh';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 79, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What score has detroit as the home, and December 9 as the date?", "schema": "CREATE TABLE table_name_78 (score VARCHAR, home VARCHAR, date VARCHAR)", "sql": "SELECT score FROM table_name_78 WHERE home = 'detroit' AND date = 'december 9';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 79, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is average December when game is 30?", "schema": "CREATE TABLE table_name_79 (december INTEGER, game VARCHAR)", "sql": "SELECT AVG(december) FROM table_name_79 WHERE game = 30;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 56, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the clas AAAA of school year 2000-01", "schema": "CREATE TABLE table_name_76 (class_aAAA VARCHAR, school_year VARCHAR)", "sql": "SELECT class_aAAA FROM table_name_76 WHERE school_year = '2000-01';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 67, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What college does Dennis Scott attend?", "schema": "CREATE TABLE table_name_26 (college VARCHAR, player VARCHAR)", "sql": "SELECT college FROM table_name_26 WHERE player = 'dennis scott';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 64, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'foreign_key' (example 924).", "schema": null, "sql": "CREATE TABLE pk32 (b int, a int NOT NULL);", "explanation": "DDL from PostgreSQL core regression test for Foreign Key.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_table", "is_postgresql_specific": false, "sql_length": 42, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the score on February 23, 2005?", "schema": "CREATE TABLE table_name_81 (score VARCHAR, date VARCHAR)", "sql": "SELECT score FROM table_name_81 WHERE date = 'february 23, 2005';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 65, "num_statements": 1} {"question": "What is the highest salary for employees in the finance department?", "schema": "CREATE TABLE employees (id INT, name VARCHAR(50), department VARCHAR(50), salary FLOAT); INSERT INTO employees (id, name, department, salary) VALUES (1, 'John Doe', 'Marketing', 75000.00), (2, 'Jane Smith', 'Marketing', 80000.00), (3, 'Richard Roe', 'Finance', 90000.00), (4, 'Judy Johnson', 'Finance', 95000.00);", "sql": "SELECT MAX(salary) FROM employees WHERE department = 'Finance';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 63, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the average pots of Alfa Romeo v12 after 1983?", "schema": "CREATE TABLE table_name_44 (pts INTEGER, engine VARCHAR, year VARCHAR)", "sql": "SELECT AVG(pts) FROM table_name_44 WHERE engine = 'alfa romeo v12' AND year > 1983;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 83, "num_statements": 1} {"question": "What is the number of military diplomacy events held by Russia with African countries in 2019?", "schema": "CREATE TABLE military_diplomacy (id INT, country VARCHAR(255), year INT, event_name VARCHAR(255)); INSERT INTO military_diplomacy (id, country, year, event_name) VALUES (1, 'Egypt', 2019, 'Russian-Egyptian Military Cooperation Commission');", "sql": "SELECT COUNT(*) FROM military_diplomacy WHERE country LIKE 'Africa%' AND year = 2019;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 85, "num_statements": 1} {"question": "Update the treatment type for patient 001 in the treatments table to \"Group Therapy\".", "schema": "CREATE TABLE patients (id TEXT PRIMARY KEY, name TEXT, age INT, gender TEXT); CREATE TABLE treatments (patient_id TEXT, treatment_type TEXT, therapist_id TEXT, start_date DATE, end_date DATE, FOREIGN KEY (patient_id) REFERENCES patients(id));", "sql": "WITH updated_treatment AS (UPDATE treatments SET treatment_type = 'Group Therapy' WHERE patient_id = '001') SELECT * FROM updated_treatment;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 140, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'intarray' (example 64).", "schema": null, "sql": "SELECT '1&(2&(4&(5&6)))'::query_int;", "explanation": "Example query from the 'intarray' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 36, "num_statements": 1} {"question": "What is the total revenue for the 'Blue' line since its inception?", "schema": "CREATE TABLE routes (line VARCHAR(10), start_date DATE); INSERT INTO routes (line, start_date) VALUES ('Blue', '2010-01-01'); CREATE TABLE fares (route VARCHAR(10), revenue DECIMAL(10, 2)); INSERT INTO fares (route, revenue) VALUES ('Blue', 5000), ('Blue', 6000), ('Blue', 7000);", "sql": "SELECT SUM(revenue) FROM fares WHERE route = (SELECT line FROM routes WHERE start_date <= '2010-01-01' AND line = 'Blue' LIMIT 1);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 130, "num_statements": 1} {"question": "What is the name and type of all satellites launched by spacecraft from Russia?", "schema": "CREATE TABLE Spacecraft (id INT, name VARCHAR(50), country VARCHAR(50), launch_date DATE); INSERT INTO Spacecraft (id, name, country, launch_date) VALUES (1, 'Falcon 9', 'USA', '2010-06-04'); INSERT INTO Spacecraft (id, name, country, launch_date) VALUES (2, 'Soyuz-FG', 'Russia', '2001-11-02'); CREATE TABLE Satellites (id INT, name VARCHAR(50), type VARCHAR(50), spacecraft_id INT); INSERT INTO Satellites (id, name, type, spacecraft_id) VALUES (1, 'TESS', 'Observation', 1); INSERT INTO Satellites (id, name, type, spacecraft_id) VALUES (2, 'MetOp-C', 'Weather', 2);", "sql": "SELECT s.name, s.type FROM Satellites s JOIN Spacecraft sp ON s.spacecraft_id = sp.id WHERE sp.country = 'Russia';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 114, "num_statements": 1} {"question": "What is the breakdown of the budget allocated for accessibility improvements in each school?", "schema": "CREATE TABLE AccessibilityImprovements (SchoolName VARCHAR(255), Year INT, Budget DECIMAL(10,2)); INSERT INTO AccessibilityImprovements (SchoolName, Year, Budget) VALUES ('SchoolA', 2020, 50000.00), ('SchoolB', 2020, 75000.00), ('SchoolC', 2019, 60000.00);", "sql": "SELECT SchoolName, SUM(Budget) as TotalBudget FROM AccessibilityImprovements WHERE SchoolName IN (SELECT SchoolName FROM Schools WHERE Type = 'School') GROUP BY SchoolName;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 172, "num_statements": 1} {"question": "Insert a new garment with id 3, name 'Jacket', category 'Men', rating 5 and quantity 25 into the database.", "schema": "CREATE TABLE garments (id INT PRIMARY KEY, name VARCHAR(255), category VARCHAR(255), rating INT); CREATE TABLE inventories (id INT PRIMARY KEY, garment_id INT, quantity INT);", "sql": "INSERT INTO garments (id, name, category, rating) VALUES (3, 'Jacket', 'Men', 5); INSERT INTO inventories (id, garment_id, quantity) VALUES (3, 3, 25);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 151, "num_statements": 2} {"question": "What is the total number of accessible technology conferences in South America?", "schema": "CREATE TABLE tech_conferences (conference_location VARCHAR(255), is_accessible BOOLEAN); INSERT INTO tech_conferences (conference_location, is_accessible) VALUES ('Brazil', true), ('Colombia', false), ('Argentina', true);", "sql": "SELECT COUNT(*) FROM tech_conferences WHERE conference_location LIKE 'South%' AND is_accessible = true;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 103, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the High points with a Game that is 56?", "schema": "CREATE TABLE table_name_31 (high_points VARCHAR, game VARCHAR)", "sql": "SELECT high_points FROM table_name_31 WHERE game = 56;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 54, "num_statements": 1} {"question": "Which public transportation systems were added in Q3 2021?", "schema": "CREATE TABLE schema.public_transportation (system_id INT, system_name VARCHAR(50), system_type VARCHAR(50), implementation_date DATE); INSERT INTO schema.public_transportation (system_id, system_name, system_type, implementation_date) VALUES (1, 'Subway', 'Rail', '2021-04-01'), (2, 'Light Rail', 'Rail', '2021-07-01'), (3, 'Bus Rapid Transit', 'Bus', '2021-10-01');", "sql": "SELECT system_name FROM schema.public_transportation WHERE implementation_date BETWEEN '2021-07-01' AND '2021-09-30';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 117, "num_statements": 1} {"question": "What is the average age of visitors who attended the \"African Art\" exhibition?", "schema": "CREATE TABLE Exhibitions (ExhibitionID INT, ExhibitionName VARCHAR(255), Age INT); INSERT INTO Exhibitions (ExhibitionID, ExhibitionName, Age) VALUES (1, 'African Art Exhibition', 32), (2, 'Contemporary Art Exhibition', 28);", "sql": "SELECT AVG(Age) FROM Exhibitions WHERE ExhibitionName = 'African Art Exhibition';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 81, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Tell me the wkts for econ of 4.23", "schema": "CREATE TABLE table_name_13 (wkts VARCHAR, econ VARCHAR)", "sql": "SELECT wkts FROM table_name_13 WHERE econ = '4.23';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 51, "num_statements": 1} {"question": "Delete fish farming records with no species information?", "schema": "CREATE TABLE FishFarming (FarmID INT, Location VARCHAR(50), Date DATE, Species VARCHAR(50));", "sql": "DELETE FROM FishFarming WHERE Species IS NULL;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_delete", "is_postgresql_specific": false, "sql_length": 46, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the USN 2013 ranking with a BW 2013 ranking less than 1000, a Forbes 2011 ranking larger than 17, and a CNN 2011 ranking less than 13?", "schema": "CREATE TABLE table_name_63 (usn_2013 INTEGER, cnn_2011 VARCHAR, bw_2013 VARCHAR, forbes_2011 VARCHAR)", "sql": "SELECT SUM(usn_2013) FROM table_name_63 WHERE bw_2013 < 1000 AND forbes_2011 > 17 AND cnn_2011 < 13;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 100, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'btree_gin' (example 5).", "schema": null, "sql": "SELECT * FROM test_macaddr8 WHERE i<'22:00:5c:08:55:08:01:02'::macaddr8 ORDER BY i;", "explanation": "Example query from the 'btree_gin' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 83, "num_statements": 1} {"question": "How many graduate students are enrolled in the Mathematics or Physics departments?", "schema": "CREATE TABLE departments (id INT, name VARCHAR(50)); INSERT INTO departments (id, name) VALUES (1, 'Mathematics'); INSERT INTO departments (id, name) VALUES (2, 'Physics'); CREATE TABLE students (id INT, name VARCHAR(50), department_id INT, level VARCHAR(10)); INSERT INTO students (id, name, department_id, level) VALUES (1, 'John Doe', 1, 'Graduate'); INSERT INTO students (id, name, department_id, level) VALUES (2, 'Jane Smith', 2, 'Graduate');", "sql": "SELECT COUNT(*) FROM students WHERE department_id IN (SELECT id FROM departments WHERE name IN ('Mathematics', 'Physics')) AND level = 'Graduate';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 146, "num_statements": 1} {"question": "What is the most common type of crime committed in each borough, in the last month?", "schema": "CREATE TABLE crimes (id INT, date DATE, borough VARCHAR(50), type VARCHAR(50));", "sql": "SELECT borough, type, COUNT(*) as count FROM crimes WHERE date >= DATEADD(MONTH, -1, GETDATE()) GROUP BY borough, type ORDER BY borough, count DESC;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 148, "num_statements": 1} {"question": "Show a SQL definition from the timescaledb project (size_utils, item 24).", "schema": null, "sql": "-- Get relation size of the chunks of an hypertable\n-- hypertable - hypertable to get size of\n--\n-- Returns:\n-- chunk_schema - schema name for chunk\n-- chunk_name - chunk table name\n-- table_bytes - Disk space used by chunk table\n-- index_bytes - Disk space used by indexes\n-- toast_bytes - Disk space of toast tables\n-- total_bytes - Disk space used in total\n-- node_name - node on which chunk lives if this is\n-- a distributed hypertable.\nCREATE OR REPLACE FUNCTION @extschema@.chunks_detailed_size(\n hypertable REGCLASS\n)\nRETURNS TABLE (\n chunk_schema NAME,\n chunk_name NAME,\n table_bytes BIGINT,\n index_bytes BIGINT,\n toast_bytes BIGINT,\n total_bytes BIGINT,\n node_name NAME)\nLANGUAGE PLPGSQL VOLATILE STRICT AS\n$BODY$\nDECLARE\n table_name NAME;", "explanation": "SQL definition from the open-source timescaledb PostgreSQL project.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "plpgsql_function", "is_postgresql_specific": true, "sql_length": 1034, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many against when losses are 11 and wins are fewer than 5?", "schema": "CREATE TABLE table_name_24 (against INTEGER, losses VARCHAR, wins VARCHAR)", "sql": "SELECT AVG(against) FROM table_name_24 WHERE losses = 11 AND wins < 5;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 70, "num_statements": 1} {"question": "What is the count of startups founded by Latinx individuals in the education sector?", "schema": "CREATE TABLE company (id INT, name TEXT, industry TEXT, founder_race TEXT); INSERT INTO company (id, name, industry, founder_race) VALUES (1, 'EduBoost', 'Education', 'Latinx'); INSERT INTO company (id, name, industry, founder_race) VALUES (2, 'Shopify', 'E-commerce', 'Asian'); CREATE TABLE funding_round (company_id INT, round_size INT); INSERT INTO funding_round (company_id, round_size) VALUES (1, 5000000); INSERT INTO funding_round (company_id, round_size) VALUES (2, 7000000);", "sql": "SELECT COUNT(*) FROM company WHERE company.founder_race = 'Latinx' AND company.industry = 'Education';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 102, "num_statements": 1} {"question": "Insert a new record into the 'calibration_data' table with 'algorithm' = 'Deep Learning', 'precision' = 0.8, 'recall' = 0.7", "schema": "CREATE TABLE calibration_data (id INT, algorithm VARCHAR(20), precision DECIMAL(3,2), recall DECIMAL(3,2)); INSERT INTO calibration_data (id, algorithm, precision, recall) VALUES (1, 'Deep Learning', 0.8, 0.7);", "sql": "INSERT INTO calibration_data (algorithm, precision, recall) VALUES ('Deep Learning', 0.8, 0.7);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 95, "num_statements": 1} {"question": "What is the total number of volunteers in organizations that have implemented healthcare projects in Asia since 2018?", "schema": "CREATE TABLE organizations (id INT, name TEXT, region TEXT); CREATE TABLE projects (id INT, organization_id INT, project_name TEXT, start_date DATE); INSERT INTO organizations (id, name, region) VALUES (1, 'Aid Africa', 'Africa'), (2, 'Asian Aid', 'Asia'), (3, 'Healthcare Hope', 'Asia'); INSERT INTO projects (id, organization_id, project_name, start_date) VALUES (1, 2, 'Healthcare Initiative', '2018-04-01'), (2, 2, 'Education Program', '2017-08-15'), (3, 3, 'Disaster Relief', '2019-12-25');", "sql": "SELECT COUNT(DISTINCT o.id) FROM organizations o INNER JOIN projects p ON o.id = p.organization_id WHERE o.region = 'Asia' AND p.project_name LIKE '%healthcare%' AND p.start_date >= '2018-01-01';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 195, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which Original title has a Year of 1983, and a Country of italy?", "schema": "CREATE TABLE table_name_78 (original_title VARCHAR, year VARCHAR, country VARCHAR)", "sql": "SELECT original_title FROM table_name_78 WHERE year = 1983 AND country = 'italy';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 81, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What country has 50 supermarkets?", "schema": "CREATE TABLE table_name_19 (country VARCHAR, supermarkets VARCHAR)", "sql": "SELECT country FROM table_name_19 WHERE supermarkets = '50';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 60, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Show the name and phone for customers with a mailshot with outcome code 'No Response'.", "schema": "CREATE TABLE customers (customer_name VARCHAR, customer_phone VARCHAR, customer_id VARCHAR); CREATE TABLE mailshot_customers (customer_id VARCHAR, outcome_code VARCHAR)", "sql": "SELECT T1.customer_name, T1.customer_phone FROM customers AS T1 JOIN mailshot_customers AS T2 ON T1.customer_id = T2.customer_id WHERE T2.outcome_code = 'No Response';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 167, "num_statements": 1} {"question": "List all news stories from the 'news_stories' table that have a corresponding entry in the 'ethics_violations' table.", "schema": "CREATE TABLE news_stories (story_id INT, title VARCHAR(100), description TEXT, reporter_id INT, publish_date DATE); CREATE TABLE ethics_violations (violation_id INT, story_id INT, description TEXT, violation_date DATE);", "sql": "SELECT news_stories.title FROM news_stories INNER JOIN ethics_violations ON news_stories.story_id = ethics_violations.story_id;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 127, "num_statements": 1} {"question": "What is the average R&D cost for drugs approved after 2018?", "schema": "CREATE TABLE r_and_d_expenditures (drug_name VARCHAR(255), r_and_d_cost FLOAT, year INT); INSERT INTO r_and_d_expenditures (drug_name, r_and_d_cost, year) VALUES ('DrugD', 7000000.00, 2019);", "sql": "SELECT AVG(r_and_d_cost) as avg_r_and_d_cost FROM r_and_d_expenditures e JOIN drug_approvals a ON e.drug_name = a.drug_name WHERE a.approval_date > '2018-12-31';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 161, "num_statements": 1} {"question": "PostgreSQL Queries: show example 31.", "schema": null, "sql": "SELECT a + b AS sum, c FROM table1 ORDER BY sum + c; -- wrong;", "explanation": "Example from PostgreSQL documentation on Queries.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 62, "num_statements": 2} {"question": "Generate PostgreSQL SQL for: What was the score of united states player wally armstrong when he had a To par of +1", "schema": "CREATE TABLE table_name_8 (score VARCHAR, player VARCHAR, country VARCHAR, to_par VARCHAR)", "sql": "SELECT score FROM table_name_8 WHERE country = 'united states' AND to_par = '+1' AND player = 'wally armstrong';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 112, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the name of Season #15?", "schema": "CREATE TABLE table_12564633_1 (title VARCHAR, season__number VARCHAR)", "sql": "SELECT title FROM table_12564633_1 WHERE season__number = 15;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 61, "num_statements": 1} {"question": "What was the average energy efficiency rating of appliances sold in Indonesia and Malaysia in 2021?", "schema": "CREATE TABLE appliance_efficiency (id INT, country VARCHAR(255), year INT, efficiency_rating INT); INSERT INTO appliance_efficiency (id, country, year, efficiency_rating) VALUES (1, 'Indonesia', 2021, 4), (2, 'Malaysia', 2021, 5);", "sql": "SELECT AVG(efficiency_rating) FROM appliance_efficiency WHERE country IN ('Indonesia', 'Malaysia') AND year = 2021;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 115, "num_statements": 1} {"question": "What is the maximum number of likes on posts from users aged 18-24, in the 'North America' region, in the past month?", "schema": "CREATE TABLE users (id INT, age INT, country TEXT, posts TEXT);", "sql": "SELECT MAX(likes) FROM (SELECT content, MAX(likes) AS likes FROM posts JOIN users ON posts.id = users.id WHERE users.age BETWEEN 18 AND 24 AND users.country IN (SELECT country FROM countries WHERE region = 'North America') AND posts.timestamp BETWEEN DATE_SUB(NOW(), INTERVAL 1 MONTH) AND NOW() GROUP BY content) AS subquery;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 325, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the location for chattahoochee technical college", "schema": "CREATE TABLE table_16734640_1 (location VARCHAR, institution VARCHAR)", "sql": "SELECT location FROM table_16734640_1 WHERE institution = 'Chattahoochee Technical College';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 92, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: what is the type of vehicle driven by mark higgins", "schema": "CREATE TABLE table_28046929_2 (car VARCHAR, driver VARCHAR)", "sql": "SELECT car FROM table_28046929_2 WHERE driver = 'Mark Higgins';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 63, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What Tournament has a Partner of tom gorman?", "schema": "CREATE TABLE table_name_65 (tournament VARCHAR, partner VARCHAR)", "sql": "SELECT tournament FROM table_name_65 WHERE partner = 'tom gorman';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "What is the minimum donation amount in the 'donors' table?", "schema": "CREATE TABLE donors (id INT, name TEXT, age INT, donation FLOAT); INSERT INTO donors (id, name, age, donation) VALUES (1, 'John Doe', 35, 500.00); INSERT INTO donors (id, name, age, donation) VALUES (2, 'Jane Smith', 45, 750.00); INSERT INTO donors (id, name, age, donation) VALUES (3, 'Bob Johnson', 25, 600.00);", "sql": "SELECT MIN(donation) FROM donors;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 33, "num_statements": 1} {"question": "What is the percentage change in sales of each product category in the Asia-Pacific region from 2021 to 2022?", "schema": "CREATE TABLE sales_data_5 (sale_id INT, product_category VARCHAR(255), region VARCHAR(255), sale_quantity INT, sale_year INT);", "sql": "SELECT a.product_category, ((a.sale_quantity - b.sale_quantity) * 100.0 / b.sale_quantity) AS sales_percentage_change FROM sales_data_5 a JOIN sales_data_5 b ON a.product_category = b.product_category AND a.region = b.region AND a.sale_year = b.sale_year + 1 WHERE a.region LIKE 'Asia-Pacific%' AND b.sale_year = 2021 GROUP BY a.product_category, a.sale_quantity, b.sale_quantity;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 380, "num_statements": 1} {"question": "Write the SQL definition from PostgreSQL contrib extension 'contrib' (item 256).", "schema": null, "sql": "CREATE OPERATOR < (\n\tPROCEDURE = isnlt,\n\tLEFTARG = ean13,\n\tRIGHTARG = upc,\n\tCOMMUTATOR = >,\n\tNEGATOR = >=,\n\tRESTRICT = scalarltsel,\n\tJOIN = scalarltjoinsel);", "explanation": "SQL definition from the 'contrib' PostgreSQL contrib extension.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "ddl_advanced", "is_postgresql_specific": true, "sql_length": 157, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the number of gold when the silver is 1, bronze is 1, and the nation is Austria?", "schema": "CREATE TABLE table_name_4 (gold VARCHAR, nation VARCHAR, silver VARCHAR, bronze VARCHAR)", "sql": "SELECT COUNT(gold) FROM table_name_4 WHERE silver = 1 AND bronze = 1 AND nation = 'austria';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 92, "num_statements": 1} {"question": "PostgreSQL regression test 'subselect': Write the SELECT query (example 66).", "schema": null, "sql": "SELECT * FROM foo WHERE id IN\n (SELECT id2 FROM (SELECT id1, id2 FROM bar UNION\n SELECT id1, id2 FROM bar) AS s);", "explanation": "Regression test for Subselect in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT * FROM foo WHERE id IN\n (SELECT id2 FROM (SELECT id1, id2 FROM bar UNION\n SELECT id1, id2 FROM bar) AS s)) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 137, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the upstream for the 100 mbps downstream?", "schema": "CREATE TABLE table_name_82 (upstream VARCHAR, downstream VARCHAR)", "sql": "SELECT upstream FROM table_name_82 WHERE downstream = '100 mbps';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 65, "num_statements": 1} {"question": "What is the most recent mission for each astronaut, partitioned by gender?", "schema": "CREATE TABLE Astronauts (id INT, name VARCHAR(50), age INT, gender VARCHAR(10), mission_id INT); CREATE TABLE Missions (id INT, name VARCHAR(50), launch_site VARCHAR(50), launch_date DATE);", "sql": "SELECT a.name, m.name as mission_name, m.launch_date FROM Astronauts a JOIN Missions m ON a.mission_id = m.id JOIN (SELECT mission_id, gender, MAX(launch_date) AS MaxDate FROM Astronauts GROUP BY mission_id, gender) AS MaxDates ON a.mission_id = MaxDates.mission_id AND a.gender = MaxDates.gender AND m.launch_date = MaxDates.MaxDate ORDER BY a.gender, m.launch_date DESC;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 372, "num_statements": 1} {"question": "How many users have achieved their daily step goal for the past week, and what is the average age of these users?", "schema": "CREATE TABLE Users (ID INT PRIMARY KEY, Age INT, DailySteps INT, Date DATE);", "sql": "SELECT AVG(Age), COUNT(*) FROM Users WHERE DailySteps >= (SELECT AVG(DailySteps) FROM Users WHERE Date = (SELECT MAX(Date) FROM Users)) AND Date >= DATEADD(week, -1, GETDATE());", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 177, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'publication' (example 482).", "schema": null, "sql": "CREATE PUBLICATION testpub_forunloggedtbl FOR TABLE testpub_unloggedtbl;", "explanation": "DDL from PostgreSQL core regression test for Publication.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 72, "num_statements": 1} {"question": "What is the maximum number of community engagement events held in a week?", "schema": "CREATE TABLE Weekly_Community_Engagement (id INT, week_start_date DATE, num_events INT); INSERT INTO Weekly_Community_Engagement (id, week_start_date, num_events) VALUES (1, '2022-02-01', 10), (2, '2022-02-08', 12), (3, '2022-02-15', 15), (4, '2022-02-22', 18), (5, '2022-03-01', 20);", "sql": "SELECT MAX(num_events) FROM Weekly_Community_Engagement;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 56, "num_statements": 1} {"question": "Get the names of all players who have played a game designed by a designer from 'Europe'", "schema": "CREATE TABLE Players_Games (player_id INT, game_id INT); CREATE TABLE Games (id INT, name VARCHAR(50), designer VARCHAR(50), design_region VARCHAR(20)); INSERT INTO Players_Games (player_id, game_id) VALUES (1, 1), (2, 2), (3, 3); INSERT INTO Games (id, name, designer, design_region) VALUES (1, 'Beat Saber', 'Jan Kozlovsky', 'Europe'), (2, 'Job Simulator', 'Alex Schwartz', 'USA'), (3, 'Superhot VR', 'Tomasz Kaczmarczyk', 'Europe');", "sql": "SELECT Players.name FROM Players_Games JOIN Players ON Players_Games.player_id = Players.id JOIN Games ON Players_Games.game_id = Games.id WHERE Games.design_region = 'Europe';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 176, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the scored figure when the result is 40-22?", "schema": "CREATE TABLE table_name_94 (scored INTEGER, result VARCHAR)", "sql": "SELECT SUM(scored) FROM table_name_94 WHERE result = '40-22';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 61, "num_statements": 1} {"question": "What is the total revenue for all jazz albums sold on the 'streaming' platform?", "schema": "CREATE TABLE artists (id INT, name TEXT, genre TEXT); CREATE TABLE albums (id INT, title TEXT, artist_id INT, platform TEXT); CREATE TABLE sales (id INT, album_id INT, quantity INT, revenue DECIMAL); CREATE VIEW jazz_albums AS SELECT a.id, a.title, a.artist_id, a.platform FROM albums a JOIN artists ar ON a.artist_id = ar.id WHERE ar.genre = 'jazz'; CREATE VIEW jazz_sales AS SELECT s.id, sa.album_id, s.quantity, s.revenue FROM sales s JOIN jazz_albums ja ON s.album_id = ja.id;", "sql": "SELECT SUM(revenue) FROM jazz_sales WHERE platform = 'streaming';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 65, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What Social Democratic has a Democratic and Social Centre of 10.0% 22 seats?", "schema": "CREATE TABLE table_name_33 (social_democratic VARCHAR, democratic_and_social_centre VARCHAR)", "sql": "SELECT social_democratic FROM table_name_33 WHERE democratic_and_social_centre = '10.0% 22 seats';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 98, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was Craig Stadler's lowest score for United states?", "schema": "CREATE TABLE table_name_35 (score INTEGER, country VARCHAR, player VARCHAR)", "sql": "SELECT MIN(score) FROM table_name_35 WHERE country = 'united states' AND player = 'craig stadler';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 98, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many games did they play on january 7?", "schema": "CREATE TABLE table_27734286_7 (high_points VARCHAR, date VARCHAR)", "sql": "SELECT COUNT(high_points) FROM table_27734286_7 WHERE date = 'January 7';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 73, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: When Team 1 is Aalborg BK, what is the 1st Leg?", "schema": "CREATE TABLE table_name_53 (team_1 VARCHAR)", "sql": "SELECT 1 AS st_leg FROM table_name_53 WHERE team_1 = 'aalborg bk';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What are the male Bounder books?", "schema": "CREATE TABLE table_name_23 (books VARCHAR, gender VARCHAR, animal_name VARCHAR)", "sql": "SELECT books FROM table_name_23 WHERE gender = 'male' AND animal_name = 'bounder';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 82, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: When 6 is the rank what is the time for Tuesday August 25th?", "schema": "CREATE TABLE table_23465864_3 (tues_25_aug VARCHAR, rank VARCHAR)", "sql": "SELECT tues_25_aug FROM table_23465864_3 WHERE rank = 6;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 56, "num_statements": 1} {"question": "What is the total number of marine species researched and their respective researchers?", "schema": "CREATE TABLE MarineSpecies (id INT PRIMARY KEY, species VARCHAR(255), researcher VARCHAR(255));", "sql": "SELECT species, researcher, COUNT(*) as total FROM MarineSpecies GROUP BY species, researcher;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 94, "num_statements": 1} {"question": "Show biosensor technology development data for optical sensors from the biosensor_development table", "schema": "CREATE TABLE biosensor_development (id INT, sensor_type VARCHAR(50), data TEXT, date DATE); INSERT INTO biosensor_development (id, sensor_type, data, date) VALUES (1, 'optical', 'Sensor data 1', '2022-01-01'); INSERT INTO biosensor_development (id, sensor_type, data, date) VALUES (2, 'electrochemical', 'Sensor data 2', '2022-02-01');", "sql": "SELECT * FROM biosensor_development WHERE sensor_type = 'optical';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "Compare labor productivity and accident rates in Mexican mining operations between 2017 and 2019.", "schema": "CREATE TABLE mx_mine_productivity (year INT, productivity FLOAT); INSERT INTO mx_mine_productivity (year, productivity) VALUES (2017, 1.7), (2018, 1.9), (2019, 2.1); CREATE TABLE mx_mine_safety (year INT, accident_rate FLOAT); INSERT INTO mx_mine_safety (year, accident_rate) VALUES (2017, 0.02), (2018, 0.018), (2019, 0.016);", "sql": "SELECT mx_mine_productivity.year, mx_mine_productivity.productivity, mx_mine_safety.accident_rate FROM mx_mine_productivity INNER JOIN mx_mine_safety ON mx_mine_productivity.year = mx_mine_safety.year WHERE mx_mine_productivity.year BETWEEN 2017 AND 2019;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 255, "num_statements": 1} {"question": "What is the total number of properties in sustainable urban areas?", "schema": "CREATE TABLE sustainable_urban (area_id INT, property_id INT); INSERT INTO sustainable_urban (area_id, property_id) VALUES (1, 101), (1, 102), (1, 103), (2, 201), (2, 202), (3, 301);", "sql": "SELECT COUNT(DISTINCT property_id) FROM sustainable_urban;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 58, "num_statements": 1} {"question": "Which community health workers have the most experience?", "schema": "CREATE TABLE community_health_workers (id INT, name VARCHAR(50), race VARCHAR(50), ethnicity VARCHAR(50), years_of_experience INT); INSERT INTO community_health_workers (id, name, race, ethnicity, years_of_experience) VALUES (1, 'John Doe', 'White', 'Not Hispanic or Latino', 5), (2, 'Jane Smith', 'Black or African American', 'Not Hispanic or Latino', 10);", "sql": "SELECT name, years_of_experience, RANK() OVER (ORDER BY years_of_experience DESC) as rank FROM community_health_workers;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_window_function", "is_postgresql_specific": true, "sql_length": 120, "num_statements": 1} {"question": "What is the policy type and effective date of policies with a risk score less than 700 and that have a claim in 2021?", "schema": "CREATE TABLE Policy (PolicyID int, PolicyType varchar(50), EffectiveDate date, RiskScore int); CREATE TABLE Claim (ClaimID int, PolicyID int, ClaimDate date, ClaimAmount int, State varchar(50)); INSERT INTO Policy (PolicyID, PolicyType, EffectiveDate, RiskScore) VALUES (1, 'Auto', '2020-01-01', 700), (2, 'Home', '2019-05-05', 900), (3, 'Life', '2021-08-01', 850); INSERT INTO Claim (ClaimID, PolicyID, ClaimDate, ClaimAmount, State) VALUES (1, 1, '2020-03-15', 2000, 'Texas'), (2, 2, '2019-12-27', 3000, 'California'), (3, 3, '2021-01-05', 1500, 'Texas');", "sql": "SELECT Policy.PolicyType, Policy.EffectiveDate FROM Policy INNER JOIN Claim ON Policy.PolicyID = Claim.PolicyID WHERE Policy.RiskScore < 700 AND Claim.ClaimDate BETWEEN '2021-01-01' AND '2021-12-31';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 199, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the last round the team drafted?", "schema": "CREATE TABLE table_14999879_2 (round INTEGER)", "sql": "SELECT MAX(round) FROM table_14999879_2;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 40, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'multirangetypes' (example 387).", "schema": null, "sql": "insert into test_multirange_gist select int4multirange(int4range(g, g+10),int4range(g+20, g+30),int4range(g+40, g+50)) from generate_series(1,2000) g;", "explanation": "DML from PostgreSQL core regression test for Multirangetypes.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 150, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which claimant's rank is 200?", "schema": "CREATE TABLE table_name_7 (claimant VARCHAR, rank VARCHAR)", "sql": "SELECT claimant FROM table_name_7 WHERE rank = 200;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 51, "num_statements": 1} {"question": "What is the average obesity rate in Sydney, Australia in 2020?", "schema": "CREATE TABLE ObesityRates (ID INT, Rate FLOAT, City VARCHAR(50), Year INT); INSERT INTO ObesityRates (ID, Rate, City, Year) VALUES (1, 25.3, 'Sydney', 2020); INSERT INTO ObesityRates (ID, Rate, City, Year) VALUES (2, 27.1, 'Sydney', 2020);", "sql": "SELECT AVG(Rate) FROM ObesityRates WHERE City = 'Sydney' AND Year = 2020;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 73, "num_statements": 1} {"question": "What is the total number of public transportation users in Sydney?", "schema": "CREATE TABLE public_transportation (id INT, users INT, city VARCHAR(20)); INSERT INTO public_transportation (id, users, city) VALUES (1, 200000, 'Sydney');", "sql": "SELECT SUM(users) FROM public_transportation WHERE city = 'Sydney';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 67, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'btree_gin' (example 34).", "schema": null, "sql": "SELECT * FROM test_float4 WHERE i<='NaN'::float8 ORDER BY i;", "explanation": "Example query from the 'btree_gin' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 60, "num_statements": 1} {"question": "What is the total landfill capacity for regions with a population density above 5000 people per square kilometer?", "schema": "CREATE TABLE region (name TEXT, population INT, area FLOAT, landfill_capacity FLOAT); INSERT INTO region (name, population, area, landfill_capacity) VALUES ('Region A', 500000, 100, 600), ('Region B', 600000, 120, 800), ('Region C', 400000, 80, 500);", "sql": "SELECT SUM(landfill_capacity) FROM region WHERE population/area > 5000;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 71, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the name of the subject who ran in the general election for Queen Anne's County State's Attorney?", "schema": "CREATE TABLE table_name_63 (subject VARCHAR, election VARCHAR, office VARCHAR)", "sql": "SELECT subject FROM table_name_63 WHERE election = 'general' AND office = 'queen anne's county state's attorney';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 113, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'test_decoding' (example 57).", "schema": null, "sql": "SELECT pg_replication_origin_create('regress_test_decoding: regression_slot_no_lsn');", "explanation": "Example query from the 'test_decoding' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 85, "num_statements": 1} {"question": "How many space missions were led by astronauts from the UK?", "schema": "CREATE TABLE SpaceMissionRecords (mission_name VARCHAR(30), astronaut_name VARCHAR(30), country VARCHAR(20), mission_duration INT); INSERT INTO SpaceMissionRecords (mission_name, astronaut_name, country, mission_duration) VALUES ('Moon Landing', 'James Smith', 'UK', 150), ('Mars Exploration', 'Oliver Lee', 'UK', 250);", "sql": "SELECT COUNT(*) FROM SpaceMissionRecords WHERE country = 'UK';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 62, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'stats' (example 25).", "schema": null, "sql": "UPDATE trunc_stats_test1 SET id = id + 100;", "explanation": "DML from PostgreSQL core regression test for Stats.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 43, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: what are all the result for New York 7 district", "schema": "CREATE TABLE table_1341395_33 (result VARCHAR, district VARCHAR)", "sql": "SELECT result FROM table_1341395_33 WHERE district = 'New York 7';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "Find the number of chemical products produced by each manufacturer and the total safety protocol number for each manufacturer.", "schema": "CREATE TABLE manufacturer_products (manufacturer INT, product INT, safety_protocol INT); INSERT INTO manufacturer_products (manufacturer, product, safety_protocol) VALUES (1001, 1, 650), (1002, 2, 300), (1003, 3, 700), (1001, 4, 800), (1002, 5, 900);", "sql": "SELECT manufacturer, COUNT(DISTINCT product) as num_products, SUM(safety_protocol) as total_safety_protocol FROM manufacturer_products GROUP BY manufacturer;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 157, "num_statements": 1} {"question": "Delete records from the 'oil_platforms' table where the platform_name = 'Bering Sea Rig 1'", "schema": "CREATE TABLE oil_platforms (platform_id INT PRIMARY KEY, platform_name VARCHAR(255), water_depth_ft INT, operational_status VARCHAR(50));", "sql": "DELETE FROM oil_platforms WHERE platform_name = 'Bering Sea Rig 1';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_delete", "is_postgresql_specific": false, "sql_length": 67, "num_statements": 1} {"question": "Display the AI safety incidents in Asia with costs greater than $6000.", "schema": "CREATE TABLE ai_safety_incidents (incident_id INTEGER, incident_cost FLOAT, region TEXT); INSERT INTO ai_safety_incidents (incident_id, incident_cost, region) VALUES (11, 6500, 'Asia'), (12, 5000, 'Asia'), (13, 7000, 'Europe');", "sql": "SELECT incident_id, incident_cost FROM ai_safety_incidents WHERE region = 'Asia' AND incident_cost > 6000;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 106, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the Canada singles in 1979 with a US A.C. of 24?", "schema": "CREATE TABLE table_name_75 (canada_singles VARCHAR, year VARCHAR, us_ac VARCHAR)", "sql": "SELECT canada_singles FROM table_name_75 WHERE year = 1979 AND us_ac = '24';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 76, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who was the opponent in week 3?", "schema": "CREATE TABLE table_name_34 (opponent VARCHAR, week VARCHAR)", "sql": "SELECT opponent FROM table_name_34 WHERE week = 3;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 50, "num_statements": 1} {"question": "What is the total number of visitors by age group for all art exhibitions?", "schema": "CREATE TABLE art_exhibitions (exhibition_id INT, exhibition_name VARCHAR(50), visitor_count INT, age_group VARCHAR(20));", "sql": "SELECT age_group, SUM(visitor_count) as total_visitors FROM art_exhibitions GROUP BY age_group;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 95, "num_statements": 1} {"question": "What is the total quantity of gold extracted by each company?", "schema": "CREATE TABLE company (id INT, name VARCHAR(50));CREATE TABLE extraction (company_id INT, mineral VARCHAR(10), quantity INT); INSERT INTO company (id, name) VALUES (1, 'ABC Mining'), (2, 'XYZ Corp'); INSERT INTO extraction (company_id, mineral, quantity) VALUES (1, 'gold', 500), (1, 'silver', 300), (2, 'gold', 800), (2, 'copper', 700);", "sql": "SELECT e.company_id, c.name, SUM(e.quantity) AS total_gold_quantity FROM extraction e JOIN company c ON e.company_id = c.id WHERE e.mineral = 'gold' GROUP BY e.company_id, c.name;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 179, "num_statements": 1} {"question": "What is the total number of heritage sites in the Oceania region?", "schema": "CREATE TABLE HeritageSites (SiteID INT, SiteName VARCHAR(100), Location VARCHAR(100), Visits INT); INSERT INTO HeritageSites (SiteID, SiteName, Location, Visits) VALUES (1, 'Sydney Opera House', 'Australia', 8000000);", "sql": "SELECT COUNT(*) FROM HeritageSites WHERE Location LIKE 'Oceania%';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'numeric' (example 335).", "schema": null, "sql": "INSERT INTO num_exp_div VALUES (8,0,'NaN');", "explanation": "DML from PostgreSQL core regression test for Numeric.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 43, "num_statements": 1} {"question": "What is the total waste quantity generated and the total number of circular economy initiatives, for each location and material, for the year 2023?", "schema": "CREATE TABLE WasteGeneration (Date date, Location text, Material text, Quantity integer);CREATE TABLE CircularEconomyInitiatives (Location text, Initiative text, StartDate date);", "sql": "SELECT wg.Location, wg.Material, SUM(wg.Quantity) as TotalWasteQuantity, COUNT(DISTINCT cei.Initiative) as NumberOfInitiatives FROM WasteGeneration wg LEFT JOIN CircularEconomyInitiatives cei ON wg.Location = cei.Location WHERE wg.Date >= '2023-01-01' AND wg.Date < '2024-01-01' GROUP BY wg.Location, wg.Material;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 313, "num_statements": 1} {"question": "Show the number of drought impacts for each severity", "schema": "CREATE TABLE drought_impact (location VARCHAR(255), year INT, severity VARCHAR(255));", "sql": "SELECT severity, COUNT(*) as num_impacts FROM drought_impact GROUP BY severity;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 79, "num_statements": 1} {"question": "How many art pieces were created by artists from Asia in the 'painting' medium?", "schema": "CREATE TABLE art_pieces (id INT, title TEXT, artist_name TEXT, medium TEXT, region TEXT); INSERT INTO art_pieces (id, title, artist_name, medium, region) VALUES (1, 'Mount Fuji', 'Hokusai', 'painting', 'Asia');", "sql": "SELECT COUNT(*) FROM art_pieces WHERE medium = 'painting' AND region = 'Asia';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 78, "num_statements": 1} {"question": "List all financial institutions offering Shariah-compliant loans in Asia within the last 2 years.", "schema": "CREATE TABLE financial_institutions (id INT, name TEXT, location TEXT, last_shariah_activity DATE);", "sql": "SELECT name FROM financial_institutions WHERE location LIKE 'Asia%' AND last_shariah_activity BETWEEN DATE_SUB(CURRENT_DATE, INTERVAL 2 YEAR) AND CURRENT_DATE AND offering_shariah_loans = TRUE;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 193, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What Player has a To par of +1 and a Score of 73-71-71=217?", "schema": "CREATE TABLE table_name_24 (player VARCHAR, to_par VARCHAR, score VARCHAR)", "sql": "SELECT player FROM table_name_24 WHERE to_par = '+1' AND score = 73 - 71 - 71 = 217;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 84, "num_statements": 1} {"question": "Show the number of unique vehicle makes and models in the 'vehicle_info' table, grouped by 'vehicle_type'.", "schema": "CREATE TABLE vehicle_info (id INT, vehicle_make VARCHAR, vehicle_model VARCHAR, vehicle_type VARCHAR);", "sql": "SELECT vehicle_type, COUNT(DISTINCT CONCAT(vehicle_make, '_', vehicle_model)) AS unique_vehicles FROM vehicle_info GROUP BY vehicle_type;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 137, "num_statements": 1} {"question": "What is the total revenue of factories in the renewable energy sector?", "schema": "CREATE TABLE renewable_energy_factories (id INT, factory_name VARCHAR(100), location VARCHAR(50), revenue DECIMAL(10, 2)); INSERT INTO renewable_energy_factories (id, factory_name, location, revenue) VALUES (1, 'EcoPower', 'USA', 10000000); INSERT INTO renewable_energy_factories (id, factory_name, location, revenue) VALUES (2, 'GreenTech', 'Canada', 15000000);", "sql": "SELECT SUM(ref.revenue) as total_revenue FROM renewable_energy_factories ref;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 77, "num_statements": 1} {"question": "Count the number of marine mammals in 'ocean_mammals' table.", "schema": "CREATE TABLE ocean_mammals (id INT, species VARCHAR(255), population INT);", "sql": "SELECT COUNT(*) FROM ocean_mammals WHERE species LIKE '%mammal%';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 65, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'cube' (example 174).", "schema": null, "sql": "SELECT cube_enlarge('(2),(-2)'::cube, 0, 4);", "explanation": "Example query from the 'cube' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "query_select", "is_postgresql_specific": true, "sql_length": 44, "num_statements": 1} {"question": "What is the total revenue from Spotify streams for Taylor Swift in 2020?", "schema": "CREATE TABLE SpotifyStreams (artist VARCHAR(255), year INT, revenue FLOAT);", "sql": "SELECT SUM(revenue) FROM SpotifyStreams WHERE artist = 'Taylor Swift' AND year = 2020;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 86, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'without_overlaps' (example 347).", "schema": null, "sql": "INSERT INTO temporal_mltrng3 (id, valid_at) VALUES ('[1,2)', datemultirange(daterange('2010-01-01', '2020-01-01'))) ON CONFLICT (id, valid_at) DO NOTHING;", "explanation": "DML from PostgreSQL core regression test for Without Overlaps.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "dml_insert", "is_postgresql_specific": true, "sql_length": 154, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the former school of the player from Detroit, MI?", "schema": "CREATE TABLE table_29418619_1 (former_school VARCHAR, hometown VARCHAR)", "sql": "SELECT former_school FROM table_29418619_1 WHERE hometown = 'Detroit, MI';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 74, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the average point count for tristan gommendy?", "schema": "CREATE TABLE table_name_30 (points INTEGER, driver VARCHAR)", "sql": "SELECT AVG(points) FROM table_name_30 WHERE driver = 'tristan gommendy';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 72, "num_statements": 1} {"question": "PostgreSQL regression test 'rowtypes': Write the SELECT query (example 21).", "schema": null, "sql": "select * from people;", "explanation": "Regression test for Rowtypes in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select * from people) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 21, "num_statements": 1} {"question": "PostgreSQL regression test 'money': Write the SELECT query (example 60).", "schema": null, "sql": "SELECT '9223372036854775807'::money;", "explanation": "Regression test for Money in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT '9223372036854775807'::money) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 36, "num_statements": 1} {"question": "List the water consumption for the agriculture industry in California in 2019, 2020, and 2021.", "schema": "CREATE TABLE industrial_water_usage (state VARCHAR(20), year INT, sector VARCHAR(30), usage FLOAT); INSERT INTO industrial_water_usage (state, year, sector, usage) VALUES ('California', 2019, 'Agriculture', 12345.6), ('California', 2020, 'Agriculture', 23456.7), ('California', 2021, 'Agriculture', 34567.8);", "sql": "SELECT year, usage FROM industrial_water_usage WHERE state = 'California' AND sector = 'Agriculture';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 101, "num_statements": 1} {"question": "What is the minimum billing amount for each case in the 'billing' table, grouped by case type?", "schema": "CREATE TABLE cases (case_id INT, case_type VARCHAR(255)); INSERT INTO cases (case_id, case_type) VALUES (1, 'Criminal'), (2, 'Family'), (3, 'Personal Injury'), (4, 'Criminal'), (5, 'Family'); CREATE TABLE billing (bill_id INT, case_id INT, amount DECIMAL(10,2)); INSERT INTO billing (bill_id, case_id, amount) VALUES (1, 1, 500.00), (2, 1, 750.00), (3, 2, 600.00), (4, 3, 800.00), (5, 3, 900.00), (6, 4, 1000.00), (7, 4, 1200.00), (8, 5, 1500.00);", "sql": "SELECT cases.case_type, MIN(billing.amount) FROM billing JOIN cases ON billing.case_id = cases.case_id GROUP BY cases.case_type;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 128, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the date for rio de janeiro", "schema": "CREATE TABLE table_name_55 (date VARCHAR, city VARCHAR)", "sql": "SELECT date FROM table_name_55 WHERE city = 'rio de janeiro';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 61, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'update' (example 61).", "schema": null, "sql": "CREATE TABLE part_a_1_a_10 PARTITION OF range_parted FOR VALUES FROM ('a', 1) TO ('a', 10);", "explanation": "DDL from PostgreSQL core regression test for Update.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_table", "is_postgresql_specific": false, "sql_length": 91, "num_statements": 1} {"question": "Insert a new record into the DefenseContractNegotiations table", "schema": "CREATE TABLE DefenseContractNegotiations (id INT, company_name VARCHAR(50), contract_value INT, negotiation_date DATE);", "sql": "INSERT INTO DefenseContractNegotiations (id, company_name, contract_value, negotiation_date) VALUES (1, 'ABC Corp', 1500000, '2021-03-01');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 139, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the league of the third baseman player before 1973?", "schema": "CREATE TABLE table_name_79 (league VARCHAR, year VARCHAR, position VARCHAR)", "sql": "SELECT league FROM table_name_79 WHERE year < 1973 AND position = 'third baseman';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 82, "num_statements": 1} {"question": "PostgreSQL regression test 'rowsecurity': Write the SELECT query (example 583).", "schema": null, "sql": "SELECT * FROM rls_view; --fail - permission denied.\nEXPLAIN (COSTS OFF) SELECT * FROM rls_view; --fail - permission denied.\n\n-- Query as role that is not the owner of the table or view with permissions.\nSET SESSION AUTHORIZATION regress_rls_alice;", "explanation": "Regression test for Rowsecurity in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT * FROM rls_view; --fail - permission denied.\nEXPLAIN (COSTS OFF) SELECT * FROM rls_view; --fail - permission denied.\n\n-- Query as role that is not the owner of the table or view with permissions.\nSET SESSION AUTHORIZATION regress_rls_alice) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "admin_maintenance", "is_postgresql_specific": false, "sql_length": 247, "num_statements": 3} {"question": "What is the percentage of upcycled clothing items in our inventory in Mexico?", "schema": "CREATE TABLE clothing_inventory (id INT, item_name VARCHAR(50), item_type VARCHAR(50), upcycled BOOLEAN, country_of_sale VARCHAR(50)); INSERT INTO clothing_inventory (id, item_name, item_type, upcycled, country_of_sale) VALUES (1, 'Upcycled Sweater', 'Clothing', true, 'Mexico'), (2, 'Vintage Dress', 'Clothing', false, 'Mexico'), (3, 'Recycled T-Shirt', 'Clothing', true, 'USA');", "sql": "SELECT (COUNT(*) FILTER (WHERE upcycled = true) * 100.0 / COUNT(*)) FROM clothing_inventory WHERE country_of_sale = 'Mexico';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": true, "sql_length": 125, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the circuit for 13 july", "schema": "CREATE TABLE table_25322130_3 (circuit VARCHAR, date VARCHAR)", "sql": "SELECT circuit FROM table_25322130_3 WHERE date = '13 July';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 60, "num_statements": 1} {"question": "Which users have posted the most in the last 3 days?", "schema": "CREATE TABLE users (id INT, user_id INT); INSERT INTO users (id, user_id) VALUES (1, 1), (2, 2), (3, 3), (4, 4), (5, 5); CREATE TABLE posts (id INT, user_id INT, post_date DATE); INSERT INTO posts (id, user_id, post_date) VALUES (1, 1, '2022-01-01'), (2, 2, '2022-01-02'), (3, 1, '2022-01-03'), (4, 2, '2022-01-04'), (5, 3, '2022-01-05'), (6, 3, '2022-01-05'), (7, 1, '2022-01-06'), (8, 2, '2022-01-07'), (9, 4, '2022-01-08'), (10, 5, '2022-01-09'), (11, 1, '2022-01-10'), (12, 2, '2022-01-11');", "sql": "SELECT user_id, COUNT(*) AS num_posts FROM posts WHERE post_date >= DATEADD(day, -3, GETDATE()) GROUP BY user_id ORDER BY num_posts DESC;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 137, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the total avge of John Hall, who has less than 63 goals?", "schema": "CREATE TABLE table_name_25 (avge VARCHAR, name VARCHAR, goals VARCHAR)", "sql": "SELECT COUNT(avge) FROM table_name_25 WHERE name = 'john hall' AND goals < 63;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 78, "num_statements": 1} {"question": "What are the unique resilience metrics for 'Transportation' systems?", "schema": "CREATE TABLE ResilienceMetrics (id INT, system_type TEXT, metric_number TEXT, description TEXT); INSERT INTO ResilienceMetrics (id, system_type, metric_number, description) VALUES (1, 'Power Grid', 'Metric123', 'Power outage duration'); INSERT INTO ResilienceMetrics (id, system_type, metric_number, description) VALUES (2, 'Transportation', 'Metric456', 'Road closure duration'); INSERT INTO ResilienceMetrics (id, system_type, metric_number, description) VALUES (3, 'Power Grid', 'Metric789', 'Restoration time'); INSERT INTO ResilienceMetrics (id, system_type, metric_number, description) VALUES (4, 'Transportation', 'Metric012', 'Bridge closure duration');", "sql": "SELECT DISTINCT metric_number, description FROM ResilienceMetrics WHERE system_type = 'Transportation';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 103, "num_statements": 1} {"question": "Alter the 'waste_generation_metrics' table to add a new column 'region'", "schema": "CREATE TABLE waste_generation_metrics ( country VARCHAR(50), year INT, generation_metric INT);", "sql": "ALTER TABLE waste_generation_metrics ADD COLUMN region VARCHAR(50);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "ddl_alter", "is_postgresql_specific": false, "sql_length": 67, "num_statements": 1} {"question": "PostgreSQL regression test 'timestamptz': Write the SELECT query (example 12).", "schema": null, "sql": "SELECT count(*) AS One FROM TIMESTAMPTZ_TBL WHERE d1 = timestamp with time zone 'tomorrow zulu';", "explanation": "Regression test for Timestamptz in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT count(*) AS One FROM TIMESTAMPTZ_TBL WHERE d1 = timestamp with time zone 'tomorrow zulu') AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 96, "num_statements": 1} {"question": "What is the minimum salary for employees who identify as Native Hawaiian or Pacific Islander?", "schema": "CREATE TABLE Employees (EmployeeID INT, EmployeeName VARCHAR(50), Department VARCHAR(50), Salary DECIMAL(10,2), Ethnicity VARCHAR(50)); INSERT INTO Employees (EmployeeID, EmployeeName, Department, Salary, Ethnicity) VALUES (1, 'John Doe', 'IT', 75000.00, 'Asian'), (2, 'Jane Smith', 'IT', 80000.00, 'White'), (3, 'Mike Johnson', 'IT', 78000.00, 'Hispanic'), (4, 'Alice Williams', 'HR', 65000.00, 'Black'), (5, 'Bob Brown', 'Finance', 90000.00, 'Native Hawaiian or Pacific Islander'), (6, 'Charlie Davis', 'Finance', 95000.00, 'Asian');", "sql": "SELECT MIN(Salary) FROM Employees WHERE Ethnicity = 'Native Hawaiian or Pacific Islander';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 90, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What party was Lane Evans?", "schema": "CREATE TABLE table_1341568_14 (party VARCHAR, incumbent VARCHAR)", "sql": "SELECT party FROM table_1341568_14 WHERE incumbent = 'Lane Evans';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the lowest number of losses for a team with more than 0 wins?", "schema": "CREATE TABLE table_name_53 (losses INTEGER, wins INTEGER)", "sql": "SELECT MIN(losses) FROM table_name_53 WHERE wins > 0;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 53, "num_statements": 1} {"question": "What are the total installed capacities of renewable energy projects for each country, sorted by the highest capacity first?", "schema": "CREATE TABLE Country (CountryID INT, CountryName VARCHAR(100)); INSERT INTO Country VALUES (1, 'Canada'), (2, 'USA'), (3, 'Mexico'); CREATE TABLE Project (ProjectID INT, ProjectName VARCHAR(100), CountryID INT); INSERT INTO Project VALUES (1, 'Solar Farm A', 1), (2, 'Wind Farm B', 2), (3, 'Hydro Plant C', 3); CREATE TABLE Capacity (CapacityID INT, ProjectID INT, Capacity FLOAT); INSERT INTO Capacity VALUES (1, 1, 100.5), (2, 2, 200.3), (3, 3, 300.1);", "sql": "SELECT CountryName, SUM(Capacity) AS TotalCapacity FROM Country JOIN Project ON Country.CountryID = Project.CountryID JOIN Capacity ON Project.ProjectID = Capacity.ProjectID GROUP BY CountryName ORDER BY TotalCapacity DESC;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 223, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the title of the episode that martin worth wrote the story for?", "schema": "CREATE TABLE table_name_55 (title VARCHAR, story VARCHAR)", "sql": "SELECT title FROM table_name_55 WHERE story = 'martin worth';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 61, "num_statements": 1} {"question": "What is the average time between vehicle maintenance events for each maintenance type?", "schema": "CREATE TABLE VehicleMaintenanceEvents (EventID int, VehicleID int, MaintenanceTypeID int, EventDateTime datetime); INSERT INTO VehicleMaintenanceEvents VALUES (1, 1, 1, '2022-01-01 08:00:00'), (2, 1, 2, '2022-01-01 10:00:00'), (3, 3, 1, '2022-01-01 09:00:00');", "sql": "SELECT M.MaintenanceType, AVG(DATEDIFF(minute, VME1.EventDateTime, VME2.EventDateTime)) as AvgTimeBetweenEvents FROM VehicleMaintenanceEvents VME1 INNER JOIN VehicleMaintenanceEvents VME2 ON VME1.VehicleID = VME2.VehicleID AND VME1.MaintenanceTypeID = VME2.MaintenanceTypeID AND VME1.EventID < VME2.EventID INNER JOIN MaintenanceTypes M ON VME1.MaintenanceTypeID = M.MaintenanceTypeID GROUP BY M.MaintenanceType;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 412, "num_statements": 1} {"question": "What is the count of visitors from India who attended exhibitions in 2022?", "schema": "CREATE TABLE Visitors (visitor_id INT, visitor_name VARCHAR(50), country VARCHAR(50)); INSERT INTO Visitors (visitor_id, visitor_name, country) VALUES (1, 'John Doe', 'USA'), (2, 'Sita Patel', 'India'); CREATE TABLE Exhibitions (exhibition_id INT, exhibition_name VARCHAR(50), exhibition_year INT); INSERT INTO Exhibitions (exhibition_id, exhibition_name, exhibition_year) VALUES (1, 'Art of the 20th Century', 2022); CREATE TABLE Attendance (attendance_id INT, visitor_id INT, exhibition_id INT, attendance_date DATE); INSERT INTO Attendance (attendance_id, visitor_id, exhibition_id, attendance_date) VALUES (1, 1, 1, '2022-06-01'), (2, 2, 1, '2022-07-01');", "sql": "SELECT COUNT(*) FROM Visitors v JOIN Attendance a ON v.visitor_id = a.visitor_id JOIN Exhibitions e ON a.exhibition_id = e.exhibition_id WHERE v.country = 'India' AND e.exhibition_year = 2022;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 192, "num_statements": 1} {"question": "How many female and male employees are there in the company?", "schema": "CREATE TABLE Employees (EmployeeID INT, Name VARCHAR(50), Gender VARCHAR(50)); INSERT INTO Employees (EmployeeID, Name, Gender) VALUES (1, 'John Doe', 'Male'), (2, 'Jane Smith', 'Female'), (3, 'Mike Johnson', 'Male');", "sql": "SELECT Gender, COUNT(*) FROM Employees GROUP BY Gender;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 55, "num_statements": 1} {"question": "What is the total number of animal species in the 'animal_species' table grouped by region?", "schema": "CREATE TABLE animal_species_regions (species_id INT, species_name VARCHAR(50), region VARCHAR(50)); INSERT INTO animal_species_regions (species_id, species_name, region) VALUES (1, 'Deer', 'North'), (2, 'Bear', 'South'), (3, 'Wolf', 'North'), (4, 'Eagle', 'South');", "sql": "SELECT region, COUNT(*) as total_species_per_region FROM animal_species_regions GROUP BY region;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 96, "num_statements": 1} {"question": "How many volunteers signed up in H1 and H2 of 2023?", "schema": "CREATE TABLE VolunteerSignups (VolunteerSignupID INT, VolunteerID INT, SignUpDate DATE); INSERT INTO VolunteerSignups (VolunteerSignupID, VolunteerID, SignUpDate) VALUES (1, 1, '2023-01-05'), (2, 2, '2023-04-10'), (3, 3, '2023-07-20');", "sql": "SELECT CONCAT('H', FLOOR(MONTH(SignUpDate) / 6)) AS Half, COUNT(*) as VolunteerCount FROM VolunteerSignups WHERE YEAR(SignUpDate) = 2023 GROUP BY Half;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 151, "num_statements": 1} {"question": "List the number of offshore platforms in each country in the South China Sea", "schema": "CREATE TABLE offshore_platforms (country VARCHAR(255), region VARCHAR(255), num_platforms INT); INSERT INTO offshore_platforms (country, region, num_platforms) VALUES ('Vietnam', 'South China Sea', 25), ('China', 'South China Sea', 30), ('Malaysia', 'South China Sea', 12);", "sql": "SELECT country, SUM(num_platforms) FROM offshore_platforms WHERE region = 'South China Sea' GROUP BY country;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 109, "num_statements": 1} {"question": "How many artifacts were analyzed in 2015?", "schema": "CREATE TABLE artifact_analysis (id INT, excavation_site_id INT, date DATE); INSERT INTO artifact_analysis (id, excavation_site_id, date) VALUES (1, 1, '2015-01-01'), (2, 1, '2015-02-01');", "sql": "SELECT COUNT(*) FROM artifact_analysis WHERE YEAR(date) = 2015;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 63, "num_statements": 1} {"question": "List the organizations that have received donations from donors located in 'California', but have not received donations from donors located in 'New York'.", "schema": "CREATE TABLE donors (id INT, name TEXT, state TEXT); INSERT INTO donors (id, name, state) VALUES (1, 'John Doe', 'California'); CREATE TABLE donations (id INT, donor_id INT, org_id INT, donation_amount DECIMAL(10,2)); INSERT INTO donations (id, donor_id, org_id, donation_amount) VALUES (1, 1, 1, 100.00);", "sql": "SELECT organizations.name FROM organizations WHERE organizations.id IN (SELECT donations.org_id FROM donations JOIN donors ON donations.donor_id = donors.id WHERE donors.state = 'California') AND organizations.id NOT IN (SELECT donations.org_id FROM donations JOIN donors ON donations.donor_id = donors.id WHERE donors.state = 'New York');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 339, "num_statements": 1} {"question": "List the number of offshore drilling platforms in the Arabian Sea as of 2019.", "schema": "CREATE TABLE arabian_sea_platforms (year INT, region VARCHAR(20), num_platforms INT); INSERT INTO arabian_sea_platforms (year, region, num_platforms) VALUES (2015, 'Arabian Sea', 2500), (2016, 'Arabian Sea', 2600), (2017, 'Arabian Sea', 2700), (2018, 'Arabian Sea', 2800), (2019, 'Arabian Sea', 2900), (2020, 'Arabian Sea', 3000);", "sql": "SELECT num_platforms FROM arabian_sea_platforms WHERE year = 2019 AND region = 'Arabian Sea';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 93, "num_statements": 1} {"question": "List all excavation sites and the number of artifacts associated with each site.", "schema": "CREATE TABLE excavation_sites (id INT, site_name VARCHAR(50), location VARCHAR(50), num_artifacts INT); INSERT INTO excavation_sites (id, site_name, location, num_artifacts) VALUES (1, 'Site A', 'USA', 30), (2, 'Site B', 'Mexico', 45), (3, 'Site C', 'Canada', 25);", "sql": "SELECT site_name, num_artifacts FROM excavation_sites;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 54, "num_statements": 1} {"question": "Update the 'OceanFloorMapping' table to correct the depth for the 'Tonga Trench' to 10820 meters", "schema": "CREATE TABLE OceanFloorMapping (id INT, location VARCHAR(50), depth INT); INSERT INTO OceanFloorMapping (id, location, depth) VALUES (1, 'Mariana Trench', 10000), (2, 'Sunda Trench', 8000), (3, 'Philippine Trench', 6500), (4, 'Kermadec Trench', 10000), (5, 'Tonga Trench', 10800);", "sql": "UPDATE OceanFloorMapping SET depth = 10820 WHERE location = 'Tonga Trench';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 75, "num_statements": 1} {"question": "Identify the vehicle models with the highest and lowest safety ratings in the 'safety_test_results' table.", "schema": "CREATE TABLE safety_test_results (id INT, vehicle_model VARCHAR, safety_rating DECIMAL(3,2));", "sql": "SELECT vehicle_model, safety_rating, RANK() OVER (PARTITION BY safety_rating ORDER BY safety_rating DESC) as rank FROM safety_test_results WHERE rank = 1 OR rank = (SELECT COUNT(*) FROM safety_test_results WHERE safety_rating = max_safety_rating) ORDER BY safety_rating;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "query_window_function", "is_postgresql_specific": true, "sql_length": 270, "num_statements": 1} {"question": "What are the product names and their origins for all clothing items in the inventory?", "schema": "CREATE TABLE ClothingInventory (product_id INT, product_name TEXT, origin TEXT); INSERT INTO ClothingInventory (product_id, product_name, origin) VALUES (1, 'Organic Cotton T-Shirt', 'India'), (2, 'Hemp Pants', 'China');", "sql": "SELECT product_name, origin FROM ClothingInventory;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 51, "num_statements": 1} {"question": "What is the total REE production by mine name for 2020?", "schema": "CREATE TABLE mines (id INT, name TEXT, location TEXT, annual_production INT); INSERT INTO mines (id, name, location, annual_production) VALUES (1, 'Mine A', 'Country X', 1500), (2, 'Mine B', 'Country Y', 2000), (3, 'Mine C', 'Country Z', 1750);", "sql": "SELECT name, SUM(annual_production) as total_production FROM mines WHERE YEAR(timestamp) = 2020 GROUP BY name;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 110, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What date was the away team from Adelaide?", "schema": "CREATE TABLE table_name_37 (date VARCHAR, away_team VARCHAR)", "sql": "SELECT date FROM table_name_37 WHERE away_team = 'adelaide';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 60, "num_statements": 1} {"question": "PostgreSQL regression test 'window': Write the SELECT query (example 97).", "schema": null, "sql": "SELECT sum(unique1) over (order by four range between 2::int8 preceding and 1::int2 preceding exclude no others),\n\tunique1, four\nFROM tenk1 WHERE unique1 < 10;", "explanation": "Regression test for Window in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT sum(unique1) over (order by four range between 2::int8 preceding and 1::int2 preceding exclude no others),\n\tunique1, four\nFROM tenk1 WHERE unique1 < 10) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "intermediate", "category": "query_window_function", "is_postgresql_specific": true, "sql_length": 159, "num_statements": 1} {"question": "Identify the number of mental health parity violations by region?", "schema": "CREATE TABLE mental_health_parity_violations (id INT, region VARCHAR(50), violation_count INT); INSERT INTO mental_health_parity_violations (id, region, violation_count) VALUES (1, 'Northeast', 10), (2, 'Southeast', 5), (3, 'Midwest', 15);", "sql": "SELECT region, SUM(violation_count) as total_violations FROM mental_health_parity_violations GROUP BY region;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 109, "num_statements": 1} {"question": "What is the total installed renewable energy capacity for the 'renewable_energy_capacity' table by country?", "schema": "CREATE TABLE renewable_energy_capacity (country VARCHAR(50), wind_capacity NUMERIC(5,2), solar_capacity NUMERIC(5,2)); INSERT INTO renewable_energy_capacity (country, wind_capacity, solar_capacity) VALUES ('Germany', 30.0, 20.0), ('France', 40.0, 30.0), ('Canada', 50.0, 40.0);", "sql": "SELECT SUM(wind_capacity + solar_capacity) FROM renewable_energy_capacity;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 74, "num_statements": 1} {"question": "How many shipwrecks have been recorded in the Caribbean Sea?", "schema": "CREATE TABLE caribbean_sea (id INT, year INT, shipwreck TEXT); INSERT INTO caribbean_sea (id, year, shipwreck) VALUES (1, 1650, 'Santa Maria');", "sql": "SELECT COUNT(shipwreck) FROM caribbean_sea;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 43, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the clock multiplier if the voltage range (v) is 3.3 - 3.6; input clock (mhz) is 33 x 3 / 50 x 2; and part number is a80486dx4-100?", "schema": "CREATE TABLE table_15261_1 (clock_multiplier VARCHAR, part_number VARCHAR, voltage_range__v_ VARCHAR, input_clock__mhz_ VARCHAR)", "sql": "SELECT clock_multiplier FROM table_15261_1 WHERE voltage_range__v_ = '3.3 - 3.6' AND input_clock__mhz_ = '33 X 3 / 50 X 2' AND part_number = 'A80486DX4-100';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 157, "num_statements": 1} {"question": "What is the total number of sensors in the Antarctic?", "schema": "CREATE TABLE sensors ( id INT PRIMARY KEY, location VARCHAR(255), type VARCHAR(255), value DECIMAL(10,2), timestamp TIMESTAMP); INSERT INTO sensors (id, location, type, value, timestamp) VALUES (1, 'Arctic', 'Temperature', 25.5, '2022-01-01 12:00:00'); INSERT INTO sensors (id, location, type, value, timestamp) VALUES (2, 'Antarctic', 'Humidity', 60.0, '2022-01-01 12:00:00');", "sql": "SELECT COUNT(*) FROM sensors WHERE location = 'Antarctic';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 58, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many years have HMOs been 27% and POS plans 18%?", "schema": "CREATE TABLE table_name_54 (year VARCHAR, hmos VARCHAR, pos_plans VARCHAR)", "sql": "SELECT COUNT(year) FROM table_name_54 WHERE hmos = '27%' AND pos_plans = '18%';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 79, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the lowest number of f/laps in the 5th position?", "schema": "CREATE TABLE table_name_17 (f_laps INTEGER, position VARCHAR)", "sql": "SELECT MIN(f_laps) FROM table_name_17 WHERE position = '5th';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 61, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the sanskrit word if the transcription is wan athit?", "schema": "CREATE TABLE table_180802_3 (sanskrit_word VARCHAR, transcription VARCHAR)", "sql": "SELECT sanskrit_word FROM table_180802_3 WHERE transcription = 'wan athit';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 75, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Find the number of distinct type of pets.", "schema": "CREATE TABLE pets (pettype VARCHAR)", "sql": "SELECT COUNT(DISTINCT pettype) FROM pets;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 41, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'tablefunc' (example 14).", "schema": null, "sql": "SELECT * FROM crosstab2('SELECT rowid, attribute, val FROM ct where rowclass = ''group2'' ORDER BY 1,2;');", "explanation": "Example query from the 'tablefunc' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 106, "num_statements": 2} {"question": "Generate PostgreSQL SQL for: WHAT PERCENTAGE OF GLOBAL TOTAL EMISSIONS DID INDIA PRODUCE?", "schema": "CREATE TABLE table_11251601_2 (percentage_of_global_total VARCHAR, country VARCHAR)", "sql": "SELECT percentage_of_global_total FROM table_11251601_2 WHERE country = 'India';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 80, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What are the names of poker players?", "schema": "CREATE TABLE poker_player (People_ID VARCHAR); CREATE TABLE people (Name VARCHAR, People_ID VARCHAR)", "sql": "SELECT T1.Name FROM people AS T1 JOIN poker_player AS T2 ON T1.People_ID = T2.People_ID;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 88, "num_statements": 1} {"question": "Delete a record with Id 123 from the \"Vessels\" table", "schema": "CREATE TABLE Vessels (Id INT PRIMARY KEY, Name VARCHAR(100), Type VARCHAR(50), Year INT);", "sql": "DELETE FROM Vessels WHERE Id = 123;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_delete", "is_postgresql_specific": false, "sql_length": 35, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What record has the score 111-89?", "schema": "CREATE TABLE table_name_75 (record VARCHAR, score VARCHAR)", "sql": "SELECT record FROM table_name_75 WHERE score = '111-89';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 56, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the H/A on 5 August 1990?", "schema": "CREATE TABLE table_name_46 (h___a VARCHAR, date VARCHAR)", "sql": "SELECT h___a FROM table_name_46 WHERE date = '5 august 1990';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 61, "num_statements": 1} {"question": "What is the difference in total points scored between the first and second halves of each NBA game?", "schema": "CREATE TABLE games (game_id INT, first_half_points INT, second_half_points INT);", "sql": "SELECT game_id, first_half_points - second_half_points AS point_difference FROM games;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 86, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is Dave Rummells's to par?", "schema": "CREATE TABLE table_name_22 (to_par VARCHAR, player VARCHAR)", "sql": "SELECT to_par FROM table_name_22 WHERE player = 'dave rummells';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 64, "num_statements": 1} {"question": "Delete records of recycling rates for 2019 in California.", "schema": "CREATE TABLE recycling_rates(year INT, state VARCHAR(255), plastic_recycling FLOAT, paper_recycling FLOAT, glass_recycling FLOAT); INSERT INTO recycling_rates VALUES (2019, 'California', 0.6, 0.7, 0.5), (2020, 'California', 0.7, 0.8, 0.6);", "sql": "DELETE FROM recycling_rates WHERE year = 2019 AND state = 'California';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_delete", "is_postgresql_specific": false, "sql_length": 71, "num_statements": 1} {"question": "What is the average age of volunteers with engineering expertise?", "schema": "CREATE TABLE volunteers (volunteer_id INTEGER, name TEXT, age INTEGER, expertise TEXT); INSERT INTO volunteers (volunteer_id, name, age, expertise) VALUES (1, 'Alice', 25, 'Medical'), (2, 'Bob', 30, 'Engineering'), (3, 'Charlie', 35, 'Medical');", "sql": "SELECT AVG(v.age) FROM volunteers v WHERE v.expertise = 'Engineering';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 70, "num_statements": 1} {"question": "pgTAP test for Ownership (assertion 56).", "schema": null, "sql": "/****************************************************************************/\n-- Test view_owner_is().\nSELECT * FROM check_test(\n view_owner_is('public', 'someview', current_user, 'mumble'),\n\ttrue,\n 'view_owner_is(sch, view, user, desc)',\n 'mumble',\n ''\n);", "explanation": "SQL assertion from pgTAP test suite for Ownership.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 269, "num_statements": 1} {"question": "What is the average market access time for drugs in Africa?", "schema": "CREATE TABLE market_access (access_id INT, drug_name TEXT, access_time INT, region TEXT); INSERT INTO market_access (access_id, drug_name, access_time, region) VALUES (1, 'DrugI', 365, 'Africa'), (2, 'DrugJ', 420, 'Africa');", "sql": "SELECT region, AVG(access_time) as avg_access_time FROM market_access WHERE region = 'Africa';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 94, "num_statements": 1} {"question": "Insert a new record for a company with the name 'LMN Inc.' and an ESG rating of 'A-' into the companies table.", "schema": "CREATE TABLE companies (id INT, name VARCHAR(50), esg_rating VARCHAR(2)); INSERT INTO companies (id, name, esg_rating) VALUES (1, 'ABC Inc.', 'A'); INSERT INTO companies (id, name, esg_rating) VALUES (2, 'DEF Inc.', 'B'); INSERT INTO companies (id, name, esg_rating) VALUES (3, 'XYZ Inc.', 'C');", "sql": "INSERT INTO companies (name, esg_rating) VALUES ('LMN Inc.', 'A-');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 67, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'without_overlaps' (example 147).", "schema": null, "sql": "INSERT INTO temporal_mltrng (id, valid_at) VALUES ('[1,2)', datemultirange(daterange('2018-03-03', '2018-04-04')));", "explanation": "DML from PostgreSQL core regression test for Without Overlaps.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 115, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which winner has a P stage?", "schema": "CREATE TABLE table_name_48 (winner VARCHAR, stage VARCHAR)", "sql": "SELECT winner FROM table_name_48 WHERE stage = 'p';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 51, "num_statements": 1} {"question": "How many users purchased a monthly pass in the last quarter?", "schema": "CREATE TABLE user_profiles (user_id INT, user_name VARCHAR(255), pass_type VARCHAR(255), purchase_date DATE); INSERT INTO user_profiles (user_id, user_name, pass_type, purchase_date) VALUES (1, 'Alice', 'Monthly', '2022-03-01'), (2, 'Bob', 'Weekly', '2022-03-02'), (3, 'Charlie', 'Monthly', '2022-01-01');", "sql": "SELECT COUNT(*) FROM user_profiles WHERE pass_type = 'Monthly' AND purchase_date >= DATE_SUB(CURRENT_DATE, INTERVAL 3 MONTH);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 125, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What department has an m.phil (physics) qualification?", "schema": "CREATE TABLE table_name_25 (department VARCHAR, qualification VARCHAR)", "sql": "SELECT department FROM table_name_25 WHERE qualification = 'm.phil (physics)';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 78, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the average number of weeks that the opponent was the Denver Broncos?", "schema": "CREATE TABLE table_name_78 (week INTEGER, opponent VARCHAR)", "sql": "SELECT AVG(week) FROM table_name_78 WHERE opponent = 'denver broncos';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 70, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the average game number that was on october 19?", "schema": "CREATE TABLE table_name_4 (game INTEGER, october VARCHAR)", "sql": "SELECT AVG(game) FROM table_name_4 WHERE october = 19;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 54, "num_statements": 1} {"question": "Which indigenous communities have experienced relocation due to coastal erosion?", "schema": "CREATE TABLE Relocations (community TEXT, year INT, reason TEXT); INSERT INTO Relocations (community, year, reason) VALUES ('Inuit', 1995, 'Erosion'), ('Inuit', 2005, 'Flooding'), ('Sami', 2010, 'Thawing Permafrost'), ('Gwich’in', 2015, 'Thawing Permafrost'), ('Yupik', 2020, 'Coastal Erosion'), ('Aleut', 2018, 'Coastal Erosion');", "sql": "SELECT community FROM Relocations WHERE reason = 'Coastal Erosion' GROUP BY community;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 86, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What points has 18 for the played and bala rfc as the club?", "schema": "CREATE TABLE table_name_31 (points_for VARCHAR, played VARCHAR, club VARCHAR)", "sql": "SELECT points_for FROM table_name_31 WHERE played = '18' AND club = 'bala rfc';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 79, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: When was the result 0–3?", "schema": "CREATE TABLE table_name_48 (date VARCHAR, result VARCHAR)", "sql": "SELECT date FROM table_name_48 WHERE result = '0–3';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 52, "num_statements": 1} {"question": "Show an example of PostgreSQL ALTER PROCEDURE (example 5).", "schema": null, "sql": "ALTER PROCEDURE check_password(text) SET search_path = admin, pg_temp;", "explanation": "PostgreSQL ALTER PROCEDURE command.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 70, "num_statements": 1} {"question": "What is the total number of tourists who visited eco-friendly accommodations in Scandinavia and the Caribbean?", "schema": "CREATE TABLE scandinavia_accommodations (id INT, country TEXT, eco_visitors INT, year INT); INSERT INTO scandinavia_accommodations VALUES (1, 'Norway', 500, 2022), (2, 'Sweden', 600, 2022); CREATE TABLE caribbean_accommodations (id INT, country TEXT, eco_visitors INT, year INT); INSERT INTO caribbean_accommodations VALUES (1, 'Bahamas', 700, 2022), (2, 'Jamaica', 800, 2022);", "sql": "SELECT SUM(eco_visitors) FROM scandinavia_accommodations WHERE year = 2022 UNION ALL SELECT SUM(eco_visitors) FROM caribbean_accommodations WHERE year = 2022;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 158, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the Location of the 10th Iteration?", "schema": "CREATE TABLE table_name_40 (location VARCHAR, iteration VARCHAR)", "sql": "SELECT location FROM table_name_40 WHERE iteration = '10th';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 60, "num_statements": 1} {"question": "Which tree species have the highest carbon sequestration rate in the tropical region?", "schema": "CREATE TABLE tree_carbon_sequestration (tree_id INT, species_id INT, region_id INT, sequestration_rate DECIMAL(5,2)); INSERT INTO tree_carbon_sequestration (tree_id, species_id, region_id, sequestration_rate) VALUES (1, 1, 1, 20.5), (2, 2, 1, 19.3), (3, 3, 2, 25.0), (4, 4, 2, 23.6); CREATE TABLE regions (region_id INT, region_name VARCHAR(100)); INSERT INTO regions (region_id, region_name) VALUES (1, 'Tropical'), (2, 'Temperate'); CREATE TABLE tree_species (species_id INT, species_name VARCHAR(100)); INSERT INTO tree_species (species_id, species_name) VALUES (1, 'Mahogany'), (2, 'Teak'), (3, 'Ebony'), (4, 'Rosewood');", "sql": "SELECT ts.species_name, tcs.sequestration_rate FROM tree_carbon_sequestration tcs JOIN regions r ON tcs.region_id = r.region_id JOIN tree_species ts ON tcs.species_id = ts.species_id WHERE r.region_name = 'Tropical' ORDER BY tcs.sequestration_rate DESC LIMIT 1;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 261, "num_statements": 1} {"question": "What is the average age of players in the basketball_teams table?", "schema": "CREATE TABLE basketball_teams (team_id INT, team_name VARCHAR(100), player_name VARCHAR(100), player_age INT);", "sql": "SELECT AVG(player_age) FROM basketball_teams;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 45, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What party did the re-elected incumbent of the Texas 11 district belong to?", "schema": "CREATE TABLE table_1342149_43 (party VARCHAR, result VARCHAR, district VARCHAR)", "sql": "SELECT party FROM table_1342149_43 WHERE result = 'Re-elected' AND district = 'Texas 11';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 89, "num_statements": 1} {"question": "pgTAP test for Pgtap--Unpackaged--0.91.0 (assertion 3).", "schema": null, "sql": "ALTER EXTENSION pgtap ADD FUNCTION os_name();", "explanation": "SQL assertion from pgTAP test suite for Pgtap--Unpackaged--0.91.0.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 45, "num_statements": 1} {"question": "What is the average square footage of LEED-certified buildings in New York City, partitioned by their construction types?", "schema": "CREATE TABLE leed_buildings (id INT, building_name VARCHAR(50), square_footage FLOAT, construction_type VARCHAR(50), city VARCHAR(50), state VARCHAR(50)); INSERT INTO leed_buildings (id, building_name, square_footage, construction_type, city, state) VALUES (1, 'GreenHQ', 120000, 'Steel', 'New York', 'NY'), (2, 'EcoTower', 200000, 'Concrete', 'New York', 'NY'), (3, 'SustainableSpire', 180000, 'Wood', 'New York', 'NY');", "sql": "SELECT construction_type, AVG(square_footage) as avg_square_footage FROM leed_buildings WHERE city = 'New York' AND state = 'NY' AND construction_type IN ('Steel', 'Concrete', 'Wood') GROUP BY construction_type;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 211, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'citext' (example 71).", "schema": null, "sql": "SELECT name FROM srt WHERE name !~~ '%B%' ORDER BY name;", "explanation": "Example query from the 'citext' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 56, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which Away team has a Venue of arden street oval?", "schema": "CREATE TABLE table_name_32 (away_team VARCHAR, venue VARCHAR)", "sql": "SELECT away_team FROM table_name_32 WHERE venue = 'arden street oval';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 70, "num_statements": 1} {"question": "What is the average number of hours volunteered per volunteer in 2021?", "schema": "CREATE TABLE VolunteerHours (VolunteerID INT, Hours INT, VolunteerDate DATE); INSERT INTO VolunteerHours VALUES (1, 4, '2021-01-01'), (1, 6, '2021-02-01'), (2, 8, '2021-03-01');", "sql": "SELECT AVG(Hours) FROM VolunteerHours WHERE YEAR(VolunteerDate) = 2021;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 71, "num_statements": 1} {"question": "Get the total spending on military innovation by each country for the year 2019", "schema": "CREATE TABLE military_innovation (id INT, weapon_system VARCHAR(255), country VARCHAR(255), year INT);", "sql": "SELECT country, SUM(year) FROM military_innovation WHERE year = 2019 GROUP BY country;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 86, "num_statements": 1} {"question": "Delete all records from the 'equipment_inventory' table where the quantity is 0.", "schema": "CREATE TABLE equipment_inventory (id INT, type VARCHAR(50), quantity INT); INSERT INTO equipment_inventory (id, type, quantity) VALUES (1, 'Drilling Machine', 15); INSERT INTO equipment_inventory (id, type, quantity) VALUES (2, 'Excavator', 8); INSERT INTO equipment_inventory (id, type, quantity) VALUES (3, 'Bulldozer', 5); INSERT INTO equipment_inventory (id, type, quantity) VALUES (4, 'Dump Truck', 0);", "sql": "DELETE FROM equipment_inventory WHERE quantity = 0;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_delete", "is_postgresql_specific": false, "sql_length": 51, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What type of game was held against France with the results of 3:1?", "schema": "CREATE TABLE table_name_20 (type_of_game VARCHAR, results¹ VARCHAR, opponent VARCHAR)", "sql": "SELECT type_of_game FROM table_name_20 WHERE results¹ = '3:1' AND opponent = 'france';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 86, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the winning score for the tournament played on Jul 14, 1973?", "schema": "CREATE TABLE table_name_96 (winning_score VARCHAR, date VARCHAR)", "sql": "SELECT winning_score FROM table_name_96 WHERE date = 'jul 14, 1973';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 68, "num_statements": 1} {"question": "PostgreSQL regression test 'horology': Write the SELECT query (example 284).", "schema": null, "sql": "SELECT to_timestamp('2011-12-18 11:38 +05:20', 'YYYY-MM-DD HH12:MI TZH:TZM');", "explanation": "Regression test for Horology in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT to_timestamp('2011-12-18 11:38 +05:20', 'YYYY-MM-DD HH12:MI TZH:TZM')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 77, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the greatest B score when the A score was less than 6.5?", "schema": "CREATE TABLE table_name_4 (b_score INTEGER, a_score INTEGER)", "sql": "SELECT MAX(b_score) FROM table_name_4 WHERE a_score < 6.5;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 58, "num_statements": 1} {"question": "pgTAP test for Privs (assertion 21).", "schema": null, "sql": "SELECT * FROM run_extra_fails();", "explanation": "SQL assertion from pgTAP test suite for Privs.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 32, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Tell me the result for green bay packers", "schema": "CREATE TABLE table_name_77 (result VARCHAR, opponent VARCHAR)", "sql": "SELECT result FROM table_name_77 WHERE opponent = 'green bay packers';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 70, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the location/state of the Launceston race?", "schema": "CREATE TABLE table_name_60 (location___state VARCHAR, race_title VARCHAR)", "sql": "SELECT location___state FROM table_name_60 WHERE race_title = 'launceston';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 75, "num_statements": 1} {"question": "How many visitors identified as BIPOC attended the last three exhibitions at the local museum?", "schema": "CREATE TABLE museum_visitors (id INT, exhibition_date DATE, is_bipoc BOOLEAN); INSERT INTO museum_visitors (id, exhibition_date, is_bipoc) VALUES (1, '2022-05-01', TRUE), (2, '2022-05-01', FALSE), (3, '2022-05-02', TRUE);", "sql": "SELECT COUNT(*) FROM (SELECT * FROM museum_visitors WHERE exhibition_date IN ('2022-04-30', '2022-05-01', '2022-05-02') AND is_bipoc = TRUE) AS subquery;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 153, "num_statements": 1} {"question": "List the names, roles, and years of experience of all male mining engineers.", "schema": "CREATE TABLE mine_operators (id INT PRIMARY KEY, name VARCHAR(50), role VARCHAR(50), gender VARCHAR(10), years_of_experience INT); INSERT INTO mine_operators (id, name, role, gender, years_of_experience) VALUES (1, 'John Doe', 'Mining Engineer', 'Male', 7), (2, 'Maria', 'Mining Engineer', 'Female', 5);", "sql": "SELECT name, role, years_of_experience FROM mine_operators WHERE gender = 'Male';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 81, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the 1.93 that is 2.05 in height?", "schema": "CREATE TABLE table_name_77 (height VARCHAR)", "sql": "SELECT 193 FROM table_name_77 WHERE height = 2.05;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 50, "num_statements": 1} {"question": "Identify AI models with low performance scores and their sectors.", "schema": "CREATE TABLE ai_models (model_name TEXT, performance_score INTEGER, sector TEXT); INSERT INTO ai_models (model_name, performance_score, sector) VALUES ('Model1', 75, 'Finance'), ('Model2', 85, 'Healthcare'), ('Model3', 60, 'Finance'), ('Model4', 90, 'Healthcare');", "sql": "SELECT sector, model_name, performance_score FROM ai_models WHERE performance_score < 70;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "plpgsql", "is_postgresql_specific": false, "sql_length": 89, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: On what day was the score of 1–6 6–1 3–6 achieved?", "schema": "CREATE TABLE table_name_12 (date VARCHAR, score VARCHAR)", "sql": "SELECT date FROM table_name_12 WHERE score = '1–6 6–1 3–6';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 59, "num_statements": 1} {"question": "Insert data into the archaeologist table", "schema": "CREATE TABLE Archaeologists (ArchaeologistID INT PRIMARY KEY, FirstName VARCHAR(255), LastName VARCHAR(255), Specialty TEXT, Country VARCHAR(255)); INSERT INTO Archaeologists (ArchaeologistID, FirstName, LastName, Specialty, Country) VALUES (1, 'Howard', 'Carter', 'Egyptology', 'England');", "sql": "INSERT INTO Archaeologists (ArchaeologistID, FirstName, LastName, Specialty, Country) VALUES (1, 'Howard', 'Carter', 'Egyptology', 'England');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 142, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the average drawn of the team with a difference of 4 and more than 13 losses?", "schema": "CREATE TABLE table_name_30 (drawn INTEGER, difference VARCHAR, lost VARCHAR)", "sql": "SELECT AVG(drawn) FROM table_name_30 WHERE difference = '4' AND lost > 13;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 74, "num_statements": 1} {"question": "PostgreSQL regression test 'xml': Write the SELECT query (example 107).", "schema": null, "sql": "SELECT xmlserialize(CONTENT '42' AS text) = xmlserialize(CONTENT '42' AS text NO INDENT);", "explanation": "Regression test for Xml in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT xmlserialize(CONTENT '42' AS text) = xmlserialize(CONTENT '42' AS text NO INDENT)) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 168, "num_statements": 1} {"question": "Update records of donors with the first name 'John' and add a $250 bonus donation made on Dec 31, 2022", "schema": "CREATE TABLE Donors (DonorID INT, FirstName VARCHAR(50), LastName VARCHAR(50), DonationDate DATE, Amount DECIMAL(10,2));", "sql": "UPDATE Donors SET DonationDate = '2022-12-31', Amount = Amount + 250 WHERE FirstName = 'John';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 94, "num_statements": 1} {"question": "PostgreSQL regression test 'generated_stored': Write the SELECT query (example 26).", "schema": null, "sql": "SELECT gtest1 FROM gtest1 ORDER BY a; -- whole-row reference\nSELECT a, (SELECT gtest1.b) FROM gtest1 ORDER BY a; -- sublink\nDELETE FROM gtest1 WHERE a >= 3;", "explanation": "Regression test for Generated Stored in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT gtest1 FROM gtest1 ORDER BY a; -- whole-row reference\nSELECT a, (SELECT gtest1.b) FROM gtest1 ORDER BY a; -- sublink\nDELETE FROM gtest1 WHERE a >= 3) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "dml_delete", "is_postgresql_specific": false, "sql_length": 158, "num_statements": 3} {"question": "What is the average donation amount for 'Anna Lee' in 2021?", "schema": "CREATE TABLE donors (donor_id INT, donor_name TEXT, donation_amount FLOAT, cause TEXT, donation_date DATE);", "sql": "SELECT AVG(donation_amount) FROM donors WHERE donor_name = 'Anna Lee' AND donation_date BETWEEN '2021-01-01' AND '2021-12-31';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 126, "num_statements": 1} {"question": "PostgreSQL regression test 'numeric': Write the SELECT query (example 733).", "schema": null, "sql": "SELECT to_number('$1234.56','L9,999.99');", "explanation": "Regression test for Numeric in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT to_number('$1234.56','L9,999.99')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 41, "num_statements": 1} {"question": "What was the market access strategy for 'DrugB' in Japan?", "schema": "CREATE TABLE market_access(drug_name TEXT, market_country TEXT, strategy_description TEXT); INSERT INTO market_access(drug_name, market_country, strategy_description) VALUES('DrugB', 'Japan', 'Direct to consumer');", "sql": "SELECT strategy_description FROM market_access WHERE drug_name = 'DrugB' AND market_country = 'Japan';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 102, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who is the GamePlan when Jacqueline King is the actor in the original production?", "schema": "CREATE TABLE table_name_71 (gameplan VARCHAR, actor_in_original_production VARCHAR)", "sql": "SELECT gameplan FROM table_name_71 WHERE actor_in_original_production = 'jacqueline king';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 90, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'tsearch' (example 389).", "schema": null, "sql": "insert into phrase_index_test values (to_tsvector('english', 'A fat cat has just eaten a rat.'));", "explanation": "DML from PostgreSQL core regression test for Tsearch.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 97, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Can you tell me the Record that has the Opponent of vs. hamilton tiger cats?", "schema": "CREATE TABLE table_name_55 (record VARCHAR, opponent VARCHAR)", "sql": "SELECT record FROM table_name_55 WHERE opponent = 'vs. hamilton tiger cats';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 76, "num_statements": 1} {"question": "What is the average heart rate during workouts for members who have a premium membership?", "schema": "CREATE TABLE member_heart_rate (member_id INT, workout_id INT, heart_rate INT); INSERT INTO member_heart_rate (member_id, workout_id, heart_rate) VALUES (1, 1, 120), (1, 2, 130), (2, 3, 150), (2, 4, 140), (3, 5, 110); CREATE TABLE premium_members (member_id INT, is_premium BOOLEAN); INSERT INTO premium_members (member_id, is_premium) VALUES (1, TRUE), (2, TRUE), (3, FALSE);", "sql": "SELECT AVG(heart_rate) FROM member_heart_rate JOIN premium_members ON member_heart_rate.member_id = premium_members.member_id WHERE is_premium = TRUE;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 150, "num_statements": 1} {"question": "What is the number of community development initiatives and their success rate, categorized by region and initiative status?", "schema": "CREATE TABLE initiative (initiative_id INT, initiative_start_date DATE, initiative_end_date DATE, budget FLOAT, region VARCHAR(50), status VARCHAR(50), success BOOLEAN); INSERT INTO initiative (initiative_id, initiative_start_date, initiative_end_date, budget, region, status, success) VALUES (3, '2018-01-01', '2019-12-31', 30000.0, 'R3', 'completed', true), (4, '2020-06-15', '2022-05-30', 80000.0, 'R4', 'in progress', false);", "sql": "SELECT region, status, COUNT(*) AS num_initiatives, SUM(success) AS num_successful FROM initiative GROUP BY region, status;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 123, "num_statements": 1} {"question": "What is the total revenue generated by virtual tours in Mexico and Brazil?", "schema": "CREATE TABLE countries (country_id INT, country TEXT); INSERT INTO countries (country_id, country) VALUES (1, 'Mexico'), (2, 'Brazil'); CREATE TABLE virtual_tours (tour_id INT, country_id INT, views INT, revenue FLOAT); INSERT INTO virtual_tours (tour_id, country_id, views, revenue) VALUES (1, 1, 500, 200.0), (2, 1, 600, 300.0), (3, 2, 700, 400.0), (4, 2, 800, 500.0);", "sql": "SELECT SUM(revenue) FROM virtual_tours WHERE country_id IN (SELECT country_id FROM countries WHERE country IN ('Mexico', 'Brazil'));", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 132, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who were the rowers when notes were sa/b, with a time of 5:51.30?", "schema": "CREATE TABLE table_name_76 (rowers VARCHAR, notes VARCHAR, time VARCHAR)", "sql": "SELECT rowers FROM table_name_76 WHERE notes = 'sa/b' AND time = '5:51.30';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 75, "num_statements": 1} {"question": "How many teachers have completed the 'Critical Pedagogy' course in 2023?", "schema": "CREATE TABLE Completions (CompletionID INT, StudentID INT, CourseID INT, CompletionDate DATE); INSERT INTO Completions (CompletionID, StudentID, CourseID, CompletionDate) VALUES (1, 1, 1, '2023-01-01'), (2, 2, 2, '2023-01-02'), (3, 3, 1, '2023-01-03'), (4, 5, 2, '2023-01-04'); CREATE TABLE Courses (CourseID INT, CourseName VARCHAR(50), Cost INT, CertificationYear INT, TargetAudience VARCHAR(20)); INSERT INTO Courses (CourseID, CourseName, Cost, CertificationYear, TargetAudience) VALUES (1, 'Critical Pedagogy', 250, 2023, 'Teachers'), (2, 'Data Analysis for Educators', 250, 2023, 'Teachers');", "sql": "SELECT COUNT(*) FROM Completions INNER JOIN Courses ON Completions.CourseID = Courses.CourseID WHERE Courses.CourseName = 'Critical Pedagogy' AND CertificationYear = 2023 AND TargetAudience = 'Teachers';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 203, "num_statements": 1} {"question": "What was the total R&D expenditure on drugs approved in 2019?", "schema": "CREATE TABLE drug_approval (drug_name TEXT, approval_year INTEGER);", "sql": "SELECT SUM(expenditure) FROM rd_expenditure INNER JOIN drug_approval a ON rd_expenditure.drug_name = a.drug_name WHERE a.approval_year = 2019;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 142, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: On what date was the surface clay with Cyril Saulnier as the opponent in the final?", "schema": "CREATE TABLE table_name_28 (date VARCHAR, surface VARCHAR, opponent_in_the_final VARCHAR)", "sql": "SELECT date FROM table_name_28 WHERE surface = 'clay' AND opponent_in_the_final = 'cyril saulnier';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 99, "num_statements": 1} {"question": "Show a SQL definition from the timescaledb project (with_telemetry, item 1).", "schema": null, "sql": "CREATE OR REPLACE FUNCTION @extschema@.get_telemetry_report()\n RETURNS jsonb AS '@MODULE_PATHNAME@', 'ts_telemetry_get_report_jsonb'\n LANGUAGE C STABLE PARALLEL SAFE;", "explanation": "SQL definition from the open-source timescaledb PostgreSQL project.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "plpgsql_function", "is_postgresql_specific": true, "sql_length": 178, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who had a to par of +2 and a score of 78-69-68=215?", "schema": "CREATE TABLE table_name_22 (player VARCHAR, to_par VARCHAR, score VARCHAR)", "sql": "SELECT player FROM table_name_22 WHERE to_par = '+2' AND score = 78 - 69 - 68 = 215;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 84, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the City of license that has a ERP kW more than 4.5 and w23dr-d?", "schema": "CREATE TABLE table_name_45 (city_of_license VARCHAR, erp_kw VARCHAR, station VARCHAR)", "sql": "SELECT city_of_license FROM table_name_45 WHERE erp_kw > 4.5 AND station = 'w23dr-d';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 85, "num_statements": 1} {"question": "What is the minimum and maximum age of players who have participated in esports events?", "schema": "CREATE TABLE EsportsPlayers (PlayerID INT, Age INT, EventID INT); INSERT INTO EsportsPlayers (PlayerID, Age, EventID) VALUES (1, 22, 1), (2, 25, 2), (3, 28, 3), (4, 30, 4);", "sql": "SELECT MIN(Age), MAX(Age) FROM EsportsPlayers;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 46, "num_statements": 1} {"question": "List all climate change projects in India", "schema": "CREATE TABLE climate_projects (project_id INT PRIMARY KEY, name VARCHAR(255), country VARCHAR(255), budget FLOAT); INSERT INTO climate_projects (project_id, name, country, budget) VALUES (1, 'Solar Power India', 'India', 5000000); INSERT INTO climate_projects (project_id, name, country, budget) VALUES (2, 'Wind Farms India', 'India', 7000000);", "sql": "SELECT * FROM climate_projects WHERE country = 'India';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 55, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'conversion' (example 25).", "schema": null, "sql": "CREATE TABLE utf8_inputs (inbytes bytea, description text);", "explanation": "DDL from PostgreSQL core regression test for Conversion.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_table", "is_postgresql_specific": false, "sql_length": 59, "num_statements": 1} {"question": "PL/pgSQL test: Plpgsql Call (example 80).", "schema": null, "sql": "DO $$\nBEGIN\n UPDATE t_test SET x = x + 1;\n RAISE NOTICE 'f_get_x(%)', f_get_x();\n CALL f_print_x(f_get_x());\n UPDATE t_test SET x = x + 1;\n RAISE NOTICE 'f_get_x(%)', f_get_x();\n CALL f_print_x(f_get_x());\nEND\n$$;", "explanation": "PL/pgSQL example from PostgreSQL source test for Plpgsql Call.", "validation_query": null, "source": "plpgsql_source", "difficulty": "advanced", "category": "plpgsql", "is_postgresql_specific": true, "sql_length": 219, "num_statements": 7} {"question": "Generate PostgreSQL SQL for: What is the greatest played with a drawn less than 1 and a position of less than 1?", "schema": "CREATE TABLE table_name_99 (played INTEGER, drawn VARCHAR, position VARCHAR)", "sql": "SELECT MAX(played) FROM table_name_99 WHERE drawn < 1 AND position < 1;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 71, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which venue has a week smaller than 2?", "schema": "CREATE TABLE table_name_29 (venue VARCHAR, week INTEGER)", "sql": "SELECT venue FROM table_name_29 WHERE week < 2;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 47, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: When 60 is the tries against what is the tries for?", "schema": "CREATE TABLE table_17941032_1 (tries_for VARCHAR, tries_against VARCHAR)", "sql": "SELECT tries_for FROM table_17941032_1 WHERE tries_against = '60';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "What is the average budget of SIGINT agencies by region?", "schema": "CREATE SCHEMA if not exists sigint_budget AUTHORIZATION defsec;CREATE TABLE if not exists sigint_budget.info (id INT, name VARCHAR(100), region VARCHAR(50), budget INT);INSERT INTO sigint_budget.info (id, name, region, budget) VALUES (1, 'NSA', 'US - East', 15000000000);INSERT INTO sigint_budget.info (id, name, region, budget) VALUES (2, 'GCHQ', 'Europe - West', 8000000000);INSERT INTO sigint_budget.info (id, name, region, budget) VALUES (3, 'DGSE', 'Europe - West', 5000000000);", "sql": "SELECT region, AVG(budget) as avg_budget FROM sigint_budget.info WHERE name LIKE '%SIGINT%' GROUP BY region;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 108, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Show the first year and last year of parties with theme \"Spring\" or \"Teqnology\".", "schema": "CREATE TABLE party (First_year VARCHAR, Last_year VARCHAR, Party_Theme VARCHAR)", "sql": "SELECT First_year, Last_year FROM party WHERE Party_Theme = 'Spring' OR Party_Theme = 'Teqnology';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 98, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the 2011 number (,000) when the status is separated?", "schema": "CREATE TABLE table_273617_6 (status VARCHAR)", "sql": "SELECT 2011 AS _number__, 000 AS _ FROM table_273617_6 WHERE status = 'Separated';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 82, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: what album is 4:30 long", "schema": "CREATE TABLE table_name_95 (album VARCHAR, length VARCHAR)", "sql": "SELECT album FROM table_name_95 WHERE length = '4:30';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 54, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: what's the championship where winning score is −12 (74-66-65-71=276)", "schema": "CREATE TABLE table_11570261_1 (championship VARCHAR, winning_score VARCHAR)", "sql": "SELECT championship FROM table_11570261_1 WHERE winning_score = −12(74 - 66 - 65 - 71 = 276);", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 93, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'constraints' (example 513).", "schema": null, "sql": "CREATE ROLE regress_constraint_comments_noaccess;", "explanation": "DDL from PostgreSQL core regression test for Constraints.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 49, "num_statements": 1} {"question": "PostgreSQL regression test 'regproc': Write the SELECT query (example 44).", "schema": null, "sql": "SELECT regcollation('-')::oid;", "explanation": "Regression test for Regproc in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT regcollation('-')::oid) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 30, "num_statements": 1} {"question": "What is the average claim amount for health insurance policies, for policyholders over the age of 65?", "schema": "CREATE TABLE claims (claim_id INT, policy_id INT, claim_amount DECIMAL(10,2)); CREATE TABLE policies (policy_id INT, policy_holder_id INT, policy_type VARCHAR(50), issue_date DATE, policy_holder_dob DATE);", "sql": "SELECT AVG(claim_amount) FROM claims c JOIN policies p ON c.policy_id = p.policy_id WHERE p.policy_type = 'health' AND DATEDIFF(YEAR, p.policy_holder_dob, GETDATE()) > 65;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 171, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which To par has a Place of t6, and a Player of nick faldo?", "schema": "CREATE TABLE table_name_56 (to_par VARCHAR, place VARCHAR, player VARCHAR)", "sql": "SELECT to_par FROM table_name_56 WHERE place = 't6' AND player = 'nick faldo';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 78, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which Title has a Type of album in 1983?", "schema": "CREATE TABLE table_name_34 (title VARCHAR, type VARCHAR, year VARCHAR)", "sql": "SELECT title FROM table_name_34 WHERE type = 'album' AND year = 1983;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 69, "num_statements": 1} {"question": "PostgreSQL regression test 'opr_sanity': Write the SELECT query (example 31).", "schema": null, "sql": "SELECT p1.oid, p1.proname\nFROM pg_proc as p1\nWHERE proargnames IS NOT NULL AND\n array_length(proargnames,1) < array_length(proargtypes,1);", "explanation": "Regression test for Opr Sanity in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT p1.oid, p1.proname\nFROM pg_proc as p1\nWHERE proargnames IS NOT NULL AND\n array_length(proargnames,1) < array_length(proargtypes,1)) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 141, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which Democratic has a District smaller than 7, and a Republican of dan mansell?", "schema": "CREATE TABLE table_name_99 (democratic VARCHAR, district VARCHAR, republican VARCHAR)", "sql": "SELECT democratic FROM table_name_99 WHERE district < 7 AND republican = 'dan mansell';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 87, "num_statements": 1} {"question": "What is the total number of students and teachers who have ever participated in professional development programs, regardless of completion status, in the education schema?", "schema": "CREATE SCHEMA education;CREATE TABLE professional_development (id INT, role VARCHAR(10), name VARCHAR(50), completed BOOLEAN);INSERT INTO professional_development (id, role, name, completed) VALUES (1, 'student', 'John Doe', FALSE), (2, 'teacher', 'Jane Smith', TRUE);", "sql": "SELECT COUNT(*) FROM education.professional_development;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 56, "num_statements": 1} {"question": "What is the total population of all species in the 'arctic_biodiversity' table?", "schema": "CREATE TABLE arctic_biodiversity (id INTEGER, species VARCHAR(255), population INTEGER);", "sql": "SELECT SUM(population) AS total_population FROM arctic_biodiversity;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 68, "num_statements": 1} {"question": "What are the average temperatures (in Celsius) for all chemical storage facilities in Canada and the United States, grouped by country?", "schema": "CREATE TABLE storage_facilities (facility_id INT, facility_name TEXT, country TEXT, temperature DECIMAL(5,2)); INSERT INTO storage_facilities (facility_id, facility_name, country, temperature) VALUES (1, 'Facility A', 'Canada', 15.5), (2, 'Facility B', 'Canada', 18.3), (3, 'Facility C', 'USA', 22.1), (4, 'Facility D', 'USA', 20.0);", "sql": "SELECT country, AVG(temperature) as avg_temperature FROM storage_facilities WHERE country IN ('Canada', 'USA') GROUP BY country;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 128, "num_statements": 1} {"question": "List the destinations that received less than 10 packages per day on average in October 2021", "schema": "CREATE TABLE Shipments (id INT, destination VARCHAR(50), packages INT, timestamp DATE); INSERT INTO Shipments (id, destination, packages, timestamp) VALUES (1, 'Kolkata', 5, '2021-10-01'), (2, 'Mumbai', 8, '2021-10-02'), (3, 'Delhi', 3, '2021-10-03'), (4, 'Bangalore', 7, '2021-10-04'), (5, 'Hyderabad', 6, '2021-10-05');", "sql": "SELECT destination FROM Shipments WHERE (packages/5) < 10 GROUP BY destination HAVING COUNT(DISTINCT timestamp) > 1;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 116, "num_statements": 1} {"question": "What is the average rating of algorithms 'V' and 'W'?", "schema": "CREATE TABLE IF NOT EXISTS ai_feedback (algorithm_name TEXT, user_feedback TEXT, rating INTEGER); INSERT INTO ai_feedback (algorithm_name, user_feedback, rating) VALUES ('Algorithm T', 'Feedback 1', 4), ('Algorithm U', 'Feedback 2', 5), ('Algorithm V', 'Feedback 3', 5), ('Algorithm W', 'Feedback 4', 4);", "sql": "SELECT AVG(rating) as avg_rating FROM ai_feedback WHERE algorithm_name IN ('Algorithm V', 'Algorithm W');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 105, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the starts for bobby labonte", "schema": "CREATE TABLE table_27786562_1 (starts VARCHAR, driver VARCHAR)", "sql": "SELECT starts FROM table_27786562_1 WHERE driver = 'Bobby Labonte';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 67, "num_statements": 1} {"question": "pgTAP test for Functap (assertion 201).", "schema": null, "sql": "-- Test diagnostics\nSELECT * FROM check_test(\n is_normal_function( 'none', 'whatever' ),\n false,\n 'is_normal_function(nofunc, desc)',\n 'whatever',\n ' Function \"none\"() does not exist'\n);", "explanation": "SQL assertion from pgTAP test suite for Functap.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 204, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the date of the game that had a loss of Gott (2-7)?", "schema": "CREATE TABLE table_name_98 (date VARCHAR, loss VARCHAR)", "sql": "SELECT date FROM table_name_98 WHERE loss = 'gott (2-7)';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 57, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is Party, when Minister is \"Franco Frattini\"?", "schema": "CREATE TABLE table_name_55 (party VARCHAR, minister VARCHAR)", "sql": "SELECT party FROM table_name_55 WHERE minister = 'franco frattini';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 67, "num_statements": 1} {"question": "pgTAP test for Moretap (assertion 4).", "schema": null, "sql": "-- Test fail().\n\\set fail_numb 3\n\\echo ok :fail_numb - Testing fail()\nSELECT is(\n fail('oops'),\n format( E'not ok %1$s - oops\\n# Failed test %1$s: \"oops\"', :fail_numb ),\n 'We should get the proper output from fail()'\n);", "explanation": "SQL assertion from pgTAP test suite for Moretap.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 228, "num_statements": 1} {"question": "What is the minimum mental health parity score for healthcare providers in New York?", "schema": "CREATE TABLE healthcare_providers (id INT, name TEXT, mental_health_parity_score INT, state TEXT); INSERT INTO healthcare_providers (id, name, mental_health_parity_score, state) VALUES (1, 'Dr. Jane Smith', 95, 'New York'); INSERT INTO healthcare_providers (id, name, mental_health_parity_score, state) VALUES (2, 'Dr. Maria Garcia', 88, 'Texas'); INSERT INTO healthcare_providers (id, name, mental_health_parity_score, state) VALUES (3, 'Dr. David Kim', 92, 'New York');", "sql": "SELECT MIN(mental_health_parity_score) FROM healthcare_providers WHERE state = 'New York';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 90, "num_statements": 1} {"question": "What is the total revenue from broadband services for each region?", "schema": "CREATE TABLE broadband_services (service_id INT, region VARCHAR(255), revenue DECIMAL(10,2)); INSERT INTO broadband_services (service_id, region, revenue) VALUES (1, 'North', 5000), (2, 'South', 7000);", "sql": "SELECT region, SUM(revenue) FROM broadband_services GROUP BY region;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 68, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the result of the April 16 game?", "schema": "CREATE TABLE table_name_94 (result VARCHAR, date VARCHAR)", "sql": "SELECT result FROM table_name_94 WHERE date = 'april 16';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 57, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the challenge for episode 28?", "schema": "CREATE TABLE table_24798489_2 (challenge VARCHAR, episode_number VARCHAR)", "sql": "SELECT challenge FROM table_24798489_2 WHERE episode_number = 28;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 65, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which language do 1% of males speak?", "schema": "CREATE TABLE table_name_31 (percentage___percentage_ VARCHAR, males VARCHAR)", "sql": "SELECT percentage___percentage_ FROM table_name_31 WHERE males = '1';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 69, "num_statements": 1} {"question": "What is the total revenue for each artist?", "schema": "CREATE TABLE ArtSales (SaleID INT, ArtistID INT, Revenue INT); INSERT INTO ArtSales (SaleID, ArtistID, Revenue) VALUES (1, 1, 1000), (2, 1, 2000), (3, 2, 3000), (4, 2, 1500), (5, 3, 2500);", "sql": "SELECT ArtistID, SUM(Revenue) as TotalRevenue FROM ArtSales GROUP BY ArtistID;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 78, "num_statements": 1} {"question": "Update the ticket_price for all tickets of the basketball team events by 10%", "schema": "CREATE TABLE tickets (ticket_id INT, team VARCHAR(50), event_date DATE, ticket_price DECIMAL(5, 2)); INSERT INTO tickets (ticket_id, team, event_date, ticket_price) VALUES (1, 'Basketball', '2022-03-01', 50.00), (2, 'Basketball', '2022-03-15', 75.00);", "sql": "UPDATE tickets SET ticket_price = ticket_price * 1.10 WHERE team = 'Basketball';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 80, "num_statements": 1} {"question": "What is the distribution of fair trade coffee beans by country of origin?", "schema": "CREATE TABLE coffee (id INT, country TEXT, percentage FLOAT); INSERT INTO coffee (id, country, percentage) VALUES (1, 'Brazil', 30.0), (2, 'Colombia', 20.0);", "sql": "SELECT country, percentage FROM coffee;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 39, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many points did the Weslake v12 get in 1966?", "schema": "CREATE TABLE table_name_28 (points INTEGER, engine VARCHAR, year VARCHAR)", "sql": "SELECT MAX(points) FROM table_name_28 WHERE engine = 'weslake v12' AND year < 1966;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 83, "num_statements": 1} {"question": "Increase data allowance for customers with network downtime complaints", "schema": "CREATE TABLE mobile_subscribers (id INT, name VARCHAR(255), data_allowance INT, contract_start DATE); INSERT INTO mobile_subscribers (id, name, data_allowance, contract_start) VALUES (1, 'John Doe', 5000, '2020-01-01'), (2, 'Jane Doe', 3000, '2019-01-01'); CREATE TABLE customer_complaints (id INT, subscriber_id INT, complaint_date DATE, complaint_type VARCHAR(255)); INSERT INTO customer_complaints (id, subscriber_id, complaint_date, complaint_type) VALUES (1, 1, '2020-02-01', 'Network Downtime');", "sql": "UPDATE mobile_subscribers SET data_allowance = 7000 WHERE id IN (SELECT subscriber_id FROM customer_complaints WHERE complaint_type = 'Network Downtime');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 154, "num_statements": 1} {"question": "What is the total revenue for each genre in descending order?", "schema": "CREATE TABLE MusicGenre (GenreID INT, GenreName VARCHAR(50), Revenue DECIMAL(10,2)); INSERT INTO MusicGenre (GenreID, GenreName, Revenue) VALUES (1, 'Pop', 500000.00), (2, 'Rock', 450000.00), (3, 'Jazz', 300000.00), (4, 'Country', 250000.00), (5, 'Blues', 200000.00);", "sql": "SELECT GenreName, SUM(Revenue) OVER (ORDER BY GenreName DESC ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS TotalRevenue FROM MusicGenre;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_window_function", "is_postgresql_specific": true, "sql_length": 143, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which episode had bbc ranking and canle ranking of n/a?", "schema": "CREATE TABLE table_24399615_10 (episode_no VARCHAR, bbc_three_weekly_ranking VARCHAR, cable_rank VARCHAR)", "sql": "SELECT COUNT(episode_no) FROM table_24399615_10 WHERE bbc_three_weekly_ranking = 'N/A' AND cable_rank = 'N/A';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 110, "num_statements": 1} {"question": "PostgreSQL Queries: show example 53.", "schema": null, "sql": "WITH t AS ( DELETE FROM foo ) DELETE FROM bar;", "explanation": "Example from PostgreSQL documentation on Queries.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "dml_delete", "is_postgresql_specific": false, "sql_length": 46, "num_statements": 1} {"question": "What is the total installed capacity of wind farms in Germany?", "schema": "CREATE TABLE wind_farms (id INT, name TEXT, country TEXT, capacity FLOAT); INSERT INTO wind_farms (id, name, country, capacity) VALUES (1, 'Windpark Nordsee', 'Germany', 330.0), (2, 'Bard Offshore 1', 'Germany', 400.0);", "sql": "SELECT SUM(capacity) FROM wind_farms WHERE country = 'Germany';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 63, "num_statements": 1} {"question": "Write the PL/pgSQL object from PostgreSQL regression test 'plpgsql' (example 508).", "schema": null, "sql": "$$ language plpgsql;\n\ncreate function stacked_diagnostics_test() returns void as $$\ndeclare _sqlstate text;", "explanation": "PL/pgSQL object from PostgreSQL core test for Plpgsql.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "plpgsql_function", "is_postgresql_specific": true, "sql_length": 107, "num_statements": 2} {"question": "What is the number of students who have taken each open pedagogy course?", "schema": "CREATE TABLE open_pedagogy_courses (course_id INT, course_name VARCHAR(255), student_count INT); INSERT INTO open_pedagogy_courses (course_id, course_name, student_count) VALUES (1001, 'Open Course 1', 50), (1002, 'Open Course 2', 75), (1003, 'Open Course 3', 100), (1004, 'Open Course 4', 125), (1005, 'Open Course 5', 150);", "sql": "SELECT course_name, student_count FROM open_pedagogy_courses;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 61, "num_statements": 1} {"question": "What is the total number of products in the 'sustainable_products' table that are certified by the 'certified_products' table?", "schema": "CREATE TABLE sustainable_products (product_id INT, category VARCHAR(255), price DECIMAL(10,2), recycled BOOLEAN, certified_by INT);CREATE TABLE certified_products (certification_id INT, name VARCHAR(255));", "sql": "SELECT COUNT(*) FROM sustainable_products WHERE certified_by IS NOT NULL;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 73, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many poles were there in 1996?", "schema": "CREATE TABLE table_2182573_2 (poles INTEGER, year VARCHAR)", "sql": "SELECT MIN(poles) FROM table_2182573_2 WHERE year = 1996;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 57, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the black caribbean population when the black African population is less than 10552.0?", "schema": "CREATE TABLE table_19149550_7 (black_caribbean_population INTEGER, black_african_population INTEGER)", "sql": "SELECT MIN(black_caribbean_population) FROM table_19149550_7 WHERE black_african_population < 10552.0;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 102, "num_statements": 1} {"question": "PostgreSQL regression test 'random': Write the SELECT query (example 13).", "schema": null, "sql": "SELECT random('NaN'::numeric, 10);", "explanation": "Regression test for Random in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT random('NaN'::numeric, 10)) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 34, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'btree_gin' (example 15).", "schema": null, "sql": "SELECT * FROM test_timestamp WHERE i>'2004-10-27'::date ORDER BY i;", "explanation": "Example query from the 'btree_gin' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 67, "num_statements": 1} {"question": "What is the total amount of food assistance provided to refugees?", "schema": "CREATE TABLE refugees (id INT, camp_id INT, food_assistance_given BOOLEAN); INSERT INTO refugees (id, camp_id, food_assistance_given) VALUES (1, 1001, TRUE), (2, 1001, FALSE), (3, 1002, TRUE); CREATE TABLE food_assistance (id INT, camp_id INT, amount FLOAT); INSERT INTO food_assistance (id, camp_id, amount) VALUES (1001, 1001, 500), (1002, 1001, 700), (1003, 1002, 900);", "sql": "SELECT SUM(amount) FROM food_assistance fa JOIN refugees r ON fa.camp_id = r.camp_id WHERE r.food_assistance_given = TRUE;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 122, "num_statements": 1} {"question": "What's the total budget for programs in education and healthcare?", "schema": "CREATE TABLE Programs (ProgramID INT, ProgramName TEXT, Budget DECIMAL(10,2), Category TEXT); INSERT INTO Programs (ProgramID, ProgramName, Budget, Category) VALUES (1, 'Healthcare 101', 10000.00, 'Healthcare');", "sql": "SELECT SUM(Budget) FROM Programs WHERE Category IN ('Education', 'Healthcare');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 79, "num_statements": 1} {"question": "Determine the average number of posts per day for the 'social_media' database.", "schema": "CREATE TABLE posts (post_id INT, user_id INT, post_date DATE); INSERT INTO posts (post_id, user_id, post_date) VALUES (1, 1, '2021-01-01'), (2, 1, '2021-01-02'), (3, 2, '2021-01-01'), (4, 3, '2021-01-02'), (5, 3, '2021-01-03'), (6, 4, '2021-01-01');", "sql": "SELECT AVG(num_posts_per_day) FROM (SELECT user_id, COUNT(*) / COUNT(DISTINCT post_date) AS num_posts_per_day FROM posts GROUP BY user_id) AS subquery;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 151, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: In which yearly change was there a capacity of 78.4% and a rank smaller than 3?", "schema": "CREATE TABLE table_name_31 (annual_change VARCHAR, rank VARCHAR, capacity_in_use VARCHAR)", "sql": "SELECT annual_change FROM table_name_31 WHERE rank < 3 AND capacity_in_use = '78.4%';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 85, "num_statements": 1} {"question": "Delete all records in the \"waste_management\" table where the \"waste_type\" is 'Toxic'", "schema": "CREATE TABLE waste_management (waste_id INT PRIMARY KEY, waste_type VARCHAR(20)); INSERT INTO waste_management (waste_id, waste_type) VALUES (1, 'Non-Toxic'), (2, 'Toxic'), (3, 'Non-Toxic');", "sql": "DELETE FROM waste_management WHERE waste_type = 'Toxic';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_delete", "is_postgresql_specific": false, "sql_length": 56, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What hometown is Kyle love from?", "schema": "CREATE TABLE table_name_47 (hometown VARCHAR, name VARCHAR)", "sql": "SELECT hometown FROM table_name_47 WHERE name = 'kyle love';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 60, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the birth date of the woman who ceased to be Queen on 18 Jun 1297?", "schema": "CREATE TABLE table_name_47 (birth VARCHAR, ceased_to_be_queen VARCHAR)", "sql": "SELECT birth FROM table_name_47 WHERE ceased_to_be_queen = '18 jun 1297';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 73, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the average finish in 2007?", "schema": "CREATE TABLE table_2182573_2 (avg_finish VARCHAR, year VARCHAR)", "sql": "SELECT avg_finish FROM table_2182573_2 WHERE year = 2007;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 57, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the sum of the rounds for the player who had a position of LS and an overall draft pick bigger than 230?", "schema": "CREATE TABLE table_name_10 (round INTEGER, position VARCHAR, overall VARCHAR)", "sql": "SELECT SUM(round) FROM table_name_10 WHERE position = 'ls' AND overall > 230;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 77, "num_statements": 1} {"question": "Which ocean has the most maritime safety incidents?", "schema": "CREATE TABLE incidents (location varchar(255), date date); INSERT INTO incidents (location, date) VALUES ('Pacific Ocean', '2021-08-23'), ('Atlantic Ocean', '2022-02-12'), ('Indian Ocean', '2021-11-18');", "sql": "SELECT location, COUNT(*) FROM incidents GROUP BY location ORDER BY COUNT(*) DESC LIMIT 1;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 90, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: In what order were the Bee Gees as the artist when it was a result of bottom 3?", "schema": "CREATE TABLE table_name_30 (order__number VARCHAR, result VARCHAR, original_artist VARCHAR)", "sql": "SELECT order__number FROM table_name_30 WHERE result = 'bottom 3' AND original_artist = 'bee gees';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 99, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which Major League Soccer team for the 2005 season has the lowest goals?", "schema": "CREATE TABLE table_name_63 (goals INTEGER, league VARCHAR, season VARCHAR)", "sql": "SELECT MIN(goals) FROM table_name_63 WHERE league = 'major league soccer' AND season = '2005';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 94, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What date was the game at Comiskey Park and had a 4th Inning?", "schema": "CREATE TABLE table_name_47 (date VARCHAR, location VARCHAR, inning VARCHAR)", "sql": "SELECT date FROM table_name_47 WHERE location = 'comiskey park' AND inning = '4th';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 83, "num_statements": 1} {"question": "What is the minimum production time for fair trade products in South America?", "schema": "CREATE TABLE fair_trade_products (id INT, product VARCHAR(255), south_american_country VARCHAR(255), production_time INT); INSERT INTO fair_trade_products VALUES (1, 'Reusable Tote Bag', 'Brazil', 30), (2, 'Fair Trade Coffee', 'Colombia', 20), (3, 'Handmade Jewelry', 'Peru', 45), (4, 'Organic Cotton T-Shirt', 'Brazil', 60);", "sql": "SELECT MIN(production_time) FROM fair_trade_products WHERE south_american_country IN ('Brazil', 'Colombia', 'Peru');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 116, "num_statements": 1} {"question": "What is the total fuel consumption of each ship type in the fleet?", "schema": "CREATE TABLE fleet (id INT, name VARCHAR(50), type VARCHAR(50), fuel_capacity INT); CREATE TABLE fuel_consumption (id INT, ship_id INT, fuel_consumption INT, consumption_date DATE); INSERT INTO fleet VALUES (1, 'Ship 1', 'Cargo', 10000); INSERT INTO fleet VALUES (2, 'Ship 2', 'Passenger', 12000); INSERT INTO fuel_consumption VALUES (1, 1, 500, '2022-01-01'); INSERT INTO fuel_consumption VALUES (2, 2, 600, '2022-01-15'); INSERT INTO fuel_consumption VALUES (3, 1, 550, '2022-02-01');", "sql": "SELECT fleet.type, SUM(fuel_consumption.fuel_consumption) FROM fleet INNER JOIN fuel_consumption ON fleet.id = fuel_consumption.ship_id GROUP BY fleet.type;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 156, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'fuzzystrmatch' (example 21).", "schema": null, "sql": "SELECT daitch_mokotoff('Kleinman');", "explanation": "Example query from the 'fuzzystrmatch' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 35, "num_statements": 1} {"question": "PostgreSQL regression test 'jsonb': Write the SELECT query (example 730).", "schema": null, "sql": "select jsonb_delete_path('{\"n\":null, \"a\":1, \"b\":[1,2], \"c\":{\"1\":2}, \"d\":{\"1\":[2,3]}}', '{b,-1}');", "explanation": "Regression test for Jsonb in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select jsonb_delete_path('{\"n\":null, \"a\":1, \"b\":[1,2], \"c\":{\"1\":2}, \"d\":{\"1\":[2,3]}}', '{b,-1}')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 97, "num_statements": 1} {"question": "What is the maximum salary of construction workers in each occupation?", "schema": "CREATE TABLE construction_workers (worker_id INT, occupation VARCHAR(50), state VARCHAR(50), salary INT); INSERT INTO construction_workers (worker_id, occupation, state, salary) VALUES (1, 'Carpenter', 'California', 60000); INSERT INTO construction_workers (worker_id, occupation, state, salary) VALUES (2, 'Electrician', 'California', 70000);", "sql": "SELECT occupation, MAX(salary) FROM construction_workers GROUP BY occupation;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 77, "num_statements": 1} {"question": "Show the names of startups that have 'diverse' teams.", "schema": "CREATE TABLE diversity (id INT, startup_name VARCHAR(50), team_diversity VARCHAR(10));", "sql": "SELECT startup_name FROM diversity WHERE team_diversity = 'diverse';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 68, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: List the id and type of each thing, and the details of the organization that owns it.", "schema": "CREATE TABLE Organizations (organization_details VARCHAR, organization_id VARCHAR); CREATE TABLE Things (thing_id VARCHAR, type_of_Thing_Code VARCHAR, organization_id VARCHAR)", "sql": "SELECT T1.thing_id, T1.type_of_Thing_Code, T2.organization_details FROM Things AS T1 JOIN Organizations AS T2 ON T1.organization_id = T2.organization_id;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 153, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'transactions' (example 120).", "schema": null, "sql": "INSERT INTO savepoints VALUES (11);", "explanation": "DML from PostgreSQL core regression test for Transactions.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 35, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What's the Backed after 2008 with a label of Wild World?", "schema": "CREATE TABLE table_name_24 (backed_with VARCHAR, record_label VARCHAR, date VARCHAR)", "sql": "SELECT backed_with FROM table_name_24 WHERE record_label = 'wild world' AND date > 2008;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 88, "num_statements": 1} {"question": "List all AI ethics principles related to data privacy and security.", "schema": "CREATE TABLE AI_Ethics (principle_id INT, principle_name VARCHAR(50), category VARCHAR(20)); INSERT INTO AI_Ethics (principle_id, principle_name, category) VALUES (1, 'Fairness', 'General'), (2, 'Accountability', 'General'), (3, 'Transparency', 'General'), (4, 'Data Privacy', 'Data'), (5, 'Data Security', 'Data');", "sql": "SELECT principle_name FROM AI_Ethics WHERE category = 'Data';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 61, "num_statements": 1} {"question": "Find the minimum sustainable material usage in garments produced in Asia.", "schema": "CREATE TABLE garment_production_asia (id INT, material_percentage DECIMAL, region VARCHAR(20)); INSERT INTO garment_production_asia (id, material_percentage, region) VALUES (1, 75.00, 'Asia'), (2, 80.00, 'Asia'), (3, 78.00, 'Asia');", "sql": "SELECT MIN(material_percentage) FROM garment_production_asia WHERE region = 'Asia';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 83, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Find the name of the products that have the color description \"red\" and have the characteristic name \"fast\".", "schema": "CREATE TABLE CHARACTERISTICS (characteristic_id VARCHAR, characteristic_name VARCHAR); CREATE TABLE ref_colors (color_code VARCHAR, color_description VARCHAR); CREATE TABLE products (product_id VARCHAR, color_code VARCHAR); CREATE TABLE product_characteristics (product_id VARCHAR, characteristic_id VARCHAR)", "sql": "SELECT product_name FROM products AS t1 JOIN product_characteristics AS t2 ON t1.product_id = t2.product_id JOIN CHARACTERISTICS AS t3 ON t2.characteristic_id = t3.characteristic_id JOIN ref_colors AS t4 ON t1.color_code = t4.color_code WHERE t4.color_description = 'red' AND t3.characteristic_name = 'fast';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 308, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What are the High points for Team @ portland?", "schema": "CREATE TABLE table_name_75 (high_points VARCHAR, team VARCHAR)", "sql": "SELECT high_points FROM table_name_75 WHERE team = '@ portland';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 64, "num_statements": 1} {"question": "PostgreSQL regression test 'collate.icu.utf8': Write the SELECT query (example 81).", "schema": null, "sql": "SELECT a, b, greatest(b, 'CCC') FROM collate_test1 ORDER BY 3;", "explanation": "Regression test for Collate.Icu.Utf8 in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT a, b, greatest(b, 'CCC') FROM collate_test1 ORDER BY 3) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 62, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the May 7 finalist?", "schema": "CREATE TABLE table_name_14 (finalist VARCHAR, week VARCHAR)", "sql": "SELECT finalist FROM table_name_14 WHERE week = 'may 7';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 56, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which player had a to par score larger than 7 and a total score of 298?", "schema": "CREATE TABLE table_name_5 (player VARCHAR, to_par VARCHAR, total VARCHAR)", "sql": "SELECT player FROM table_name_5 WHERE to_par > 7 AND total = 298;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 65, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the highest number of games with 1 loss and points less than 18?", "schema": "CREATE TABLE table_name_9 (games INTEGER, lost VARCHAR, points VARCHAR)", "sql": "SELECT MAX(games) FROM table_name_9 WHERE lost = 1 AND points < 18;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 67, "num_statements": 1} {"question": "Find all brands that use ingredients from 'Brazil' and have a safety record after 2021-01-01", "schema": "CREATE TABLE ingredient (product_id INT, ingredient TEXT, origin TEXT);", "sql": "SELECT DISTINCT brand FROM ingredient INNER JOIN safety_record ON ingredient.product_id = safety_record.product_id WHERE origin = 'Brazil' AND report_date > '2021-01-01';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 170, "num_statements": 1} {"question": "What is the total production output for all factories in France?", "schema": "CREATE TABLE factory (id INT, name TEXT, sector TEXT, country TEXT); INSERT INTO factory (id, name, sector, country) VALUES (1, 'FactoryA', 'automotive', 'France'), (2, 'FactoryB', 'aerospace', 'France'), (3, 'FactoryC', 'electronics', 'Germany'); CREATE TABLE production (factory_id INT, output REAL); INSERT INTO production (factory_id, output) VALUES (1, 1000), (1, 1200), (2, 1500), (3, 1800);", "sql": "SELECT SUM(production.output) FROM production INNER JOIN factory ON production.factory_id = factory.id WHERE factory.country = 'France';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 136, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who was the Class C winner in round 8?", "schema": "CREATE TABLE table_24852622_1 (class_c_winner VARCHAR, round VARCHAR)", "sql": "SELECT class_c_winner FROM table_24852622_1 WHERE round = '8';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 62, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which date has a Week smaller than 14, and an Opponent of san francisco 49ers?", "schema": "CREATE TABLE table_name_92 (date VARCHAR, week VARCHAR, opponent VARCHAR)", "sql": "SELECT date FROM table_name_92 WHERE week < 14 AND opponent = 'san francisco 49ers';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 84, "num_statements": 1} {"question": "PostgreSQL regression test 'jsonb': Write the SELECT query (example 129).", "schema": null, "sql": "SELECT jsonb_exists_any('{\"a\":null, \"b\":\"qq\"}', ARRAY['c','a']);", "explanation": "Regression test for Jsonb in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT jsonb_exists_any('{\"a\":null, \"b\":\"qq\"}', ARRAY['c','a'])) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 64, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the total water resources m3 for rainfall being 650", "schema": "CREATE TABLE table_22854436_1 (per_capita_average_annual_renewable_water_resources_m_3 VARCHAR, average_annual_rainfall__mm_ VARCHAR)", "sql": "SELECT COUNT(per_capita_average_annual_renewable_water_resources_m_3) FROM table_22854436_1 WHERE average_annual_rainfall__mm_ = '650';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 135, "num_statements": 1} {"question": "Show the national security events with their corresponding severity levels, and calculate the average severity level.", "schema": "CREATE TABLE national_security_events (id INT, event VARCHAR, severity INT); INSERT INTO national_security_events (id, event, severity) VALUES (1, 'Terrorist Attack', 8), (2, 'Cyber Espionage', 5), (3, 'Nuclear Missile Test', 10);", "sql": "SELECT event, severity, AVG(severity) OVER () as avg_severity FROM national_security_events;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_window_function", "is_postgresql_specific": false, "sql_length": 92, "num_statements": 1} {"question": "Update the gender of the founder for a specific company.", "schema": "CREATE TABLE companies (id INT, name TEXT, founded_date DATE, founder_gender TEXT); INSERT INTO companies (id, name, founded_date, founder_gender) VALUES (1, 'Acme Inc', '2010-01-01', 'male'); INSERT INTO companies (id, name, founded_date, founder_gender) VALUES (2, 'Beta Corp', '2015-05-15', 'male');", "sql": "UPDATE companies SET founder_gender = 'female' WHERE id = 1;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 60, "num_statements": 1} {"question": "List the names and ethnicities of all journalists in the 'reporting' department", "schema": "CREATE TABLE journalists (id INT PRIMARY KEY, name VARCHAR(255), department VARCHAR(255), ethnicity VARCHAR(255)); INSERT INTO journalists (id, name, department, ethnicity) VALUES (1, 'Sanaa Ahmed', 'reporting', 'Pakistani'), (2, 'José Rodriguez', 'reporting', 'Mexican'), (3, 'Emma Thompson', 'editing', 'British'), (4, 'Mohammed Ali', 'photography', 'Egyptian');", "sql": "SELECT name, ethnicity FROM journalists WHERE department = 'reporting';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 71, "num_statements": 1} {"question": "PostgreSQL regression test 'interval': Write the SELECT query (example 257).", "schema": null, "sql": "select interval '-9223372036854775808 microseconds -0.01 months';", "explanation": "Regression test for Interval in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select interval '-9223372036854775808 microseconds -0.01 months') AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 65, "num_statements": 1} {"question": "Display the number of construction workers who have worked on projects in each city, sorted alphabetically by city name.", "schema": "CREATE TABLE construction_workers (worker_id INT, name TEXT); CREATE TABLE project_locations (project_id INT, city TEXT, state TEXT); CREATE TABLE worker_projects (worker_id INT, project_id INT); INSERT INTO construction_workers (worker_id, name) VALUES (1, 'John Doe'), (2, 'Jane Smith'), (3, 'Maria Garcia'), (4, 'Ahmed Patel'); INSERT INTO project_locations (project_id, city, state) VALUES (1, 'New York City', 'New York'), (2, 'Houston', 'Texas'), (3, 'Los Angeles', 'California'), (4, 'New York City', 'New York'); INSERT INTO worker_projects (worker_id, project_id) VALUES (1, 1), (1, 2), (2, 2), (3, 1), (3, 3), (4, 1), (4, 4);", "sql": "SELECT project_locations.city, COUNT(DISTINCT worker_projects.worker_id) FROM worker_projects INNER JOIN project_locations ON worker_projects.project_id = project_locations.project_id GROUP BY project_locations.city ORDER BY project_locations.city ASC;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 252, "num_statements": 1} {"question": "How many hotels in 'Tokyo' have adopted AI chatbots?", "schema": "CREATE TABLE hotels (hotel_id INT, name TEXT, city TEXT, ai_chatbot BOOLEAN);", "sql": "SELECT city, COUNT(*) as num_hotels FROM hotels WHERE city = 'Tokyo' AND ai_chatbot = TRUE GROUP BY city;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 105, "num_statements": 1} {"question": "What is the average funding amount received by startups founded by women in the healthcare industry?", "schema": "CREATE TABLE company (id INT, name TEXT, industry TEXT, founder_gender TEXT); INSERT INTO company (id, name, industry, founder_gender) VALUES (1, 'MedHealth', 'Healthcare', 'Female'); INSERT INTO company (id, name, industry, founder_gender) VALUES (2, 'TechBio', 'Biotechnology', 'Male');", "sql": "SELECT AVG(funding_amount) FROM funding INNER JOIN company ON funding.company_id = company.id WHERE company.founder_gender = 'Female' AND company.industry = 'Healthcare';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 170, "num_statements": 1} {"question": "What is the total landfill capacity for each region in the country of Canada?", "schema": "CREATE TABLE regions (id INT, name VARCHAR(255), country VARCHAR(255)); INSERT INTO regions (id, name, country) VALUES (1, 'Ontario', 'Canada'), (2, 'Quebec', 'Canada'), (3, 'British Columbia', 'Canada'); CREATE TABLE landfill_capacity (region_id INT, capacity INT); INSERT INTO landfill_capacity (region_id, capacity) VALUES (1, 1000000), (1, 1200000), (2, 800000), (3, 1500000);", "sql": "SELECT r.name as region, SUM(lc.capacity) as total_capacity FROM regions r JOIN landfill_capacity lc ON r.id = lc.region_id WHERE r.country = 'Canada' GROUP BY r.name;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 167, "num_statements": 1} {"question": "Determine the average ocean acidification level in the Arctic Ocean.", "schema": "CREATE TABLE ocean_acidification_arctic (location VARCHAR(255), level FLOAT); INSERT INTO ocean_acidification_arctic (location, level) VALUES ('Arctic Ocean', 8.15);", "sql": "SELECT AVG(level) FROM ocean_acidification_arctic WHERE location = 'Arctic Ocean';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 82, "num_statements": 1} {"question": "What is the total duration of all films in the 'foreign' category?", "schema": "CREATE TABLE films (id INT, title TEXT, category TEXT, duration INT); INSERT INTO films (id, title, category, duration) VALUES (1, 'Film 1', 'foreign', 120), (2, 'Film 2', 'domestic', 90), (3, 'Film 3', 'foreign', 180);", "sql": "SELECT SUM(duration) FROM films WHERE category = 'foreign';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 59, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the loss of the game attended by 29,704?", "schema": "CREATE TABLE table_name_21 (loss VARCHAR, attendance VARCHAR)", "sql": "SELECT loss FROM table_name_21 WHERE attendance = '29,704';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 59, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the total number for the Bulls when they were at Old Trafford?", "schema": "CREATE TABLE table_name_27 (bulls VARCHAR, venue VARCHAR)", "sql": "SELECT COUNT(bulls) FROM table_name_27 WHERE venue = 'old trafford';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 68, "num_statements": 1} {"question": "What is the average carbon price in the EU and USA?", "schema": "CREATE TABLE carbon_pricing (country VARCHAR(20), carbon_price DECIMAL(5,2)); INSERT INTO carbon_pricing (country, carbon_price) VALUES ('Germany', 25.00), ('France', 32.00), ('USA', 12.00), ('UK', 28.00), ('Italy', 22.00);", "sql": "SELECT AVG(carbon_price) FROM carbon_pricing WHERE country IN ('EU', 'USA');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 76, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the broadcast are for channel 1?", "schema": "CREATE TABLE table_2638104_1 (broadcast_area VARCHAR, channel VARCHAR)", "sql": "SELECT broadcast_area FROM table_2638104_1 WHERE channel = 1;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 61, "num_statements": 1} {"question": "Present founders who identify as LGBTQ+.", "schema": "CREATE TABLE lgbtq_founders (company_id INT, founder_id INT, founder_lgbtq BOOLEAN); INSERT INTO lgbtq_founders (company_id, founder_id, founder_lgbtq) VALUES (1, 1, FALSE), (1, 2, TRUE), (2, 1, FALSE), (3, 1, FALSE);", "sql": "SELECT company_id, founder_id FROM lgbtq_founders WHERE founder_lgbtq = TRUE;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 77, "num_statements": 1} {"question": "pgTAP test for Policy (assertion 19).", "schema": null, "sql": "/****************************************************************************/\n-- Test policies_are().\nSELECT * FROM check_test(\n policies_are( 'public', 'passwd', ARRAY['root_all', 'all_view', 'user_mod', 'daemon_insert', 'daemon_delete'], 'whatever' ),\n true,\n 'policies_are(schema, table, policies, desc)',\n 'whatever',\n ''\n);", "explanation": "SQL assertion from pgTAP test suite for Policy.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 344, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which song has a Picturization of Vijay?", "schema": "CREATE TABLE table_name_86 (song VARCHAR, picturization VARCHAR)", "sql": "SELECT song FROM table_name_86 WHERE picturization = 'vijay';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 61, "num_statements": 1} {"question": "What is the total number of pollution control initiatives in the 'Pollution' schema from 2018 to 2021?", "schema": "CREATE SCHEMA Pollution; CREATE TABLE Initiatives (initiative_id INT, year INT); INSERT INTO Initiatives (initiative_id, year) VALUES (1, 2018), (2, 2019), (3, 2020), (4, 2021), (5, 2022);", "sql": "SELECT COUNT(*) FROM Pollution.Initiatives WHERE year BETWEEN 2018 AND 2021;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 76, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Can you tell me the average Total that has the To par of 15?", "schema": "CREATE TABLE table_name_36 (total INTEGER, to_par VARCHAR)", "sql": "SELECT AVG(total) FROM table_name_36 WHERE to_par = 15;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 55, "num_statements": 1} {"question": "pgTAP test for Pgtap--Unpackaged--0.91.0 (assertion 620).", "schema": null, "sql": "ALTER EXTENSION pgtap ADD FUNCTION set_hasnt( TEXT, TEXT );", "explanation": "SQL assertion from pgTAP test suite for Pgtap--Unpackaged--0.91.0.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 59, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the low week from october 15, 1961?", "schema": "CREATE TABLE table_name_82 (week INTEGER, date VARCHAR)", "sql": "SELECT MIN(week) FROM table_name_82 WHERE date = 'october 15, 1961';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 68, "num_statements": 1} {"question": "Determine the number of customers who exclusively purchase sustainable clothing.", "schema": "CREATE TABLE Customers (CustomerID INT, PurchaseHistory VARCHAR(255)); INSERT INTO Customers (CustomerID, PurchaseHistory) VALUES (1, 'Organic Cotton T-Shirt, Recycled Polyester Leggings'), (2, 'Bamboo Blouse, Conventional Cotton Jeans'), (3, 'Tencel Skirt'), (4, 'Recycled Nylon Jacket'), (5, 'Viscose Dress');", "sql": "SELECT COUNT(*) FROM Customers WHERE PurchaseHistory NOT LIKE '%Conventional Cotton%' AND PurchaseHistory NOT LIKE '%Viscose%';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 127, "num_statements": 1} {"question": "How many wheelchair accessible taxis are available in the fleet?", "schema": "CREATE TABLE taxis (id INT, type VARCHAR(10), capacity INT, accessible BOOLEAN); INSERT INTO taxis (id, type, capacity, accessible) VALUES (1, 'Sedan', 4, FALSE), (2, 'SUV', 6, FALSE), (3, 'Wheelchair', 4, TRUE);", "sql": "SELECT COUNT(*) FROM taxis WHERE type = 'Wheelchair' AND accessible = TRUE;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 75, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who was placed third in 2001?", "schema": "CREATE TABLE table_17632217_2 (third_place VARCHAR, season VARCHAR)", "sql": "SELECT third_place FROM table_17632217_2 WHERE season = 2001;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 61, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the play for 1976", "schema": "CREATE TABLE table_name_20 (play VARCHAR, year VARCHAR)", "sql": "SELECT play FROM table_name_20 WHERE year = 1976;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 49, "num_statements": 1} {"question": "Find the average sustainability score of products supplied by companies in 'China' and 'Japan'?", "schema": "CREATE TABLE suppliers (id INT PRIMARY KEY, name VARCHAR(255), country VARCHAR(255), sustainability_score INT); INSERT INTO suppliers (id, name, country, sustainability_score) VALUES (1, 'Supplier A', 'China', 80), (2, 'Supplier B', 'Japan', 85), (3, 'Supplier C', 'China', 90), (4, 'Supplier D', 'Japan', 95); CREATE TABLE products (id INT PRIMARY KEY, name VARCHAR(255), supplier_id INT, sustainability_score INT, FOREIGN KEY (supplier_id) REFERENCES suppliers(id)); INSERT INTO products (id, name, supplier_id, sustainability_score) VALUES (1, 'Product A', 1, 85), (2, 'Product B', 2, 88), (3, 'Product C', 3, 92), (4, 'Product D', 4, 96);", "sql": "SELECT AVG(p.sustainability_score) FROM products p INNER JOIN suppliers s ON p.supplier_id = s.id WHERE s.country IN ('China', 'Japan');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 136, "num_statements": 1} {"question": "List all protected forests located in 'South America' with an area smaller than 600.", "schema": "CREATE TABLE protected_forests_2 (id INT, name VARCHAR(50), area FLOAT, region VARCHAR(50)); INSERT INTO protected_forests_2 (id, name, area, region) VALUES (1, 'Sierra Forest', 550.0, 'South America'), (2, 'Rainforest Reserve', 700.0, 'South America');", "sql": "SELECT name FROM protected_forests_2 WHERE area < 600 AND region = 'South America';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 83, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'test_decoding' (example 27).", "schema": null, "sql": "SELECT data FROM pg_logical_slot_get_changes('regression_slot2', NULL, NULL, 'include-xids', '0', 'skip-empty-xacts', '1');", "explanation": "Example query from the 'test_decoding' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 123, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What are the ends for the player with a transfer fee of loan?", "schema": "CREATE TABLE table_name_33 (ends VARCHAR, transfer_fee VARCHAR)", "sql": "SELECT ends FROM table_name_33 WHERE transfer_fee = 'loan';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 59, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the junior type with an intake of 60 and a DCSF number less than 3386 with the smallest Ofsted number?", "schema": "CREATE TABLE table_name_62 (ofsted_number INTEGER, dcsf_number VARCHAR, intake VARCHAR, type VARCHAR)", "sql": "SELECT MIN(ofsted_number) FROM table_name_62 WHERE intake = 60 AND type = 'junior' AND dcsf_number < 3386;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 106, "num_statements": 1} {"question": "Find the average age of players who play games with the 'Racing' genre", "schema": "CREATE TABLE PlayerDemographics (PlayerID INT, Game VARCHAR(20), Age INT); INSERT INTO PlayerDemographics (PlayerID, Game, Age) VALUES (1, 'Need for Speed', 28), (2, 'F1 2020', 32), (3, 'Mario Kart 8', 18), (4, 'Gran Turismo Sport', 35);", "sql": "SELECT AVG(Age) FROM (SELECT Age FROM PlayerDemographics WHERE Game IN (SELECT Game FROM GameDesignData WHERE Genre = 'Racing')) AS Subquery;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 141, "num_statements": 1} {"question": "What is the distribution of financial literacy scores for customers in Texas?", "schema": "CREATE TABLE customers (customer_id INT, name VARCHAR(255), state VARCHAR(255), financial_literacy_score INT);", "sql": "SELECT state, COUNT(*) as count, MIN(financial_literacy_score) as min_score, AVG(financial_literacy_score) as avg_score, MAX(financial_literacy_score) as max_score FROM customers WHERE state = 'Texas' GROUP BY state;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 216, "num_statements": 1} {"question": "PostgreSQL regression test 'predicate': Write the SELECT query (example 112).", "schema": null, "sql": "SELECT id FROM dist_tab WHERE (val_null, val_null) IS NOT DISTINCT FROM NULL::RECORD;", "explanation": "Regression test for Predicate in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT id FROM dist_tab WHERE (val_null, val_null) IS NOT DISTINCT FROM NULL::RECORD) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 85, "num_statements": 1} {"question": "What is the maximum capacity of Wind Farms in Germany?", "schema": "CREATE TABLE Wind_Farms (project_id INT, location VARCHAR(50), capacity FLOAT); INSERT INTO Wind_Farms (project_id, location, capacity) VALUES (1, 'Germany', 120.5), (2, 'France', 95.3), (3, 'Germany', 152.8), (4, 'Spain', 119.9);", "sql": "SELECT MAX(capacity) FROM Wind_Farms WHERE location = 'Germany';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 64, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many assists does cam long average in under 132 games?", "schema": "CREATE TABLE table_name_28 (ast_avg INTEGER, player VARCHAR, games VARCHAR)", "sql": "SELECT MAX(ast_avg) FROM table_name_28 WHERE player = 'cam long' AND games < 132;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 81, "num_statements": 1} {"question": "How many policies were issued in the last quarter in New York?", "schema": "CREATE TABLE policies (id INT, policyholder_id INT, policy_type TEXT, issue_date DATE, expiry_date DATE); INSERT INTO policies (id, policyholder_id, policy_type, issue_date, expiry_date) VALUES (1, 3, 'Life', '2021-01-01', '2022-01-01'), (2, 4, 'Health', '2021-02-01', '2022-02-01'), (3, 5, 'Auto', '2021-03-01', '2022-03-01');", "sql": "SELECT COUNT(policies.id) FROM policies WHERE policies.issue_date >= '2021-04-01' AND policies.issue_date < '2021-07-01' AND policies.state = 'New York';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 153, "num_statements": 1} {"question": "What is the total production for wells in the 'offshore' region in 2020?", "schema": "CREATE TABLE wells (well_id INT, well_name VARCHAR(50), region VARCHAR(20), production FLOAT, year INT); INSERT INTO wells (well_id, well_name, region, production, year) VALUES (1, 'Well A', 'onshore', 100.0, 2019); INSERT INTO wells (well_id, well_name, region, production, year) VALUES (2, 'Well B', 'offshore', 200.0, 2020); INSERT INTO wells (well_id, well_name, region, production, year) VALUES (3, 'Well C', 'onshore', 150.0, 2021);", "sql": "SELECT SUM(production) FROM wells WHERE region = 'offshore' AND year = 2020;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 76, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is Race 1, when Race 4 is 1?", "schema": "CREATE TABLE table_name_85 (race_1 VARCHAR, race_4 VARCHAR)", "sql": "SELECT race_1 FROM table_name_85 WHERE race_4 = '1';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 52, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the average rank of Elise Matthysen in lanes under 8?", "schema": "CREATE TABLE table_name_46 (rank INTEGER, name VARCHAR, lane VARCHAR)", "sql": "SELECT AVG(rank) FROM table_name_46 WHERE name = 'elise matthysen' AND lane < 8;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 80, "num_statements": 1} {"question": "PL/pgSQL test: Plpython Subtransaction (example 13).", "schema": null, "sql": "SELECT subtransaction_nested_test();", "explanation": "PL/pgSQL example from PostgreSQL source test for Plpython Subtransaction.", "validation_query": null, "source": "plpgsql_source", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 36, "num_statements": 1} {"question": "Delete all records with an employment date before '2020-01-01' from the 'veteran_employment' table", "schema": "CREATE TABLE veteran_employment (veteran_id INT, sector VARCHAR(255), employment_date DATE); INSERT INTO veteran_employment (veteran_id, sector, employment_date) VALUES (1, 'IT', '2020-01-01'), (2, 'Healthcare', '2019-06-15'), (3, 'Finance', '2018-09-30'), (4, 'Manufacturing', '2021-04-01'), (5, 'Education', '2020-12-15');", "sql": "DELETE FROM veteran_employment WHERE employment_date < '2020-01-01';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_delete", "is_postgresql_specific": false, "sql_length": 68, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many top-25s are associated with more than 91 events?", "schema": "CREATE TABLE table_name_24 (top_25 INTEGER, events INTEGER)", "sql": "SELECT SUM(top_25) FROM table_name_24 WHERE events > 91;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 56, "num_statements": 1} {"question": "What was the total revenue generated by events with more than 150 attendees, sorted by event name?", "schema": "CREATE TABLE Events (EventID int, EventName varchar(50), Attendance int, TicketPrice numeric); INSERT INTO Events VALUES (1, 'Dance Recital', 200, 30), (2, 'Poetry Slam', 100, 40), (3, 'Art Exhibit', 250, 50);", "sql": "SELECT SUM(Attendance * TicketPrice) AS TotalRevenue FROM Events WHERE Attendance > 150 GROUP BY EventName ORDER BY EventName;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 126, "num_statements": 1} {"question": "Find the average severity of vulnerabilities for each asset in the 'vulnerabilities' and 'assets' tables", "schema": "CREATE TABLE assets (asset_id INT PRIMARY KEY, asset_name VARCHAR(255)); INSERT INTO assets (asset_id, asset_name) VALUES (1, 'Printer01'), (2, 'Workstation02'); CREATE TABLE vulnerabilities (vulnerability_id INT PRIMARY KEY, asset_id INT, vulnerability_severity INT); INSERT INTO vulnerabilities (vulnerability_id, asset_id, vulnerability_severity) VALUES (1, 1, 5), (2, 1, 6), (3, 2, 8), (4, 2, 9);", "sql": "SELECT a.asset_name, AVG(v.vulnerability_severity) as avg_severity FROM assets a INNER JOIN vulnerabilities v ON a.asset_id = v.asset_id GROUP BY a.asset_name;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 159, "num_statements": 1} {"question": "How many destinations are there for each type of tourism?", "schema": "CREATE TABLE destinations (destination_id INT, name TEXT, type TEXT); INSERT INTO destinations (destination_id, name, type) VALUES (1, 'Parksville', 'Eco-friendly'), (2, 'Tofino', 'Eco-friendly'), (3, 'Vancouver', 'Urban'), (4, 'Whistler', 'Ski'), (5, 'Banff', 'Sustainable'), (6, 'Jasper', 'Sustainable');", "sql": "SELECT type, COUNT(*) FROM destinations GROUP BY type;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 54, "num_statements": 1} {"question": "PostgreSQL regression test 'sqljson': Write the SELECT query (example 14).", "schema": null, "sql": "SELECT JSON('{\"a\": 1, \"a\": 2}' WITHOUT UNIQUE KEYS);", "explanation": "Regression test for Sqljson in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT JSON('{\"a\": 1, \"a\": 2}' WITHOUT UNIQUE KEYS)) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": true, "sql_length": 52, "num_statements": 1} {"question": "What is the total production cost and revenue for each indigenous food system in the \"indigenous_food_systems\" and \"expenses\" tables?", "schema": "CREATE TABLE indigenous_food_systems (id INT, food_system_name VARCHAR(50), revenue INT); CREATE TABLE expenses (id INT, food_system_id INT, cost INT);", "sql": "SELECT indigenous_food_systems.food_system_name, SUM(expenses.cost) AS total_cost, SUM(indigenous_food_systems.revenue) AS total_revenue FROM indigenous_food_systems INNER JOIN expenses ON indigenous_food_systems.id = expenses.food_system_id GROUP BY indigenous_food_systems.food_system_name;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 292, "num_statements": 1} {"question": "Show a SQL definition from the timescaledb project (sql_query, item 6).", "schema": null, "sql": "--test integer partition key\nCREATE TABLE \"int_part\"(time timestamp, object_id int, temp float);", "explanation": "SQL definition from the open-source timescaledb PostgreSQL project.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_table", "is_postgresql_specific": false, "sql_length": 96, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who played defensive when the rookie Craig Point was playing during week 6?", "schema": "CREATE TABLE table_name_97 (defensive VARCHAR, rookie VARCHAR, week VARCHAR)", "sql": "SELECT defensive FROM table_name_97 WHERE rookie = 'craig point' AND week = 6;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 78, "num_statements": 1} {"question": "List all electric vehicle charging stations in the state of New York, along with their locations.", "schema": "CREATE TABLE ev_charging_stations (id INT, station_name VARCHAR(50), state VARCHAR(50), location VARCHAR(50)); INSERT INTO ev_charging_stations (id, station_name, state, location) VALUES (1, 'New York City EV Charging', 'New York', 'Manhattan');", "sql": "SELECT station_name, location FROM ev_charging_stations WHERE state = 'New York';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 81, "num_statements": 1} {"question": "What was the total sales revenue for DrugA in Q2 2020?", "schema": "CREATE TABLE sales(drug_name TEXT, quarter INT, year INT, revenue FLOAT); INSERT INTO sales(drug_name, quarter, year, revenue) VALUES('DrugA', 1, 2020, 150000), ('DrugA', 2, 2020, 200000), ('DrugA', 3, 2020, 180000), ('DrugA', 4, 2020, 220000);", "sql": "SELECT SUM(revenue) FROM sales WHERE drug_name = 'DrugA' AND quarter = 2 AND year = 2020;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 89, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'ltree' (example 145).", "schema": null, "sql": "SELECT 'a.b.c.d.e'::ltree ~ '*.d.*';", "explanation": "Example query from the 'ltree' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 36, "num_statements": 1} {"question": "What is the earliest transaction date for 'VendorW' in the reverse logistics domain?", "schema": "CREATE TABLE Vendors (VendorID VARCHAR(20), VendorName VARCHAR(20)); INSERT INTO Vendors (VendorID, VendorName) VALUES ('X', 'VendorX'), ('W', 'VendorW'); CREATE TABLE ReverseLogisticsTransactions (TransactionID INT, VendorID VARCHAR(20), TransactionStatus VARCHAR(20), TransactionDate DATE); INSERT INTO ReverseLogisticsTransactions (TransactionID, VendorID, TransactionStatus, TransactionDate) VALUES (4, 'W', 'Returned', '2022-01-04');", "sql": "SELECT MIN(ReverseLogisticsTransactions.TransactionDate) AS EarliestTransactionDate FROM ReverseLogisticsTransactions JOIN Vendors ON ReverseLogisticsTransactions.VendorID = Vendors.VendorID WHERE Vendors.VendorName = 'VendorW';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 228, "num_statements": 1} {"question": "Write the SQL definition from PostgreSQL contrib extension 'contrib' (item 53).", "schema": null, "sql": "CREATE FUNCTION intarray_del_elem(_int4, int4)\nRETURNS _int4\nAS 'MODULE_PATHNAME'\nLANGUAGE C STRICT IMMUTABLE PARALLEL SAFE;", "explanation": "SQL definition from the 'contrib' PostgreSQL contrib extension.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 124, "num_statements": 1} {"question": "Write the SQL definition from PostgreSQL contrib extension 'contrib' (item 3).", "schema": null, "sql": "CREATE TYPE gbtreekey2 (\n\tINTERNALLENGTH = 2,\n\tINPUT = gbtreekey2_in,\n\tOUTPUT = gbtreekey2_out\n);", "explanation": "SQL definition from the 'contrib' PostgreSQL contrib extension.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_advanced", "is_postgresql_specific": true, "sql_length": 98, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'earthdistance' (example 38).", "schema": null, "sql": "SELECT latitude(ll_to_earth(0,90))::numeric(20,10);", "explanation": "Example query from the 'earthdistance' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 51, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'test_setup' (example 50).", "schema": null, "sql": "CREATE TABLE road (\n\tname\t\ttext,\n\tthepath \tpath\n);", "explanation": "DDL from PostgreSQL core regression test for Test Setup.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_table", "is_postgresql_specific": false, "sql_length": 50, "num_statements": 1} {"question": "What is the minimum account balance for customers in the Midwest region?", "schema": "CREATE TABLE customers (customer_id INT, name VARCHAR(50), region VARCHAR(50), account_balance DECIMAL(10,2)); INSERT INTO customers (customer_id, name, region, account_balance) VALUES (1, 'John Doe', 'Midwest', 5000.00), (2, 'Jane Smith', 'Northeast', 7000.00);", "sql": "SELECT MIN(account_balance) FROM customers WHERE region = 'Midwest';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 68, "num_statements": 1} {"question": "What is the distribution of cruelty-free certifications by year for cosmetics products in the makeup category?", "schema": "CREATE TABLE certifications(certification_id INT, product_id INT, certification_type VARCHAR(50), certified_date DATE);", "sql": "SELECT YEAR(certified_date) as certification_year, COUNT(*) as cruelty_free_count FROM certifications JOIN cosmetics_products ON certifications.product_id = cosmetics_products.product_id WHERE certifications.certification_type = 'cruelty-free' AND cosmetics_products.category = 'makeup' GROUP BY certification_year;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 315, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the total number of game sites for chicago bears", "schema": "CREATE TABLE table_14977592_1 (game_site VARCHAR, opponent VARCHAR)", "sql": "SELECT COUNT(game_site) FROM table_14977592_1 WHERE opponent = 'Chicago Bears';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 79, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What team owns the Venue of western oval?", "schema": "CREATE TABLE table_name_52 (home_team VARCHAR, venue VARCHAR)", "sql": "SELECT home_team FROM table_name_52 WHERE venue = 'western oval';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 65, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many values for number of clubs have Shandong as the runner-up?", "schema": "CREATE TABLE table_17632217_1 (number_of_clubs VARCHAR, runners_up VARCHAR)", "sql": "SELECT COUNT(number_of_clubs) FROM table_17632217_1 WHERE runners_up = 'Shandong';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 82, "num_statements": 1} {"question": "Provide the number of customers who have taken out loans with a maturity of 5 years or more, broken down by gender and age group, in Germany?", "schema": "CREATE TABLE customers (customer_id INT, customer_name TEXT, gender TEXT, age INT, country TEXT); INSERT INTO customers (customer_id, customer_name, gender, age, country) VALUES (1, 'Alex', 'Male', 35, 'Germany'), (2, 'Sarah', 'Female', 40, 'Germany'); CREATE TABLE loans (loan_id INT, customer_id INT, maturity INT); INSERT INTO loans (loan_id, customer_id, maturity) VALUES (1, 1, 5), (2, 2, 6);", "sql": "SELECT gender, CASE WHEN age < 30 THEN '18-29' WHEN age < 50 THEN '30-49' ELSE '50+' END AS age_group, COUNT(*) FROM customers JOIN loans ON customers.customer_id = loans.customer_id WHERE country = 'Germany' AND maturity >= 5 GROUP BY gender, age_group;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 254, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the attendance of the game that lasted 3:55?", "schema": "CREATE TABLE table_name_1 (att VARCHAR, time VARCHAR)", "sql": "SELECT att FROM table_name_1 WHERE time = '3:55';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 49, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: In which title was Ann Rutherford the leading lady for Joseph Kane?", "schema": "CREATE TABLE table_name_73 (title VARCHAR, leading_lady VARCHAR, director VARCHAR)", "sql": "SELECT title FROM table_name_73 WHERE leading_lady = 'ann rutherford' AND director = 'joseph kane';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 99, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the cost of 972 Min Press caused 52 death?", "schema": "CREATE TABLE table_name_36 (damage__millions_usd__ VARCHAR, min_press___mbar__ VARCHAR, deaths VARCHAR)", "sql": "SELECT damage__millions_usd__ FROM table_name_36 WHERE min_press___mbar__ = '972' AND deaths = '52';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 100, "num_statements": 1} {"question": "Write the SQL definition from PostgreSQL contrib extension 'contrib' (item 418).", "schema": null, "sql": "CREATE OPERATOR CLASS issn13_ops DEFAULT\n\tFOR TYPE issn13 USING btree FAMILY isn_ops AS\n\tOPERATOR 1 <,\n\tOPERATOR 2 <=,\n\tOPERATOR 3 =,\n\tOPERATOR 4 >=,\n\tOPERATOR 5 >,\n\tFUNCTION 1 btissn13cmp(issn13, issn13);", "explanation": "SQL definition from the 'contrib' PostgreSQL contrib extension.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "ddl_advanced", "is_postgresql_specific": true, "sql_length": 211, "num_statements": 1} {"question": "How many public health policy violations were reported in the Western and Asian regions?", "schema": "CREATE TABLE western_policy_violations (region VARCHAR(255), violation VARCHAR(255)); INSERT INTO western_policy_violations (region, violation) VALUES ('Western', 'Smoking Ban Violation'); INSERT INTO western_policy_violations (region, violation) VALUES ('Western', 'Noise Complaint'); CREATE TABLE asian_policy_violations (region VARCHAR(255), violation VARCHAR(255)); INSERT INTO asian_policy_violations (region, violation) VALUES ('Asian', 'Food Safety Violation'); INSERT INTO asian_policy_violations (region, violation) VALUES ('Asian', 'Building Code Violation');", "sql": "SELECT COUNT(*) FROM western_policy_violations UNION ALL SELECT COUNT(*) FROM asian_policy_violations;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 102, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the venue when Jerran Hart won?", "schema": "CREATE TABLE table_name_20 (venue VARCHAR, winner VARCHAR)", "sql": "SELECT venue FROM table_name_20 WHERE winner = 'jerran hart';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 61, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the lowest total that has a rank less than 8, a silver greater than 6, and 20 as the bronze?", "schema": "CREATE TABLE table_name_70 (total INTEGER, bronze VARCHAR, rank VARCHAR, silver VARCHAR)", "sql": "SELECT MIN(total) FROM table_name_70 WHERE rank < 8 AND silver > 6 AND bronze = 20;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 83, "num_statements": 1} {"question": "What is the recycling rate trend by material type in the last 3 months?", "schema": "CREATE TABLE recycling_rate_trend(date DATE, material VARCHAR(255), recycling_rate FLOAT);", "sql": "SELECT date, material, recycling_rate FROM recycling_rate_trend WHERE date >= DATEADD(month, -3, GETDATE()) ORDER BY date;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 122, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What type of bridge is Colton's Crossing Bridge?", "schema": "CREATE TABLE table_name_46 (type VARCHAR, name VARCHAR)", "sql": "SELECT type FROM table_name_46 WHERE name = 'colton's crossing bridge';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 71, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the record for November 4, with a decision made by Denis?", "schema": "CREATE TABLE table_name_10 (record VARCHAR, decision VARCHAR, date VARCHAR)", "sql": "SELECT record FROM table_name_10 WHERE decision = 'denis' AND date = 'november 4';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 82, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the smallest number of goals against when 8 games were lost, and the goals for are 60?", "schema": "CREATE TABLE table_name_86 (goals_against INTEGER, lost VARCHAR, goals_for VARCHAR)", "sql": "SELECT MIN(goals_against) FROM table_name_86 WHERE lost = 8 AND goals_for = 60;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 79, "num_statements": 1} {"question": "How many hybrid vehicles were sold in Canada in 2021?", "schema": "CREATE TABLE VehicleSales (Id INT, VehicleType VARCHAR(50), Year INT, Country VARCHAR(50), Sales INT); INSERT INTO VehicleSales (Id, VehicleType, Year, Country, Sales) VALUES (1, 'Electric', 2021, 'Canada', 25000), (2, 'Hybrid', 2021, 'Canada', 30000), (3, 'Gasoline', 2021, 'Canada', 150000), (4, 'Electric', 2021, 'USA', 40000), (5, 'Hybrid', 2021, 'USA', 50000);", "sql": "SELECT SUM(Sales) FROM VehicleSales WHERE Year = 2021 AND Country = 'Canada' AND VehicleType = 'Hybrid';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 104, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the average attendance when the New York Mets were opponents with a record of 51-33?", "schema": "CREATE TABLE table_name_81 (attendance INTEGER, opponent VARCHAR, record VARCHAR)", "sql": "SELECT AVG(attendance) FROM table_name_81 WHERE opponent = 'new york mets' AND record = '51-33';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 96, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the type for the Cathay Pacific Holidays company, an incorporation of Hong Kong and listed activities as Travel Agency?", "schema": "CREATE TABLE table_name_42 (type VARCHAR, company VARCHAR, incorporated_in VARCHAR, principal_activities VARCHAR)", "sql": "SELECT type FROM table_name_42 WHERE incorporated_in = 'hong kong' AND principal_activities = 'travel agency' AND company = 'cathay pacific holidays';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 150, "num_statements": 1} {"question": "List all affordable housing units in the state of New York that have a size less than 1000 square feet.", "schema": "CREATE TABLE AffordableHousing (id INT, state VARCHAR(20), size FLOAT);", "sql": "SELECT * FROM AffordableHousing WHERE state = 'New York' AND size < 1000;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 73, "num_statements": 1} {"question": "What is the change in air pollution levels (PM2.5) by month, for the past year, for a specific mining location?", "schema": "CREATE TABLE environment (environment_id INT, mine_id INT, date DATE, pm2_5 FLOAT); INSERT INTO environment (environment_id, mine_id, date, pm2_5) VALUES (1, 1, '2021-01-01', 12), (2, 1, '2021-02-01', 14), (3, 1, '2021-03-01', 15), (4, 1, '2021-04-01', 16), (5, 1, '2021-05-01', 18), (6, 1, '2021-06-01', 20), (7, 1, '2021-07-01', 22), (8, 1, '2021-08-01', 25), (9, 1, '2021-09-01', 27), (10, 1, '2021-10-01', 30), (11, 1, '2021-11-01', 32), (12, 1, '2021-12-01', 35);", "sql": "SELECT EXTRACT(MONTH FROM date) as month, (LEAD(pm2_5) OVER (ORDER BY date) - pm2_5) as pm2_5_change FROM environment WHERE mine_id = 1 AND date BETWEEN DATE_SUB(CURRENT_DATE, INTERVAL 1 YEAR) AND CURRENT_DATE;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_window_function", "is_postgresql_specific": true, "sql_length": 210, "num_statements": 1} {"question": "What is the minimum dissolved oxygen level recorded in the Arctic Ocean?", "schema": "CREATE TABLE ocean_properties (location VARCHAR(255), dissolved_oxygen FLOAT); INSERT INTO ocean_properties (location, dissolved_oxygen) VALUES ('Arctic Ocean', 5.6), ('Antarctic Ocean', 6.2);", "sql": "SELECT MIN(dissolved_oxygen) FROM ocean_properties WHERE location = 'Arctic Ocean';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 83, "num_statements": 1} {"question": "What is the maximum donation amount in the 'Donations' table?", "schema": "CREATE TABLE Donations (id INT, department VARCHAR(20), amount FLOAT); INSERT INTO Donations (id, department, amount) VALUES (1, 'Animals', 500.00), (2, 'Education', 300.00);", "sql": "SELECT MAX(amount) FROM Donations;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 34, "num_statements": 1} {"question": "Insert records into the TalentAcquisition table", "schema": "CREATE TABLE TalentAcquisition (ApplicantID INT PRIMARY KEY, JobTitle VARCHAR(30), Department VARCHAR(20), ApplicationDate DATE);", "sql": "INSERT INTO TalentAcquisition (ApplicantID, JobTitle, Department, ApplicationDate) VALUES (1, 'Software Engineer', 'Engineering', '2022-01-01'), (2, 'Data Analyst', 'Marketing', '2022-02-15'), (3, 'Project Manager', 'Operations', '2022-03-05');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 244, "num_statements": 1} {"question": "pgTAP test for Partitions (assertion 23).", "schema": null, "sql": "-- is_partition_of() should fail for partition sub but wrong base.\nSELECT * FROM check_test(\n is_partition_of( 'public', 'part1', 'public', 'base', 'whatevs' ),\n false,\n 'is_partition_of( csch, ctab, psch, non-part ptab, desc )',\n 'whatevs',\n ''\n);", "explanation": "SQL assertion from pgTAP test suite for Partitions.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 263, "num_statements": 1} {"question": "What is the average movie rating for each director?", "schema": "CREATE TABLE movies (title VARCHAR(255), rating INT, director VARCHAR(50)); INSERT INTO movies (title, rating, director) VALUES ('Movie1', 8, 'DirectorA'), ('Movie2', 7, 'DirectorB'), ('Movie3', 9, 'DirectorA'), ('Movie4', 6, 'DirectorB');", "sql": "SELECT director, AVG(rating) as avg_rating FROM movies GROUP BY director;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 73, "num_statements": 1} {"question": "What is the total water usage by all agricultural customers in the month of July?", "schema": "CREATE TABLE water_usage(customer_id INT, usage FLOAT, month DATE); INSERT INTO water_usage(customer_id, usage, month) VALUES (1, 500, '2022-07-01'), (2, 350, '2022-07-01'), (3, 700, '2022-07-01');", "sql": "SELECT SUM(usage) FROM water_usage WHERE month = '2022-07-01' AND customer_type = 'agricultural';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 97, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'pg_trgm' (example 19).", "schema": null, "sql": "select count(*) from test_trgm where t ~ '[qwerty]{2}-?[qwerty]{2}';", "explanation": "Example query from the 'pg_trgm' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 68, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who was the opponent at the game that had a record of 93-54?", "schema": "CREATE TABLE table_name_73 (opponent VARCHAR, record VARCHAR)", "sql": "SELECT opponent FROM table_name_73 WHERE record = '93-54';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 58, "num_statements": 1} {"question": "What is the total value of works in the impressionism category?", "schema": "CREATE TABLE sales (sale_id INT PRIMARY KEY, work_id INT, sale_price FLOAT, sale_date DATE, FOREIGN KEY (work_id) REFERENCES works(work_id));", "sql": "SELECT SUM(s.sale_price) AS total_value FROM sales s JOIN works w ON s.work_id = w.work_id WHERE w.style = 'Impressionism';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 123, "num_statements": 1} {"question": "How many cybersecurity incidents were reported by the Navy in H1 2021?", "schema": "CREATE TABLE NavyCybersecurityIncidents (id INT, incident_date DATE, incidents INT); INSERT INTO NavyCybersecurityIncidents (id, incident_date, incidents) VALUES (1, '2021-01-01', 10), (2, '2021-02-01', 15), (3, '2021-03-01', 20), (4, '2021-04-01', 25), (5, '2021-05-01', 30), (6, '2021-06-01', 35);", "sql": "SELECT SUM(incidents) FROM NavyCybersecurityIncidents WHERE incident_date BETWEEN '2021-01-01' AND '2021-06-01' AND MONTH(incident_date) <= 6;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 142, "num_statements": 1} {"question": "What is the average property price in each neighborhood in the city of \"Oakland\"?", "schema": "CREATE TABLE neighborhoods (neighborhood_id INT, neighborhood_name TEXT, city_id INT, PRIMARY KEY (neighborhood_id)); INSERT INTO neighborhoods (neighborhood_id, neighborhood_name, city_id) VALUES (1, 'Jack London', 3), (2, 'Chinatown', 3), (3, 'West Oakland', 3); CREATE TABLE properties (property_id INT, price FLOAT, neighborhood_id INT, PRIMARY KEY (property_id), FOREIGN KEY (neighborhood_id) REFERENCES neighborhoods(neighborhood_id));", "sql": "SELECT n.neighborhood_name, AVG(p.price) FROM properties p JOIN neighborhoods n ON p.neighborhood_id = n.neighborhood_id WHERE n.city_id = 3 GROUP BY n.neighborhood_name;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 170, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the smallest crowd at victoria park?", "schema": "CREATE TABLE table_name_22 (crowd INTEGER, venue VARCHAR)", "sql": "SELECT MIN(crowd) FROM table_name_22 WHERE venue = 'victoria park';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 67, "num_statements": 1} {"question": "How many games have been played by each player in the \"RetroGamers\" community?", "schema": "CREATE TABLE Players (PlayerID INT PRIMARY KEY, Name VARCHAR(50), GamingCommunity VARCHAR(50)); CREATE TABLE Games (GameID INT PRIMARY KEY, GameName VARCHAR(50), PlayerID INT, FOREIGN KEY (PlayerID) REFERENCES Players(PlayerID)); INSERT INTO Players (PlayerID, Name, GamingCommunity) VALUES (1, 'John Doe', 'RetroGamers'), (2, 'Jane Smith', 'RetroGamers'), (3, 'Alice Johnson', 'GamingCommunity'); INSERT INTO Games (GameID, GameName, PlayerID) VALUES (1, 'Mario', 1), (2, 'Sonic', 1), (3, 'PacMan', 2), (4, 'Tetris', 3);", "sql": "SELECT Players.Name, COUNT(Games.GameName) FROM Players JOIN Games ON Players.PlayerID = Games.PlayerID WHERE Players.GamingCommunity = 'RetroGamers' GROUP BY Players.Name;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 172, "num_statements": 1} {"question": "What is the total number of vessels that docked in the Port of Long Beach in the past year?", "schema": "CREATE TABLE port_long_beach_vessels (vessel_id INT, docking_date DATE);", "sql": "SELECT COUNT(*) FROM port_long_beach_vessels WHERE docking_date >= DATEADD(year, -1, GETDATE());", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 96, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How much did the home team st kilda score?", "schema": "CREATE TABLE table_name_76 (home_team VARCHAR)", "sql": "SELECT home_team AS score FROM table_name_76 WHERE home_team = 'st kilda';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 74, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which American Labor candidate ran against Democratic candidate Robert F. Wagner?", "schema": "CREATE TABLE table_name_51 (american_labor_ticket VARCHAR, democratic_ticket VARCHAR)", "sql": "SELECT american_labor_ticket FROM table_name_51 WHERE democratic_ticket = 'robert f. wagner';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 93, "num_statements": 1} {"question": "Insert a new environmental impact assessment for 'Ammonia' in the \"environmental_impact\" table", "schema": "CREATE TABLE environmental_impact (id INT PRIMARY KEY, chemical_name VARCHAR(255), environmental_impact VARCHAR(255), date_assessed DATE);", "sql": "INSERT INTO environmental_impact (id, chemical_name, environmental_impact, date_assessed) VALUES (1, 'Ammonia', 'High greenhouse gas emissions', '2022-01-01');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 159, "num_statements": 1} {"question": "What is the average daily budget for ad campaigns?", "schema": "CREATE TABLE ad_campaigns (id INT, name VARCHAR(50), budget INT, start_date DATE, end_date DATE); INSERT INTO ad_campaigns (id, name, budget, start_date, end_date) VALUES (7, 'Holi', 8000, '2021-03-28', '2021-03-30'), (8, 'Navruz', 11000, '2021-03-21', '2021-03-23');", "sql": "SELECT AVG(budget / DATEDIFF(day, start_date, end_date)) as avg_daily_budget FROM ad_campaigns;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 95, "num_statements": 1} {"question": "How many military aircraft maintenance requests were recorded for the Air Force in Q4 2019?", "schema": "CREATE TABLE maintenance_requests (request_id INT, service_branch VARCHAR(255), request_date DATE); INSERT INTO maintenance_requests (request_id, service_branch, request_date) VALUES (1, 'Air Force', '2019-10-01'), (2, 'Navy', '2019-12-02'), (3, 'Air Force', '2019-11-03');", "sql": "SELECT COUNT(*) FROM maintenance_requests WHERE service_branch = 'Air Force' AND EXTRACT(QUARTER FROM request_date) = 4 AND EXTRACT(YEAR FROM request_date) = 2019;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 163, "num_statements": 1} {"question": "How many vessels are there in total for each flag?", "schema": "CREATE TABLE Vessels (Id INT, Name VARCHAR(50), Type VARCHAR(50), Flag VARCHAR(50)); INSERT INTO Vessels (Id, Name, Type, Flag) VALUES (3, 'VesselC', 'Bulk Carrier', 'Brazil'), (4, 'VesselD', 'Container Ship', 'Brazil');", "sql": "SELECT V.Flag, COUNT(V.Id) FROM Vessels V GROUP BY V.Flag;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 58, "num_statements": 1} {"question": "What is the maximum sales for any vegetarian item?", "schema": "CREATE TABLE menu (item_id INT, item_name TEXT, type TEXT, sales INT); INSERT INTO menu VALUES (1, 'Veggie Burger', 'Vegetarian', 250), (2, 'Falafel Wrap', 'Vegetarian', 150), (3, 'Garden Salad', 'Vegetarian', 100), (4, 'Hummus Plate', 'Vegetarian', 120);", "sql": "SELECT MAX(sales) FROM menu WHERE type = 'Vegetarian';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 54, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the result where the original artist is Prince and the Revolution?", "schema": "CREATE TABLE table_25374338_1 (result VARCHAR, original_artist VARCHAR)", "sql": "SELECT result FROM table_25374338_1 WHERE original_artist = 'Prince and The Revolution';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 88, "num_statements": 1} {"question": "Show a SQL definition from the pg_partman project (pg_partman--3.2.1--4.0.0, item 33).", "schema": null, "sql": "CREATE FUNCTION @extschema@.autovacuum_reset(p_parent_schema text, p_parent_tablename text, p_source_schema text DEFAULT NULL, p_source_tablename text DEFAULT NULL) RETURNS boolean\n LANGUAGE plpgsql\n AS $$\nDECLARE\n\nv_row record;\nv_sql text;\n\nBEGIN\n\n v_sql = format('ALTER TABLE %I.%I RESET (autovacuum_enabled, toast.autovacuum_enabled)', p_parent_schema, p_parent_tablename);\n RAISE DEBUG 'partition_data sql: %', v_sql;\n EXECUTE v_sql;\n\n IF p_source_tablename IS NOT NULL THEN\n v_sql = format('ALTER TABLE %I.%I RESET (autovacuum_enabled, toast.autovacuum_enabled)', p_source_schema, p_source_tablename);\n RAISE DEBUG 'partition_data sql: %', v_sql;\n EXECUTE v_sql;\n END IF;\n\n FOR v_row IN\n SELECT partition_schemaname, partition_tablename FROM @extschema@.show_partitions(p_parent_schema||'.'||p_parent_tablename, 'ASC')\n LOOP\n v_sql = format('ALTER TABLE %I.%I RESET (autovacuum_enabled, toast.autovacuum_enabled)', v_row.partition_schemaname, v_row.partition_tablename);\n RAISE DEBUG 'partition_data sql: %', v_sql;\n EXECUTE v_sql;\n END LOOP;\n\n RETURN true;\nEND\n$$;", "explanation": "SQL definition from the open-source pg_partman PostgreSQL project.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "plpgsql_function", "is_postgresql_specific": true, "sql_length": 1164, "num_statements": 15} {"question": "Write the DDL statement from PostgreSQL regression test 'subscription' (example 4).", "schema": null, "sql": "CREATE ROLE regress_subscription_user_dummy LOGIN NOSUPERUSER;", "explanation": "DDL from PostgreSQL core regression test for Subscription.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 62, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which professionals live in a city containing the substring 'West'? List his or her role, street, city and state.", "schema": "CREATE TABLE professionals (role_code VARCHAR, street VARCHAR, city VARCHAR, state VARCHAR)", "sql": "SELECT role_code, street, city, state FROM professionals WHERE city LIKE '%West%';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 82, "num_statements": 1} {"question": "What is the total attendance at events organized by museums in Germany?", "schema": "CREATE TABLE museum_events (id INT, name VARCHAR(255), date DATE, museum_name VARCHAR(255), attendance INT); INSERT INTO museum_events (id, name, date, museum_name, attendance) VALUES (1, 'Art Exhibition', '2020-02-01', 'German Museum', 1500), (2, 'History Conference', '2020-03-15', 'German Museum', 1200), (3, 'Science Fair', '2020-04-01', 'German Museum', 1800);", "sql": "SELECT SUM(attendance) FROM museum_events WHERE museum_name LIKE '%German%';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 76, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What year is the latest year that has no under director?", "schema": "CREATE TABLE table_name_18 (year INTEGER, director VARCHAR)", "sql": "SELECT MAX(year) FROM table_name_18 WHERE director = 'no';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 58, "num_statements": 1} {"question": "PostgreSQL Perform: show example 19.", "schema": null, "sql": "EXPLAIN SELECT * FROM tenk1 WHERE unique1 = 42; QUERY PLAN -------------------------------------------------------------------&zwsp;---------- Index Scan using tenk1_unique1 on tenk1 (cost=0.29..8.30 rows=1 width=244) Index Cond: (unique1 = 42);", "explanation": "Example from PostgreSQL documentation on Perform.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "admin_maintenance", "is_postgresql_specific": false, "sql_length": 245, "num_statements": 3} {"question": "PostgreSQL regression test 'union': Write the SELECT query (example 8).", "schema": null, "sql": "SELECT 1.1 AS two UNION SELECT 2.2 ORDER BY 1;", "explanation": "Regression test for Union in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT 1.1 AS two UNION SELECT 2.2 ORDER BY 1) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 46, "num_statements": 1} {"question": "What is the mental health score distribution by healthcare provider's years of experience?", "schema": "CREATE TABLE Experience (ExperienceID INT, Experience VARCHAR(50)); CREATE TABLE MentalHealthScores (MH_ID INT, ExperienceID INT, MentalHealthScore INT); INSERT INTO Experience (ExperienceID, Experience) VALUES (1, '0-5 years'), (2, '6-10 years'), (3, '11-20 years'), (4, '20+ years'); INSERT INTO MentalHealthScores (MH_ID, ExperienceID, MentalHealthScore) VALUES (1, 1, 85), (2, 1, 90), (3, 2, 75), (4, 2, 70), (5, 3, 80), (6, 3, 85), (7, 4, 65), (8, 4, 70), (9, 1, 95), (10, 2, 80);", "sql": "SELECT e.Experience, AVG(mhs.MentalHealthScore) as Avg_Score FROM MentalHealthScores mhs JOIN Experience e ON mhs.ExperienceID = e.ExperienceID GROUP BY e.Experience;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 166, "num_statements": 1} {"question": "Calculate the total funding for neurological disorder related projects.", "schema": "CREATE SCHEMA if not exists genetic_research;CREATE TABLE if not exists genetic_research.projects(id INT, name TEXT, lead_researcher TEXT, disease_category TEXT, funding FLOAT);INSERT INTO genetic_research.projects (id, name, lead_researcher, disease_category, funding) VALUES (1, 'ProjectX', 'Dr. Jane Smith', 'Cancer', 2000000), (2, 'ProjectY', 'Dr. John Doe', 'Neurological Disorders', 3000000), (3, 'ProjectZ', 'Dr. Maria Garcia', 'Cancer', 4000000);", "sql": "SELECT SUM(funding) FROM genetic_research.projects WHERE disease_category = 'Neurological Disorders';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 101, "num_statements": 1} {"question": "Which products have a higher consumer preference score than their average consumer preference score for their respective categories?", "schema": "CREATE TABLE categories (id INT, product_id INT, category TEXT); INSERT INTO categories (id, product_id, category) VALUES (1, 1, 'Skincare'), (2, 2, 'Haircare'), (3, 3, 'Skincare'); CREATE TABLE category_averages (category TEXT, avg_preference FLOAT); INSERT INTO category_averages (category, avg_preference) VALUES ('Skincare', 4.4), ('Haircare', 4.2);", "sql": "SELECT p.product_name, p.consumer_preference FROM products p JOIN categories c ON p.product_id = c.product_id JOIN category_averages ca ON c.category = ca.category WHERE p.consumer_preference > ca.avg_preference;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 212, "num_statements": 1} {"question": "pgTAP test for Pgtap--Unpackaged--0.91.0 (assertion 553).", "schema": null, "sql": "ALTER EXTENSION pgtap ADD FUNCTION is_strict( NAME );", "explanation": "SQL assertion from pgTAP test suite for Pgtap--Unpackaged--0.91.0.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 53, "num_statements": 1} {"question": "What was the budget for defense project Z in 2019 and 2020?", "schema": "CREATE TABLE defense_projects (id INT, project VARCHAR(50), year INT, budget FLOAT); INSERT INTO defense_projects (id, project, year, budget) VALUES (1, 'Project Z', 2019, 1100000), (2, 'Project Z', 2020, 1300000);", "sql": "SELECT project, year, budget FROM defense_projects WHERE project = 'Project Z';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 79, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the earliest year with less than 3 points and Parmalat Forti Ford was the entrant?", "schema": "CREATE TABLE table_name_40 (year INTEGER, points VARCHAR, entrant VARCHAR)", "sql": "SELECT MIN(year) FROM table_name_40 WHERE points < 3 AND entrant = 'parmalat forti ford';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 89, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which Lane has a Time larger than 47.83?", "schema": "CREATE TABLE table_name_8 (lane INTEGER, time INTEGER)", "sql": "SELECT AVG(lane) FROM table_name_8 WHERE time > 47.83;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 54, "num_statements": 1} {"question": "What is the total production of oil from the fields in the North Sea?", "schema": "CREATE TABLE north_sea_oil_production (field VARCHAR(255), year INT, production FLOAT);", "sql": "SELECT SUM(production) FROM north_sea_oil_production WHERE field LIKE '%North Sea%';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 84, "num_statements": 1} {"question": "PostgreSQL regression test 'sqljson_queryfuncs': Write the SELECT query (example 7).", "schema": null, "sql": "SELECT JSON_EXISTS(jsonb '1', '$.a');", "explanation": "Regression test for Sqljson Queryfuncs in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT JSON_EXISTS(jsonb '1', '$.a')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": true, "sql_length": 37, "num_statements": 1} {"question": "PostgreSQL regression test 'jsonb': Write the SELECT query (example 926).", "schema": null, "sql": "select jsonb_to_tsvector('english', '{\"a\": \"aaa in bbb\", \"b\": 123, \"c\": 456, \"d\": true, \"f\": false, \"g\": null}'::jsonb, '[\"string\", \"numeric\"]');", "explanation": "Regression test for Jsonb in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select jsonb_to_tsvector('english', '{\"a\": \"aaa in bbb\", \"b\": 123, \"c\": 456, \"d\": true, \"f\": false, \"g\": null}'::jsonb, '[\"string\", \"numeric\"]')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "advanced", "category": "query_select", "is_postgresql_specific": true, "sql_length": 145, "num_statements": 1} {"question": "Show the total data usage for mobile and broadband subscribers in 'rural' areas, excluding those with billing issues in the past 6 months.", "schema": "CREATE TABLE Subscribers (SubscriberID int, DataUsage int, Service varchar(10), Area varchar(10), BillingIssue bit); INSERT INTO Subscribers (SubscriberID, DataUsage, Service, Area, BillingIssue) VALUES (1, 20000, 'mobile', 'rural', 0), (2, 30000, 'broadband', 'urban', 1), (3, 15000, 'mobile', 'rural', 1), (4, 25000, 'broadband', 'urban', 0), (5, 35000, 'broadband', 'rural', 0);", "sql": "SELECT DataUsage FROM Subscribers WHERE Area = 'rural' AND BillingIssue = 0 AND Service IN ('mobile', 'broadband');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 115, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the last match with a vacancy date of round 2 and Ney Franco as outgoing manager?", "schema": "CREATE TABLE table_name_26 (last_match VARCHAR, date_of_vacancy VARCHAR, outgoing_manager VARCHAR)", "sql": "SELECT last_match FROM table_name_26 WHERE date_of_vacancy = 'round 2' AND outgoing_manager = 'ney franco';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 107, "num_statements": 1} {"question": "Add a new diversity metric record into the 'diversity_metrics' table", "schema": "CREATE TABLE diversity_metrics (id INT PRIMARY KEY, year INT, gender VARCHAR(10), percentage_representation DECIMAL(5,2));", "sql": "INSERT INTO diversity_metrics (id, year, gender, percentage_representation) VALUES (2022, 'Female', 45.30);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 107, "num_statements": 1} {"question": "What is the average rating of eco-friendly hotels in Portugal and Spain?", "schema": "CREATE TABLE eco_hotels (hotel_id INT, name TEXT, country TEXT, rating FLOAT); INSERT INTO eco_hotels VALUES (1, 'Eco Hotel Lisbon', 'Portugal', 4.6), (2, 'Green Hotel Barcelona', 'Spain', 4.4);", "sql": "SELECT AVG(rating) FROM eco_hotels WHERE country IN ('Portugal', 'Spain');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 74, "num_statements": 1} {"question": "List all biotech startups that received funding in the last 6 months.", "schema": "CREATE SCHEMA if not exists biotech;CREATE TABLE biotech.startups_funding (id INT, startup_name VARCHAR(50), funding_date DATE, funding_amount DECIMAL(10,2));INSERT INTO biotech.startups_funding (id, startup_name, funding_date, funding_amount) VALUES (1, 'StartupA', '2022-01-15', 5000000.00), (2, 'StartupB', '2022-06-30', 3000000.00), (3, 'StartupC', '2021-12-31', 2000000.00);", "sql": "SELECT * FROM biotech.startups_funding WHERE funding_date >= DATE_SUB(CURDATE(), INTERVAL 6 MONTH);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 99, "num_statements": 1} {"question": "What is the total number of male and female members?", "schema": "CREATE TABLE membership (id INT, member_id INT, gender VARCHAR(10)); INSERT INTO membership (id, member_id, gender) VALUES (1, 401, 'male'), (2, 402, 'female'), (3, 403, 'male'), (4, 404, 'non-binary');", "sql": "SELECT SUM(gender = 'male') AS male_count, SUM(gender = 'female') AS female_count FROM membership;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 98, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'numeric_big' (example 92).", "schema": null, "sql": "INSERT INTO num_exp_mul VALUES (1,7,'-69808760806266041400340.70700818693892852138813934414383886494691670042143650609934777814995087699409404201920249076407981012095999320858479644760715204999741683528746097757549835956359129287002171391961763797857794730120426599135099619822532290339000466211195776337667123320942107370731349851576864242697412616810236323676004067839744992733887503405311090677026008324895177587064547630828026123718296429295638934384446325302964896473296829265805737112709269803814942537657996725913938408781715328945194948010970');", "explanation": "DML from PostgreSQL core regression test for Numeric Big.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 553, "num_statements": 1} {"question": "List all the shipments that were delayed by more than 2 hours from the 'shipments' table.", "schema": "CREATE TABLE shipments (shipment_id INT, customer_id INT, shipped_date TIMESTAMP, shipped_time TIME, delivered_date TIMESTAMP, delivered_time TIME, status TEXT, delay DECIMAL(3,2));", "sql": "SELECT shipment_id, customer_id, shipped_date, shipped_time, delivered_date, delivered_time, status, delay FROM shipments WHERE delay > 2;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 138, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who was the arranger for the song that had a lyricist of Sirapatara Kalayapanid?", "schema": "CREATE TABLE table_name_8 (arranger_s_ VARCHAR, lyricist_s_ VARCHAR)", "sql": "SELECT arranger_s_ FROM table_name_8 WHERE lyricist_s_ = 'sirapatara kalayapanid';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 82, "num_statements": 1} {"question": "How many military technology patents were filed by each country in the last 3 years?", "schema": "CREATE TABLE MilitaryPatents (Id INT, Country VARCHAR(50), Patent VARCHAR(50), Year INT); INSERT INTO MilitaryPatents (Id, Country, Patent, Year) VALUES (1, 'USA', 'Laser Communication', 2021); INSERT INTO MilitaryPatents (Id, Country, Patent, Year) VALUES (2, 'China', 'Drone Swarm', 2021);", "sql": "SELECT COUNT(*), Country FROM MilitaryPatents WHERE Year >= (YEAR(CURRENT_DATE) - 3) GROUP BY Country;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 102, "num_statements": 1} {"question": "pgTAP test for Aretap (assertion 124).", "schema": null, "sql": "SELECT * FROM check_test(\n rules_are( 'public', 'fou', ARRAY['ins_me', 'upd_me', 'del_me'] ),\n false,\n 'rules_are(schema, table, rules) + missing',\n 'Relation public.fou should have the correct rules',\n ' Missing rules:\n del_me'\n);", "explanation": "SQL assertion from pgTAP test suite for Aretap.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 256, "num_statements": 1} {"question": "What is the maximum budget allocated for any category in the West region in the year 2020?", "schema": "CREATE TABLE Budget (Year INT, Region VARCHAR(50), Category VARCHAR(50), Amount INT); INSERT INTO Budget (Year, Region, Category, Amount) VALUES (2020, 'West', 'Education', 5000000), (2020, 'West', 'Public Transportation', 6000000);", "sql": "SELECT MAX(Amount) FROM Budget WHERE Year = 2020 AND Region = 'West';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 69, "num_statements": 1} {"question": "What is the distribution of food safety inspections by month?", "schema": "CREATE TABLE inspections (id INT, date TEXT, result TEXT); INSERT INTO inspections (id, date, result) VALUES (1, '2020-01-01', 'Pass'), (2, '2020-02-01', 'Fail');", "sql": "SELECT EXTRACT(MONTH FROM date) AS month, COUNT(*) AS num_inspections FROM inspections GROUP BY month;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 102, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: what was the away team for the north melbourne home team?", "schema": "CREATE TABLE table_name_60 (away_team VARCHAR, home_team VARCHAR)", "sql": "SELECT away_team FROM table_name_60 WHERE home_team = 'north melbourne';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 72, "num_statements": 1} {"question": "Show the number of wind farms in Oklahoma", "schema": "CREATE TABLE Infrastructure (id INT, name VARCHAR(100), type VARCHAR(50), location VARCHAR(100), state VARCHAR(50)); INSERT INTO Infrastructure (id, name, type, location, state) VALUES (9, 'Western Plains Wind Farm', 'Wind Farm', 'Woodward', 'Oklahoma');", "sql": "SELECT COUNT(*) FROM Infrastructure WHERE type = 'Wind Farm' AND state = 'Oklahoma';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 84, "num_statements": 1} {"question": "Who are the teachers that have not taken any professional development courses and have more than 5 years of experience?", "schema": "CREATE TABLE Teachers (TeacherID INT, Name VARCHAR(50), ProfessionalDevelopmentCourse INT, YearsOfExperience INT); INSERT INTO Teachers (TeacherID, Name, ProfessionalDevelopmentCourse, YearsOfExperience) VALUES (5, 'Ava Red', 0, 6); INSERT INTO Teachers (TeacherID, Name, ProfessionalDevelopmentCourse, YearsOfExperience) VALUES (6, 'Benjamin Orange', 0, 3);", "sql": "SELECT Name FROM Teachers WHERE ProfessionalDevelopmentCourse = 0 AND YearsOfExperience > 5;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 92, "num_statements": 1} {"question": "How many 'Education' events were held in 'Toronto' and 'Vancouver' between 2020-01-01 and 2020-12-31?", "schema": "CREATE TABLE Donors (DonorID INT PRIMARY KEY, FirstName VARCHAR(50), LastName VARCHAR(50), DonationAmount DECIMAL(10,2), DonationDate DATE); CREATE TABLE DonationEvents (EventID INT PRIMARY KEY, EventName VARCHAR(100), EventType VARCHAR(100), DonationID INT, EventLocation VARCHAR(100), EventDate DATE, FOREIGN KEY (DonationID) REFERENCES Donors(DonorID));", "sql": "SELECT COUNT(*) as NumberOfEvents FROM DonationEvents e WHERE e.EventType = 'Education' AND e.EventLocation IN ('Toronto', 'Vancouver') AND e.EventDate BETWEEN '2020-01-01' AND '2020-12-31';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 190, "num_statements": 1} {"question": "PostgreSQL regression test 'stats_ext': Write the SELECT query (example 750).", "schema": null, "sql": "SELECT * FROM tststats.priv_test_parent_tbl WHERE a <<< 0 AND b <<< 0; -- Permission denied\nSELECT * FROM tststats.priv_test_parent_tbl WHERE a <<< 0 OR b <<< 0; -- Permission denied\nSELECT * FROM tststats.priv_test_parent_tbl t\n WHERE a <<< 0 AND (b <<< 0 OR t.* <<< (1, 1) IS NOT NULL); -- Permission denied\nDELETE FROM tststats.priv_test_parent_tbl WHERE a <<< 0 AND b <<< 0; -- Permission denied\n\n-- Grant table access to parent, but hide all data with RLS\nRESET SESSION AUTHORIZATION;", "explanation": "Regression test for Stats Ext in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT * FROM tststats.priv_test_parent_tbl WHERE a <<< 0 AND b <<< 0; -- Permission denied\nSELECT * FROM tststats.priv_test_parent_tbl WHERE a <<< 0 OR b <<< 0; -- Permission denied\nSELECT * FROM tststats.priv_test_parent_tbl t\n WHERE a <<< 0 AND (b <<< 0 OR t.* <<< (1, 1) IS NOT NULL); -- Permission denied\nDELETE FROM tststats.priv_test_parent_tbl WHERE a <<< 0 AND b <<< 0; -- Permission denied\n\n-- Grant table access to parent, but hide all data with RLS\nRESET SESSION AUTHORIZATION) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "intermediate", "category": "dml_delete", "is_postgresql_specific": false, "sql_length": 489, "num_statements": 5} {"question": "pgTAP test for Inheritance (assertion 143).", "schema": null, "sql": "SELECT * FROM check_test(\n isnt_descendent_of( 'child2', 'parent' ),\n false,\n 'isnt_descendent_of( ctab2, ptab )',\n 'Table child2 should not be a descendent of parent',\n ''\n);", "explanation": "SQL assertion from pgTAP test suite for Inheritance.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 190, "num_statements": 1} {"question": "What is the average safety rating for products with cruelty-free certification?", "schema": "CREATE TABLE product_safety_records (id INT PRIMARY KEY, product_name VARCHAR(100), safety_rating INT, inspection_date DATE); CREATE TABLE cruelty_free_certification (id INT PRIMARY KEY, product_name VARCHAR(100), certification_date DATE, certification_status VARCHAR(10)); INSERT INTO cruelty_free_certification (id, product_name, certification_date, certification_status) VALUES (1, 'Lotion', '2021-01-01', 'Certified'); INSERT INTO cruelty_free_certification (id, product_name, certification_date, certification_status) VALUES (2, 'Lip Balm', '2021-01-02', 'Not Certified');", "sql": "SELECT AVG(safety_rating) as avg_safety_rating FROM product_safety_records ps INNER JOIN cruelty_free_certification cfc ON ps.product_name = cfc.product_name WHERE cfc.certification_status = 'Certified';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 203, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the name of the host team dated December 16?", "schema": "CREATE TABLE table_name_39 (host_team VARCHAR, date VARCHAR)", "sql": "SELECT host_team FROM table_name_39 WHERE date = 'december 16';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 63, "num_statements": 1} {"question": "Show a SQL definition from the pg_partman project (pg_partman--2.2.3--2.3.0, item 17).", "schema": null, "sql": "CREATE OR REPLACE FUNCTION create_trigger(p_parent_table text) RETURNS void\n LANGUAGE plpgsql SECURITY DEFINER\n AS $$\nDECLARE\n\nv_function_name text;\nv_new_length int;\nv_parent_schema text;\nv_parent_tablename text;\nv_trig_name text;\nv_trig_sql text;\n\nBEGIN\n\nSELECT schemaname, tablename INTO v_parent_schema, v_parent_tablename\nFROM pg_catalog.pg_tables\nWHERE schemaname = split_part(p_parent_table, '.', 1)\nAND tablename = split_part(p_parent_table, '.', 2);\nv_trig_name := @extschema@.check_name_length(p_object_name := v_parent_tablename, p_suffix := '_part_trig');\n-- Ensure function name matches the naming pattern\nv_function_name := @extschema@.check_name_length(v_parent_tablename, '_part_trig_func', FALSE);\nv_trig_sql := format('CREATE TRIGGER %I BEFORE INSERT ON %I.%I FOR EACH ROW EXECUTE PROCEDURE %I.%I()'\n , v_trig_name\n , v_parent_schema\n , v_parent_tablename\n , v_parent_schema\n , v_function_name);\n\nEXECUTE v_trig_sql;\n\nEND\n$$;", "explanation": "SQL definition from the open-source pg_partman PostgreSQL project.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "plpgsql_trigger", "is_postgresql_specific": true, "sql_length": 1021, "num_statements": 12} {"question": "Write the DDL statement from PostgreSQL regression test 'publication' (example 520).", "schema": null, "sql": "CREATE PUBLICATION testpub4 FOR TABLES IN SCHEMA pub_test;", "explanation": "DDL from PostgreSQL core regression test for Publication.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 58, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the title of the original airing on e4 May 2, 2010?", "schema": "CREATE TABLE table_22170495_6 (title VARCHAR, original_airing_on_e4 VARCHAR)", "sql": "SELECT title FROM table_22170495_6 WHERE original_airing_on_e4 = 'May 2, 2010';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 79, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the Outcome of the match with Partner Kateryna Bondarenko?", "schema": "CREATE TABLE table_name_84 (outcome VARCHAR, partner VARCHAR)", "sql": "SELECT outcome FROM table_name_84 WHERE partner = 'kateryna bondarenko';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 72, "num_statements": 1} {"question": "PostgreSQL regression test 'float4': Write the SELECT query (example 64).", "schema": null, "sql": "SELECT '-32768.6'::float4::int2;", "explanation": "Regression test for Float4 in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT '-32768.6'::float4::int2) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 32, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many points is there when the lost is 6 and the try bonus is 9?", "schema": "CREATE TABLE table_name_24 (points_for VARCHAR, lost VARCHAR, try_bonus VARCHAR)", "sql": "SELECT points_for FROM table_name_24 WHERE lost = '6' AND try_bonus = '9';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 74, "num_statements": 1} {"question": "How many traffic violations were issued in the past month for each district in the Transportation department?", "schema": "CREATE TABLE TrafficViolations (ViolationID INT, ViolationDate DATE, District VARCHAR(255)); INSERT INTO TrafficViolations (ViolationID, ViolationDate, District) VALUES (1, '2022-01-01', 'District A'), (2, '2022-01-15', 'District B'), (3, '2022-02-01', 'District A');", "sql": "SELECT COUNT(*), District FROM TrafficViolations WHERE ViolationDate >= DATEADD(month, -1, GETDATE()) GROUP BY District;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 120, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What position does Kerry Wood play in?", "schema": "CREATE TABLE table_11677100_3 (position VARCHAR, player VARCHAR)", "sql": "SELECT position FROM table_11677100_3 WHERE player = 'Kerry Wood';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "What is the percentage of customers using 4G and 5G networks in each customer region?", "schema": "CREATE TABLE customer_network_data (customer_region VARCHAR(20), network_type VARCHAR(20), customer_count INT); INSERT INTO customer_network_data (customer_region, network_type, customer_count) VALUES ('Northeast', '4G', 2500), ('Southwest', '4G', 2000), ('Midwest', '4G', 2200), ('Northeast', '5G', 3000), ('Southwest', '5G', 2500), ('Midwest', '5G', 3300);", "sql": "SELECT customer_region, ((SUM(CASE WHEN network_type = '5G' THEN customer_count ELSE 0 END) / SUM(customer_count)) * 100) AS pct_5g_customers, ((SUM(CASE WHEN network_type = '4G' THEN customer_count ELSE 0 END) / SUM(customer_count)) * 100) AS pct_4g_customers FROM customer_network_data GROUP BY customer_region;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 313, "num_statements": 1} {"question": "What is the total number of news stories published in the \"news\" table for each country in the \"reporters\" table?", "schema": "CREATE TABLE reporters (id INT, name VARCHAR(50), gender VARCHAR(10), age INT, country VARCHAR(50)); CREATE TABLE published_stories (reporter_id INT, news_id INT); CREATE TABLE news (id INT, title VARCHAR(100), views INT, date DATE, country VARCHAR(50));", "sql": "SELECT r.country, COUNT(*) AS total_stories FROM reporters r INNER JOIN published_stories ps ON r.id = ps.reporter_id INNER JOIN news n ON ps.news_id = n.id GROUP BY r.country;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 176, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What year was the name mount roskill grammar school?", "schema": "CREATE TABLE table_name_35 (years VARCHAR, name VARCHAR)", "sql": "SELECT years FROM table_name_35 WHERE name = 'mount roskill grammar school';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 76, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'event_trigger' (example 64).", "schema": null, "sql": "create event trigger no_rewrite_allowed on table_rewrite\n execute procedure test_evtrig_no_rewrite();", "explanation": "DDL from PostgreSQL core regression test for Event Trigger.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 102, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: List the name of musicals that do not have actors.", "schema": "CREATE TABLE actor (Name VARCHAR, Musical_ID VARCHAR); CREATE TABLE musical (Name VARCHAR, Musical_ID VARCHAR)", "sql": "SELECT Name FROM musical WHERE NOT Musical_ID IN (SELECT Musical_ID FROM actor);", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 80, "num_statements": 1} {"question": "PostgreSQL regression test 'interval': Write the SELECT query (example 120).", "schema": null, "sql": "SELECT interval '1 2' minute to second;", "explanation": "Regression test for Interval in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT interval '1 2' minute to second) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 39, "num_statements": 1} {"question": "pgTAP test for Fktap (assertion 4).", "schema": null, "sql": "CREATE TABLE public.fk (\n id INT NOT NULL PRIMARY KEY,\n pk_id INT NOT NULL REFERENCES pk(id)\n);", "explanation": "SQL assertion from pgTAP test suite for Fktap.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "ddl_table", "is_postgresql_specific": false, "sql_length": 104, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which Multi 1 has a Frequency of 2300mhz, and a Release date of q3 2008 and a Model number of turion x2 ultra zm-84?", "schema": "CREATE TABLE table_name_7 (multi_1 VARCHAR, model_number VARCHAR, frequency VARCHAR, release_date VARCHAR)", "sql": "SELECT multi_1 FROM table_name_7 WHERE frequency = '2300mhz' AND release_date = 'q3 2008' AND model_number = 'turion x2 ultra zm-84';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 133, "num_statements": 1} {"question": "Insert a new record for a farm with farm ID 2 and species ID 2", "schema": "CREATE TABLE Aquatic_Farm (Farm_ID INT, Farm_Name VARCHAR(100), Species_ID INT, Stock_Quantity INT); INSERT INTO Aquatic_Farm (Farm_ID, Farm_Name, Species_ID, Stock_Quantity) VALUES (1, 'North Sea Fishery', 1, 25000);", "sql": "INSERT INTO Aquatic_Farm (Farm_ID, Farm_Name, Species_ID, Stock_Quantity) VALUES (2, 'Pacific Fishery', 2, 15000);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 114, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the most attendance", "schema": "CREATE TABLE table_16227492_1 (attendance INTEGER)", "sql": "SELECT MAX(attendance) FROM table_16227492_1;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 45, "num_statements": 1} {"question": "How many community health workers have completed health equity training by race?", "schema": "CREATE TABLE CommunityHealthWorkers (WorkerID INT, Age INT, Race VARCHAR(25)); INSERT INTO CommunityHealthWorkers (WorkerID, Age, Race) VALUES (1, 35, 'Hispanic'), (2, 42, 'African American'), (3, 50, 'Caucasian'); CREATE TABLE HealthEquityTraining (WorkerID INT, Completed BOOLEAN); INSERT INTO HealthEquityTraining (WorkerID, Completed) VALUES (1, TRUE), (2, FALSE), (3, TRUE);", "sql": "SELECT c.Race, COUNT(*) AS CompletedHealthEquityTraining FROM CommunityHealthWorkers c INNER JOIN HealthEquityTraining t ON c.WorkerID = t.WorkerID WHERE t.Completed = TRUE GROUP BY c.Race;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 189, "num_statements": 1} {"question": "Show a SQL definition from the timescaledb project (create_table, item 6).", "schema": null, "sql": "\\set ON_ERROR_STOP 1\n\nCREATE TABLE test_delete(time timestamp with time zone PRIMARY KEY, temp float);", "explanation": "SQL definition from the open-source timescaledb PostgreSQL project.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_table", "is_postgresql_specific": false, "sql_length": 102, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Show the names of countries that have more than one roller coaster.", "schema": "CREATE TABLE roller_coaster (Country_ID VARCHAR); CREATE TABLE country (Name VARCHAR, Country_ID VARCHAR)", "sql": "SELECT T1.Name FROM country AS T1 JOIN roller_coaster AS T2 ON T1.Country_ID = T2.Country_ID GROUP BY T1.Name HAVING COUNT(*) > 1;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 130, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the minimum die size for an SM count of exactly 2?", "schema": "CREATE TABLE table_26040604_1 (die_size__mm_2__ INTEGER, sm_count VARCHAR)", "sql": "SELECT MIN(die_size__mm_2__) FROM table_26040604_1 WHERE sm_count = 2;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 70, "num_statements": 1} {"question": "PostgreSQL regression test 'arrays': Write the SELECT query (example 209).", "schema": null, "sql": "select 33 * any (44);", "explanation": "Regression test for Arrays in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select 33 * any (44)) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 21, "num_statements": 1} {"question": "How many police officers are assigned to each district?", "schema": "CREATE TABLE police_department (id INT, district_name VARCHAR(20), officer_count INT); INSERT INTO police_department (id, district_name, officer_count) VALUES (1, 'District1', 120), (2, 'District2', 130), (3, 'District3', 150);", "sql": "SELECT district_name, officer_count, officer_count/SUM(officer_count) OVER () * 100 AS percentage FROM police_department;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_window_function", "is_postgresql_specific": false, "sql_length": 121, "num_statements": 1} {"question": "Which drug was approved by the FDA in 2019 with the highest sales?", "schema": "CREATE TABLE Drug_Approvals(drug VARCHAR(20), approval_year INT, company VARCHAR(20));CREATE TABLE Drug_Sales(drug VARCHAR(20), year INT, sales DECIMAL(10,2));INSERT INTO Drug_Approvals VALUES('DrugA', 2019, 'PharmaCorp');INSERT INTO Drug_Sales VALUES('DrugA', 2019, 2000000.00);", "sql": "SELECT a.drug, MAX(s.sales) FROM Drug_Approvals a INNER JOIN Drug_Sales s ON a.drug = s.drug WHERE a.approval_year = 2019 GROUP BY a.drug;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 138, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: List all cartoon directed by \"Ben Jones\".", "schema": "CREATE TABLE Cartoon (Title VARCHAR, Directed_by VARCHAR)", "sql": "SELECT Title FROM Cartoon WHERE Directed_by = 'Ben Jones';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 58, "num_statements": 1} {"question": "Determine the total financial capability training hours for employees in microfinance organizations in Asia", "schema": "CREATE TABLE AsiaMicrofinance (id INT, employee_id INT, training_hours INT); INSERT INTO AsiaMicrofinance (id, employee_id, training_hours) VALUES (1, 1, 25), (2, 2, 35);", "sql": "SELECT SUM(training_hours) FROM AsiaMicrofinance;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 49, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the average number of Wins in a PGA Championship with a Top-5 less than 2?", "schema": "CREATE TABLE table_name_87 (wins INTEGER, tournament VARCHAR, top_5 VARCHAR)", "sql": "SELECT AVG(wins) FROM table_name_87 WHERE tournament = 'pga championship' AND top_5 < 2;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 88, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which representative was the Ambassador Extraordinary and Plenipotentiary and had a Termination of Mission date September 20, 1996?", "schema": "CREATE TABLE table_name_83 (representative VARCHAR, title VARCHAR, termination_of_mission VARCHAR)", "sql": "SELECT representative FROM table_name_83 WHERE title = 'ambassador extraordinary and plenipotentiary' AND termination_of_mission = 'september 20, 1996';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 152, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which Game has a Score of 4–1?", "schema": "CREATE TABLE table_name_47 (game INTEGER, score VARCHAR)", "sql": "SELECT SUM(game) FROM table_name_47 WHERE score = '4–1';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 56, "num_statements": 1} {"question": "What is the total water usage for each product category in the last year?", "schema": "CREATE TABLE Product (id INT, name VARCHAR(255), category VARCHAR(255), water_usage FLOAT, sale_date DATE);", "sql": "SELECT category, SUM(water_usage) as total_water_usage FROM Product WHERE sale_date >= (CURRENT_DATE - INTERVAL '1 year') GROUP BY category;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 140, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the total brup for the team?", "schema": "CREATE TABLE table_18064020_21 (brup VARCHAR, name VARCHAR)", "sql": "SELECT brup FROM table_18064020_21 WHERE name = 'total';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 56, "num_statements": 1} {"question": "What is the maximum temperature recorded in each year of astrophysics research?", "schema": "CREATE TABLE astrophysics_research_temperature (id INT PRIMARY KEY, project_year INT, temperature FLOAT);", "sql": "SELECT project_year, MAX(temperature) as max_temperature FROM astrophysics_research_temperature GROUP BY project_year;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 118, "num_statements": 1} {"question": "List all airports with a runway length greater than 3000 meters", "schema": "CREATE TABLE Airports (airport_id int, airport_name varchar(255), runway_length decimal(10,2), location varchar(255));", "sql": "SELECT airport_id, airport_name, runway_length, location FROM Airports WHERE runway_length > 3000;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 98, "num_statements": 1} {"question": "PostgreSQL regression test 'timestamptz': Write the SELECT query (example 127).", "schema": null, "sql": "SELECT '4714-11-23 23:59:59+00 BC'::timestamptz; -- out of range\nSELECT '294276-12-31 23:59:59+00'::timestamptz;", "explanation": "Regression test for Timestamptz in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT '4714-11-23 23:59:59+00 BC'::timestamptz; -- out of range\nSELECT '294276-12-31 23:59:59+00'::timestamptz) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 113, "num_statements": 2} {"question": "Generate PostgreSQL SQL for: Which district is jamie l. whitten from?", "schema": "CREATE TABLE table_1342233_24 (district VARCHAR, incumbent VARCHAR)", "sql": "SELECT district FROM table_1342233_24 WHERE incumbent = 'Jamie L. Whitten';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 75, "num_statements": 1} {"question": "List all genetic research projects in Japan using CRISPR technology?", "schema": "CREATE TABLE research_projects (id INT, name TEXT, country TEXT, methods TEXT); INSERT INTO research_projects (id, name, country, methods) VALUES (1, 'GenomeEdge', 'Japan', 'CRISPR, Sequencing');", "sql": "SELECT name FROM research_projects WHERE country = 'Japan' AND methods LIKE '%CRISPR%';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 87, "num_statements": 1} {"question": "Delete all records from the 'weapons' table where the 'country' is 'Russia'", "schema": "CREATE TABLE weapons (id INT PRIMARY KEY, weapon_name VARCHAR(50), country VARCHAR(50)); INSERT INTO weapons (id, weapon_name, country) VALUES (1, 'AK-47', 'Russia'); INSERT INTO weapons (id, weapon_name, country) VALUES (2, 'RPG-7', 'Russia');", "sql": "DELETE FROM weapons WHERE country = 'Russia';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_delete", "is_postgresql_specific": false, "sql_length": 45, "num_statements": 1} {"question": "Show a SQL definition from the pg_partman project (pg_partman--2.1.0--2.2.0, item 31).", "schema": null, "sql": "/*\n * Function to manage pre-creation of the next partitions in a set.\n * Also manages dropping old partitions if the retention option is set.\n * If p_parent_table is passed, will only run run_maintenance() on that one table (no matter what the configuration table may have set for it)\n * Otherwise, will run on all tables in the config table with p_run_maintenance() set to true.\n * For large partition sets, running analyze can cause maintenance to take longer than expected. Can set p_analyze to false to avoid a forced analyze run.\n * Be aware that constraint exclusion may not work properly until an analyze on the partition set is run.\n */\nCREATE FUNCTION run_maintenance(p_parent_table text DEFAULT NULL, p_analyze boolean DEFAULT true, p_jobmon boolean DEFAULT true, p_debug boolean DEFAULT false) RETURNS void\n LANGUAGE plpgsql SECURITY DEFINER\n AS $$\nDECLARE\n\nex_context text;\nex_detail text;\nex_hint text;\nex_message text;\nv_adv_lock boolean;\nv_check_subpart int;\nv_create_count int := 0;\nv_current_partition text;\nv_current_partition_id bigint;\nv_current_partition_timestamp timestamp;\nv_datetime_string text;\nv_drop_count int := 0;\nv_id_position int;\nv_job_id bigint;\nv_jobmon boolean;\nv_jobmon_schema text;\nv_last_partition text;\nv_last_partition_created boolean;\nv_last_partition_id bigint;\nv_last_partition_timestamp timestamp;\nv_max_id_parent bigint;\nv_max_time_parent timestamp;\nv_next_partition_id bigint;\nv_next_partition_timestamp timestamp;\nv_old_search_path text;\nv_parent_schema text;\nv_parent_tablename text;\nv_premade_count int;\nv_premake_id_max bigint;\nv_premake_id_min bigint;\nv_premake_timestamp_min timestamp;\nv_premake_timestamp_max timestamp;\nv_quarter text;\nv_row record;\nv_row_max_id record;\nv_row_max_time record;\nv_row_sub record;\nv_skip_maint boolean;\nv_step_id bigint;\nv_step_overflow_id bigint;\nv_step_serial_id bigint;\nv_sub_id_max bigint;\nv_sub_id_max_suffix bigint;\nv_sub_id_min bigint;\nv_sub_parent text;\nv_sub_timestamp_max timestamp;\nv_sub_timestamp_max_suffix timestamp;\nv_sub_timestamp_min timestamp;\nv_tablename text;\nv_tables_list_sql text;\nv_time_position int;\nv_year text;\n\nBEGIN\n\nv_adv_lock := pg_try_advisory_xact_lock(hashtext('pg_partman run_maintenance'));\nIF v_adv_lock = 'false' THEN\n RAISE NOTICE 'Partman maintenance already running.';\n RETURN;\nEND IF;\n\nIF p_jobmon THEN\n SELECT nspname INTO v_jobmon_schema FROM pg_catalog.pg_namespace n, pg_catalog.pg_extension e WHERE e.extname = 'pg_jobmon' AND e.extnamespace = n.oid;\n IF v_jobmon_schema IS NOT NULL THEN\n SELECT current_setting('search_path') INTO v_old_search_path;\n EXECUTE format('SELECT set_config(%L, %L, %L)', 'search_path', '@extschema@,'||v_jobmon_schema, 'false');\n END IF;\nEND IF;\n\nIF v_jobmon_schema IS NOT NULL THEN\n v_job_id := add_job('PARTMAN RUN MAINTENANCE');\n v_step_id := add_step(v_job_id, 'Running maintenance loop');\nEND IF;\n\n-- Check for consistent data in part_config_sub table. Was unable to get this working properly as either a constraint or trigger.\n-- Would either delay raising an error until the next write (which I cannot predict) or disallow future edits to update a sub-partition set's configuration.\n-- This way at least provides a consistent way to check that I know will run. If anyone can get a working constraint/trigger, please help!\n-- Don't have to worry about this in the serial trigger maintenance since subpartitioning requires run_maintenance().\nFOR v_row IN\n SELECT sub_parent FROM @extschema@.part_config_sub\nLOOP\n SELECT count(*) INTO v_check_subpart FROM @extschema@.check_subpart_sameconfig(v_row.sub_parent);\n IF v_check_subpart > 1 THEN\n RAISE EXCEPTION 'Inconsistent data in part_config_sub table. Sub-partition tables that are themselves sub-partitions cannot have differing configuration values among their siblings.\n Run this query: \"SELECT * FROM @extschema@.check_subpart_sameconfig(''%'');\" This should only return a single row or nothing.\n If multiple rows are returned, results are all children of the given parent. Update the differing values to be consistent for your desired values.', v_row.sub_parent;\n END IF;\nEND LOOP;\n\nv_row := NULL; -- Ensure it's reset\n\n\nv_tables_list_sql := 'SELECT parent_table\n , partition_type\n , partition_interval\n , control\n , premake\n , datetime_string\n , undo_in_progress\n , sub_partition_set_full\n , epoch\n FROM @extschema@.part_config\n WHERE sub_partition_set_full = false';\n\nIF p_parent_table IS NULL THEN\n v_tables_list_sql := v_tables_list_sql || ' AND use_run_maintenance = true';\nELSE\n v_tables_list_sql := v_tables_list_sql || format(' AND parent_table = %L', p_parent_table);\nEND IF;\n\nFOR v_row IN EXECUTE v_tables_list_sql\nLOOP\n\n CONTINUE WHEN v_row.undo_in_progress;\n v_skip_maint := true; -- reset every loop\n\n SELECT schemaname, tablename INTO v_parent_schema, v_parent_tablename FROM pg_catalog.pg_tables WHERE schemaname ||'.'|| tablename = v_row.parent_table;\n\n SELECT partition_tablename INTO v_last_partition FROM @extschema@.show_partitions(v_row.parent_table, 'DESC') LIMIT 1;\n IF p_debug THEN\n RAISE NOTICE 'run_maint: parent_table: %, v_last_partition: %', v_row.parent_table, v_last_partition;\n END IF;\n\n IF v_row.partition_type = 'time' OR v_row.partition_type = 'time-custom' THEN\n\n v_time_position := (length(v_last_partition) - position('p_' in reverse(v_last_partition))) + 2;\n IF v_row.partition_interval::interval <> '3 months' OR (v_row.partition_interval::interval = '3 months' AND v_row.partition_type = 'time-custom') THEN\n v_last_partition_timestamp := to_timestamp(substring(v_last_partition from v_time_position), v_row.datetime_string);\n ELSE\n -- to_timestamp doesn't recognize 'Q' date string formater. Handle it\n v_year := split_part(substring(v_last_partition FROM v_time_position), 'q', 1);\n v_quarter := split_part(substring(v_last_partition FROM v_time_position), 'q', 2);\n CASE\n WHEN v_quarter = '1' THEN\n v_last_partition_timestamp := to_timestamp(v_year || '-01-01', 'YYYY-MM-DD');\n WHEN v_quarter = '2' THEN\n v_last_partition_timestamp := to_timestamp(v_year || '-04-01', 'YYYY-MM-DD');\n WHEN v_quarter = '3' THEN\n v_last_partition_timestamp := to_timestamp(v_year || '-07-01', 'YYYY-MM-DD');\n WHEN v_quarter = '4' THEN\n v_last_partition_timestamp := to_timestamp(v_year || '-10-01', 'YYYY-MM-DD');\n END CASE;\n END IF;\n\n -- Loop through child tables starting from highest to get current max value in partition set\n -- Avoids doing a scan on entire partition set and/or getting any values accidentally in parent.\n FOR v_row_max_time IN\n SELECT partition_schemaname, partition_tablename FROM @extschema@.show_partitions(v_row.parent_table, 'DESC')\n LOOP\n IF v_row.epoch = false THEN\n EXECUTE format('SELECT max(%I)::text FROM %I.%I'\n , v_row.control\n , v_row_max_time.partition_schemaname\n , v_row_max_time.partition_tablename\n ) INTO v_current_partition_timestamp;\n ELSE\n EXECUTE format('SELECT to_timestamp(max(%I))::text FROM %I.%I'\n , v_row.control\n , v_row_max_time.partition_schemaname\n , v_row_max_time.partition_tablename\n ) INTO v_current_partition_timestamp;\n END IF;\n IF v_current_partition_timestamp IS NOT NULL THEN\n SELECT suffix_timestamp INTO v_current_partition_timestamp FROM show_partition_name(v_row.parent_table, v_current_partition_timestamp::text);\n EXIT;\n END IF;\n END LOOP;\n -- Check for values in the parent table. If they are there and greater than all child values, use that instead\n -- This allows maintenance to continue working properly if there is a large gap in data insertion. Data will remain in parent, but new tables will be created\n IF v_row.epoch = false THEN\n EXECUTE format('SELECT max(%I) FROM ONLY %I.%I', v_row.control, v_parent_schema, v_parent_tablename) INTO v_max_time_parent;\n ELSE\n EXECUTE format('SELECT to_timestamp(max(%I)) FROM ONLY %I.%I', v_row.control, v_parent_schema, v_parent_tablename) INTO v_max_time_parent;\n END IF;\n IF p_debug THEN\n RAISE NOTICE 'run_maint: v_current_partition_timestamp: %, v_max_time_parent: %', v_current_partition_timestamp, v_max_time_parent;\n END IF;\n IF v_max_time_parent > v_current_partition_timestamp THEN\n SELECT suffix_timestamp INTO v_current_partition_timestamp FROM show_partition_name(v_row.parent_table, v_max_time_parent::text);\n END IF;\n IF v_current_partition_timestamp IS NULL THEN\n -- Partition set is completely empty. Nothing to do\n CONTINUE;\n END IF;\n\n -- If this is a subpartition, determine if the last child table has been made. If so, mark it as full so future maintenance runs can skip it\n SELECT sub_min::timestamp, sub_max::timestamp INTO v_sub_timestamp_min, v_sub_timestamp_max FROM @extschema@.check_subpartition_limits(v_row.parent_table, 'time');\n IF v_sub_timestamp_max IS NOT NULL THEN\n SELECT suffix_timestamp INTO v_sub_timestamp_max_suffix FROM @extschema@.show_partition_name(v_row.parent_table, v_sub_timestamp_max::text);\n IF v_sub_timestamp_max_suffix = v_last_partition_timestamp THEN\n -- Final partition for this set is created. Set full and skip it\n UPDATE @extschema@.part_config SET sub_partition_set_full = true WHERE parent_table = v_row.parent_table;\n CONTINUE;\n END IF;\n END IF;\n\n -- Check and see how many premade partitions there are.\n v_premade_count = round(EXTRACT('epoch' FROM age(v_last_partition_timestamp, v_current_partition_timestamp)) / EXTRACT('epoch' FROM v_row.partition_interval::interval));\n v_next_partition_timestamp := v_last_partition_timestamp;\n IF p_debug THEN\n RAISE NOTICE 'run_maint before loop: current_partition_timestamp: %, v_premade_count: %, v_sub_timestamp_min: %, v_sub_timestamp_max: %'\n , v_current_partition_timestamp\n , v_premade_count\n , v_sub_timestamp_min\n , v_sub_timestamp_max;\n END IF;\n -- Loop premaking until config setting is met. Allows it to catch up if it fell behind or if premake changed\n WHILE (v_premade_count < v_row.premake) LOOP\n IF p_debug THEN\n RAISE NOTICE 'run_maint: parent_table: %, v_premade_count: %, v_next_partition_timestamp: %', v_row.parent_table, v_premade_count, v_next_partition_timestamp;\n END IF;\n IF v_next_partition_timestamp < v_sub_timestamp_min OR v_next_partition_timestamp > v_sub_timestamp_max THEN\n -- With subpartitioning, no need to run if the timestamp is not in the parent table's range\n EXIT;\n END IF;\n BEGIN\n v_next_partition_timestamp := v_next_partition_timestamp + v_row.partition_interval::interval;\n EXCEPTION WHEN datetime_field_overflow THEN\n v_premade_count := v_row.premake; -- do this so it can exit the premake check loop and continue in the outer for loop\n IF v_jobmon_schema IS NOT NULL THEN\n v_step_overflow_id := add_step(v_job_id, 'Attempted partition time interval is outside PostgreSQL''s supported time range.');\n PERFORM update_step(v_step_overflow_id, 'CRITICAL', 'Child partition creation skippd for parent table '||v_partition_time);\n END IF;\n RAISE WARNING 'Attempted partition time interval is outside PostgreSQL''s supported time range. Child partition creation skipped for parent table %', v_row.parent_table;\n CONTINUE;\n END;\n v_last_partition_created := @extschema@.create_partition_time(v_row.parent_table, ARRAY[v_next_partition_timestamp], p_analyze);\n IF v_last_partition_created THEN\n v_create_count := v_create_count + 1;\n PERFORM @extschema@.create_function_time(v_row.parent_table, v_job_id);\n END IF;\n\n v_premade_count = round(EXTRACT('epoch' FROM age(v_next_partition_timestamp, v_current_partition_timestamp)) / EXTRACT('epoch' FROM v_row.partition_interval::interval));\n END LOOP;\n ELSIF v_row.partition_type = 'id' THEN\n -- Loop through child tables starting from highest to get current max value in partition set\n -- Avoids doing a scan on entire partition set and/or getting any values accidentally in parent.\n FOR v_row_max_id IN\n SELECT partition_schemaname, partition_tablename FROM @extschema@.show_partitions(v_row.parent_table, 'DESC')\n LOOP\n EXECUTE format('SELECT max(%I)::text FROM %I.%I'\n , v_row.control\n , v_row_max_id.partition_schemaname\n , v_row_max_id.partition_tablename) INTO v_current_partition_id;\n IF v_current_partition_id IS NOT NULL THEN\n SELECT suffix_id INTO v_current_partition_id FROM show_partition_name(v_row.parent_table, v_current_partition_id::text);\n EXIT;\n END IF;\n END LOOP;\n -- Check for values in the parent table. If they are there and greater than all child values, use that instead\n -- This allows maintenance to continue working properly if there is a large gap in data insertion. Data will remain in parent, but new tables will be created\n EXECUTE format('SELECT max(%I) FROM ONLY %I.%I', v_row.control, v_parent_schema, v_parent_tablename) INTO v_max_id_parent;\n IF v_max_id_parent > v_current_partition_id THEN\n SELECT suffix_id INTO v_current_partition_id FROM show_partition_name(v_row.parent_table, v_max_id_parent::text);\n END IF;\n IF v_current_partition_id IS NULL THEN\n -- Partition set is completely empty. Nothing to do\n CONTINUE;\n END IF;\n\n v_id_position := (length(v_last_partition) - position('p_' in reverse(v_last_partition))) + 2;\n v_last_partition_id = substring(v_last_partition from v_id_position)::bigint;\n -- Determine if this table is a child of a subpartition parent. If so, get limits to see if run_maintenance even needs to run for it.\n SELECT sub_min::bigint, sub_max::bigint INTO v_sub_id_min, v_sub_id_max FROM @extschema@.check_subpartition_limits(v_row.parent_table, 'id');\n IF v_sub_id_max IS NOT NULL THEN\n SELECT suffix_id INTO v_sub_id_max_suffix FROM @extschema@.show_partition_name(v_row.parent_table, v_sub_id_max::text);\n IF v_sub_id_max_suffix = v_last_partition_id THEN\n -- Final partition for this set is created. Set full and skip it\n UPDATE @extschema@.part_config SET sub_partition_set_full = true WHERE parent_table = v_row.parent_table;\n CONTINUE;\n END IF;\n END IF;\n\n v_next_partition_id := v_last_partition_id;\n v_premade_count := ((v_last_partition_id - v_current_partition_id) / v_row.partition_interval::bigint);\n -- Loop premaking until config setting is met. Allows it to catch up if it fell behind or if premake changed.\n WHILE (v_premade_count < v_row.premake) LOOP\n IF p_debug THEN\n RAISE NOTICE 'run_maint: parent_table: %, v_premade_count: %, v_next_partition_id: %', v_row.parent_table, v_premade_count, v_next_partition_id;\n END IF;\n IF v_next_partition_id < v_sub_id_min OR v_next_partition_id > v_sub_id_max THEN\n -- With subpartitioning, no need to run if the id is not in the parent table's range\n EXIT;\n END IF;\n v_next_partition_id := v_next_partition_id + v_row.partition_interval::bigint;\n v_last_partition_created := @extschema@.create_partition_id(v_row.parent_table, ARRAY[v_next_partition_id], p_analyze);\n IF v_last_partition_created THEN\n v_create_count := v_create_count + 1;\n PERFORM @extschema@.create_function_id(v_row.parent_table, v_job_id);\n END IF;\n v_premade_count := ((v_next_partition_id - v_current_partition_id) / v_row.partition_interval::bigint);\n END LOOP;\n\n END IF; -- end main IF check for time or id\n\n -- Manage additonal constraints if set\n PERFORM @extschema@.apply_constraints(p_parent_table := v_row.parent_table, p_job_id := v_job_id, p_debug := p_debug);\n\nEND LOOP; -- end of creation loop\n\n-- Manage dropping old partitions if retention option is set\nFOR v_row IN\n SELECT parent_table FROM @extschema@.part_config WHERE retention IS NOT NULL AND undo_in_progress = false AND\n (partition_type = 'time' OR partition_type = 'time-custom')\nLOOP\n IF p_parent_table IS NULL THEN\n v_drop_count := v_drop_count + @extschema@.drop_partition_time(v_row.parent_table);\n ELSE -- Only run retention on table given in parameter\n IF p_parent_table <> v_row.parent_table THEN\n CONTINUE;\n ELSE\n v_drop_count := v_drop_count + @extschema@.drop_partition_time(v_row.parent_table);\n END IF;\n END IF;\n IF v_drop_count > 0 THEN\n PERFORM @extschema@.create_function_time(v_row.parent_table, v_job_id);\n END IF;\nEND LOOP;\nFOR v_row IN\n SELECT parent_table FROM @extschema@.part_config WHERE retention IS NOT NULL AND undo_in_progress = false AND partition_type = 'id'\nLOOP\n IF p_parent_table IS NULL THEN\n v_drop_count := v_drop_count + @extschema@.drop_partition_id(v_row.parent_table);\n ELSE -- Only run retention on table given in parameter\n IF p_parent_table <> v_row.parent_table THEN\n CONTINUE;\n ELSE\n v_drop_count := v_drop_count + @extschema@.drop_partition_id(v_row.parent_table);\n END IF;\n END IF;\n IF v_drop_count > 0 THEN\n PERFORM @extschema@.create_function_id(v_row.parent_table, v_job_id);\n END IF;\nEND LOOP;\n\nIF v_jobmon_schema IS NOT NULL THEN\n PERFORM update_step(v_step_id, 'OK', format('Partition maintenance finished. %s partitions made. %s partitions dropped.', v_create_count, v_drop_count));\n IF v_step_overflow_id IS NOT NULL OR v_step_serial_id IS NOT NULL THEN\n PERFORM fail_job(v_job_id);\n ELSE\n PERFORM close_job(v_job_id);\n END IF;\n EXECUTE format('SELECT set_config(%L, %L, %L)', 'search_path', v_old_search_path, 'false');\nEND IF;\n\nEXCEPTION\n WHEN OTHERS THEN\n GET STACKED DIAGNOSTICS ex_message = MESSAGE_TEXT,\n ex_context = PG_EXCEPTION_CONTEXT,\n ex_detail = PG_EXCEPTION_DETAIL,\n ex_hint = PG_EXCEPTION_HINT;\n IF v_jobmon_schema IS NOT NULL THEN\n IF v_job_id IS NULL THEN\n EXECUTE format('SELECT %I.add_job(''PARTMAN RUN MAINTENANCE'')', v_jobmon_schema) INTO v_job_id;\n EXECUTE format('SELECT %I.add_step(%s, ''EXCEPTION before job logging started'')', v_jobmon_schema, v_job_id, p_parent_table) INTO v_step_id;\n ELSIF v_step_id IS NULL THEN\n EXECUTE format('SELECT %I.add_step(%s, ''EXCEPTION before first step logged'')', v_jobmon_schema, v_job_id) INTO v_step_id;\n END IF;\n EXECUTE format('SELECT %I.update_step(%s, ''CRITICAL'', %L)', v_jobmon_schema, v_step_id, 'ERROR: '||coalesce(SQLERRM,'unknown'));\n EXECUTE format('SELECT %I.fail_job(%s)', v_jobmon_schema, v_job_id);\n END IF;\n RAISE EXCEPTION '%\nCONTEXT: %\nDETAIL: %\nHINT: %', ex_message, ex_context, ex_detail, ex_hint;\nEND\n$$;", "explanation": "SQL definition from the open-source pg_partman PostgreSQL project.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "plpgsql_trigger", "is_postgresql_specific": true, "sql_length": 21226, "num_statements": 200} {"question": "Generate PostgreSQL SQL for: What was the result of Robert L. F. Sikes' election bid?", "schema": "CREATE TABLE table_1342256_10 (result VARCHAR, incumbent VARCHAR)", "sql": "SELECT result FROM table_1342256_10 WHERE incumbent = 'Robert L. F. Sikes';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 75, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What shows for miles [One Way] when the fans took 340?", "schema": "CREATE TABLE table_name_89 (one_way VARCHAR, miles_ INTEGER, fans_took VARCHAR)", "sql": "SELECT AVG(miles_)[one_way] FROM table_name_89 WHERE fans_took = '340';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 71, "num_statements": 1} {"question": "Write the PL/pgSQL object from PostgreSQL regression test 'polymorphism' (example 29).", "schema": null, "sql": "drop function polyf(x anycompatiblerange, y anycompatiblearray);", "explanation": "PL/pgSQL object from PostgreSQL core test for Polymorphism.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 64, "num_statements": 1} {"question": "PostgreSQL regression test 'identity': Write the SELECT query (example 114).", "schema": null, "sql": "SELECT * FROM itest7c;", "explanation": "Regression test for Identity in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT * FROM itest7c) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 22, "num_statements": 1} {"question": "What is the minimum playtime for players from each country?", "schema": "CREATE TABLE PlayerPlaytimes (PlayerID int, Country varchar(50), Playtime int); INSERT INTO PlayerPlaytimes (PlayerID, Country, Playtime) VALUES (1, 'USA', 200), (2, 'Canada', 150), (3, 'Australia', 250), (4, 'England', 300), (5, 'USA', 350), (6, 'Canada', 400), (7, 'Australia', 450), (8, 'England', 500);", "sql": "SELECT Country, MIN(Playtime) FROM PlayerPlaytimes GROUP BY Country;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 68, "num_statements": 1} {"question": "Calculate the percentage change in the number of tourists visiting India from 2017 to 2019", "schema": "CREATE TABLE IndiaVisitorCount (year INT, tourists INT); INSERT INTO IndiaVisitorCount (year, tourists) VALUES (2017, 16000000), (2019, 18000000);", "sql": "SELECT (SUM(CASE WHEN year = 2019 THEN tourists ELSE 0 END) - SUM(CASE WHEN year = 2017 THEN tourists ELSE 0 END)) * 100.0 / SUM(CASE WHEN year = 2017 THEN tourists ELSE 0 END) AS pct_change FROM IndiaVisitorCount;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 214, "num_statements": 1} {"question": "What's the number of medical supply deliveries to Haiti and Honduras in 2021 and 2022?", "schema": "CREATE TABLE deliveries (id INT, country TEXT, year INT, supplies INT); INSERT INTO deliveries (id, country, year, supplies) VALUES (1, 'Haiti', 2021, 500), (2, 'Honduras', 2022, 700);", "sql": "SELECT supplies FROM deliveries WHERE country IN ('Haiti', 'Honduras') AND year BETWEEN 2021 AND 2022;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 102, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the nationality of the draft pick player who plays centre position and is going to Calgary Flames?", "schema": "CREATE TABLE table_2897457_2 (nationality VARCHAR, position VARCHAR, nhl_team VARCHAR)", "sql": "SELECT nationality FROM table_2897457_2 WHERE position = 'Centre' AND nhl_team = 'Calgary Flames';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 98, "num_statements": 1} {"question": "Update the industry for a specific company.", "schema": "CREATE TABLE Companies (id INT, name TEXT, industry TEXT, country TEXT, founder_gender TEXT); INSERT INTO Companies (id, name, industry, country, founder_gender) VALUES (1, 'Acme Inc', 'Tech', 'USA', 'Female'); INSERT INTO Companies (id, name, industry, country, founder_gender) VALUES (2, 'Beta Corp', 'Biotech', 'Canada', 'Male');", "sql": "UPDATE Companies SET industry = 'Fintech' WHERE name = 'Beta Corp';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 67, "num_statements": 1} {"question": "Write the SQL definition from PostgreSQL contrib extension 'contrib' (item 2).", "schema": null, "sql": "CREATE FUNCTION levenshtein (text,text,int,int,int) RETURNS int\nAS 'MODULE_PATHNAME','levenshtein_with_costs'\nLANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;", "explanation": "SQL definition from the 'contrib' PostgreSQL contrib extension.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 152, "num_statements": 1} {"question": "PL/pgSQL test: Plpython Spi (example 33).", "schema": null, "sql": "CREATE FUNCTION cursor_fetch() RETURNS int AS $$\nres = plpy.cursor(\"select fname, lname from users\")\nassert len(res.fetch(3)) == 3\nassert len(res.fetch(3)) == 1\nassert len(res.fetch(3)) == 0\nassert len(res.fetch(3)) == 0\ntry:\n # use next() and not __next__(), the method name changed in\n # http://www.python.org/dev/peps/pep-3114/\n next(res)\nexcept StopIteration:\n pass\nelse:\n assert False, \"StopIteration not raised\"\n$$ LANGUAGE plpython3u;", "explanation": "PL/pgSQL example from PostgreSQL source test for Plpython Spi.", "validation_query": null, "source": "plpgsql_source", "difficulty": "intermediate", "category": "plpgsql_function", "is_postgresql_specific": false, "sql_length": 456, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which week has Game site of Candlestick Park?", "schema": "CREATE TABLE table_name_32 (week VARCHAR, game_site VARCHAR)", "sql": "SELECT week FROM table_name_32 WHERE game_site = 'candlestick park';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 68, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which settlement has the cyrillic and other name of локве (romanian: locve)?", "schema": "CREATE TABLE table_2562572_44 (settlement VARCHAR, cyrillic_name_other_names VARCHAR)", "sql": "SELECT settlement FROM table_2562572_44 WHERE cyrillic_name_other_names = 'Локве (Romanian: Locve)';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 100, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the displacement when the fuel system is carburettor, and a power of ps (kw; hp), and also has a model of 2000?", "schema": "CREATE TABLE table_name_20 (displacement VARCHAR, model VARCHAR, fuel_system VARCHAR, power VARCHAR)", "sql": "SELECT displacement FROM table_name_20 WHERE fuel_system = 'carburettor' AND power = 'ps (kw; hp)' AND model = '2000';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 118, "num_statements": 2} {"question": "PostgreSQL regression test 'sqljson': Write the SELECT query (example 206).", "schema": null, "sql": "SELECT JSON_ARRAY(JSON_ARRAY('{ \"a\" : 123 }' RETURNING varchar(2)));", "explanation": "Regression test for Sqljson in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT JSON_ARRAY(JSON_ARRAY('{ \"a\" : 123 }' RETURNING varchar(2)))) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": true, "sql_length": 68, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Of weightlifters who weighed more than 136.16, who had the highest Total?", "schema": "CREATE TABLE table_name_82 (total__kg_ INTEGER, bodyweight INTEGER)", "sql": "SELECT MAX(total__kg_) FROM table_name_82 WHERE bodyweight > 136.16;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 68, "num_statements": 1} {"question": "Who are the male founders that have not received any funding?", "schema": "CREATE TABLE founders(id INT, name VARCHAR(50), gender VARCHAR(10), industry VARCHAR(20)); INSERT INTO founders VALUES (1, 'Alice', 'Female', 'Tech'); INSERT INTO founders VALUES (2, 'Bob', 'Male', 'Finance'); CREATE TABLE funding(id INT, founder_id INT, amount INT); INSERT INTO funding VALUES (1, 1, 500000); INSERT INTO funding VALUES (2, 1, 750000);", "sql": "SELECT founders.name FROM founders LEFT JOIN funding ON founders.id = funding.founder_id WHERE founders.gender = 'Male' AND funding.id IS NULL;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 143, "num_statements": 1} {"question": "What is the average rating of hotels in the US that offer virtual tours?", "schema": "CREATE TABLE hotels (hotel_id INT, hotel_name TEXT, country TEXT, rating FLOAT, virtual_tour BOOLEAN); INSERT INTO hotels (hotel_id, hotel_name, country, rating, virtual_tour) VALUES (1, 'Hotel X', 'USA', 4.2, true), (2, 'Hotel Y', 'Canada', 4.5, false);", "sql": "SELECT AVG(rating) FROM hotels WHERE country = 'USA' AND virtual_tour = true;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 77, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who drive 74 laps in a grid larger than 9?", "schema": "CREATE TABLE table_name_25 (driver VARCHAR, laps VARCHAR, grid VARCHAR)", "sql": "SELECT driver FROM table_name_25 WHERE laps = 74 AND grid > 9;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 62, "num_statements": 1} {"question": "What is the total production of wells in the Arabian Sea?", "schema": "CREATE TABLE wells (well_id INT, name VARCHAR(50), location VARCHAR(50), production FLOAT); INSERT INTO wells (well_id, name, location, production) VALUES (1, 'F1', 'Arabian Sea', 4000), (2, 'F2', 'Arabian Sea', 3000), (3, 'F3', 'Arabian Sea', 5000);", "sql": "SELECT SUM(production) FROM wells WHERE location = 'Arabian Sea';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 65, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'pg_trgm' (example 118).", "schema": null, "sql": "select * from test2 where t = '%line%';", "explanation": "Example query from the 'pg_trgm' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 39, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was John Watson's time/retired?", "schema": "CREATE TABLE table_name_46 (time_retired VARCHAR, driver VARCHAR)", "sql": "SELECT time_retired FROM table_name_46 WHERE driver = 'john watson';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 68, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: what's the naturalisation by marriage with numer of jamaicans granted british citizenship being 3165", "schema": "CREATE TABLE table_11214212_1 (naturalisation_by_marriage VARCHAR, numer_of_jamaicans_granted_british_citizenship VARCHAR)", "sql": "SELECT naturalisation_by_marriage FROM table_11214212_1 WHERE numer_of_jamaicans_granted_british_citizenship = 3165;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 116, "num_statements": 1} {"question": "What is the average account balance of customers who are high net worth individuals (HNWI) in the Asia-Pacific region?", "schema": "CREATE TABLE customers (id INT, name VARCHAR(255), account_balance DECIMAL(10, 2), region VARCHAR(50)); INSERT INTO customers (id, name, account_balance, region) VALUES (1, 'Li Wei', 2000000.00, 'Asia-Pacific'), (2, 'Kim Park', 500000.00, 'Asia-Pacific');", "sql": "SELECT AVG(account_balance) FROM customers WHERE region = 'Asia-Pacific' AND account_balance > 1000000.00;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 106, "num_statements": 1} {"question": "How many vegetarian and non-vegetarian dishes are available in each store, and what is the total calorie count for each?", "schema": "CREATE TABLE Dishes (DishID int, Name varchar(50), Type varchar(10), Calories int, StoreID int); INSERT INTO Dishes (DishID, Name, Type, Calories, StoreID) VALUES (1, 'Veggie Burger', 'Vegetarian', 400, 1);", "sql": "SELECT Stores.Name, SUM(CASE WHEN Dishes.Type = 'Vegetarian' THEN 1 ELSE 0 END) AS VegetarianDishes, SUM(CASE WHEN Dishes.Type = 'Non-Vegetarian' THEN 1 ELSE 0 END) AS NonVegetarianDishes, SUM(Dishes.Calories) AS TotalCalories FROM Dishes INNER JOIN Stores ON Dishes.StoreID = Stores.StoreID GROUP BY Stores.Name;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 313, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the Status of the dinosaur, whose notes are, \"n coelurosauria\"?", "schema": "CREATE TABLE table_name_73 (status VARCHAR, notes VARCHAR)", "sql": "SELECT status FROM table_name_73 WHERE notes = 'n coelurosauria';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 65, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which home team score has an Away team of melbourne?", "schema": "CREATE TABLE table_name_71 (home_team VARCHAR, away_team VARCHAR)", "sql": "SELECT home_team AS score FROM table_name_71 WHERE away_team = 'melbourne';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 75, "num_statements": 1} {"question": "Delete records in the 'unfair_outcomes' table where the 'outcome' is 'unfavorable' and the 'bias_type' is 'gender'", "schema": "CREATE TABLE unfair_outcomes (id INT PRIMARY KEY, algorithm_name VARCHAR(50), outcome VARCHAR(20), bias_type VARCHAR(20), description TEXT);", "sql": "DELETE FROM unfair_outcomes WHERE outcome = 'unfavorable' AND bias_type = 'gender';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_delete", "is_postgresql_specific": false, "sql_length": 83, "num_statements": 1} {"question": "PostgreSQL regression test 'jsonb_jsonpath': Write the SELECT query (example 356).", "schema": null, "sql": "select jsonb_path_query('null', '$.boolean()', silent => true);", "explanation": "Regression test for Jsonb Jsonpath in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select jsonb_path_query('null', '$.boolean()', silent => true)) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "advanced", "category": "query_select", "is_postgresql_specific": false, "sql_length": 63, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the protein name when aa length is 202 aa?", "schema": "CREATE TABLE table_26708105_5 (protein_name VARCHAR, aa_length VARCHAR)", "sql": "SELECT protein_name FROM table_26708105_5 WHERE aa_length = '202 aa';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 69, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the year that the Roughriders left the conference?", "schema": "CREATE TABLE table_name_25 (year_left INTEGER, mascot VARCHAR)", "sql": "SELECT MAX(year_left) FROM table_name_25 WHERE mascot = 'roughriders';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 70, "num_statements": 1} {"question": "Find the number of players who have played \"Adventure Game B\" and identify their gender.", "schema": "CREATE TABLE Adventure_Game_B (player_id INT, name VARCHAR(50), age INT, gender VARCHAR(10)); INSERT INTO Adventure_Game_B (player_id, name, age, gender) VALUES (1, 'John Doe', 25, 'Male'), (2, 'Jane Smith', 30, 'Female'), (7, 'David Kim', 29, 'Male');", "sql": "SELECT COUNT(*), gender FROM Adventure_Game_B GROUP BY gender;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 62, "num_statements": 1} {"question": "Identify the smart city technology adoption project with the lowest adoption rate", "schema": "CREATE TABLE smart_city_projects (id INT, name VARCHAR(50), location VARCHAR(50), adoption_rate FLOAT);", "sql": "SELECT * FROM smart_city_projects WHERE adoption_rate = (SELECT MIN(adoption_rate) FROM smart_city_projects);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 109, "num_statements": 1} {"question": "Write the SQL definition from PostgreSQL contrib extension 'contrib' (item 1).", "schema": null, "sql": "/* contrib/pg_trgm/pg_trgm--1.3.sql */\n\n-- complain if script is sourced in psql, rather than via CREATE EXTENSION\n\\echo Use \"CREATE EXTENSION pg_trgm\" to load this file. \\quit\n\n-- Deprecated function\nCREATE FUNCTION set_limit(float4)\nRETURNS float4\nAS 'MODULE_PATHNAME'\nLANGUAGE C STRICT VOLATILE PARALLEL UNSAFE;", "explanation": "SQL definition from the 'contrib' PostgreSQL contrib extension.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_advanced", "is_postgresql_specific": true, "sql_length": 314, "num_statements": 1} {"question": "Calculate the average CO2 emissions for projects in the 'environment' schema", "schema": "CREATE SCHEMA IF NOT EXISTS environment; CREATE TABLE environment.projects (id INT, name VARCHAR(100), co2_emissions FLOAT); INSERT INTO environment.projects (id, name, co2_emissions) VALUES (1, 'Solar Farm', 50), (2, 'Wind Turbine Park', 25), (3, 'Hydroelectric Plant', 10);", "sql": "SELECT AVG(co2_emissions) FROM environment.projects;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 52, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the match type in Budapest where the opponent was Luxembourg?", "schema": "CREATE TABLE table_name_64 (match_type VARCHAR, location VARCHAR, opponenent VARCHAR)", "sql": "SELECT match_type FROM table_name_64 WHERE location = 'budapest' AND opponenent = 'luxembourg';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 95, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the highest value for Points 2 when the goal average 1 is 0.65?", "schema": "CREATE TABLE table_17359181_1 (points_2 INTEGER, goal_average_1 VARCHAR)", "sql": "SELECT MAX(points_2) FROM table_17359181_1 WHERE goal_average_1 = '0.65';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 73, "num_statements": 1} {"question": "Delete all workouts performed by 'John Doe' in 'Park City' gym using DELETE command", "schema": "CREATE TABLE workouts (workout_id INT, member_id INT, gym_id INT, workout_date DATE, calories INT); INSERT INTO workouts (workout_id, member_id, gym_id, workout_date, calories) VALUES (1, 1, 1, '2022-01-01', 300), (2, 2, 1, '2022-01-02', 400), (3, 1, 2, '2022-01-03', 500); CREATE TABLE members (member_id INT, name TEXT, age INT, gender TEXT); INSERT INTO members (member_id, name, age, gender) VALUES (1, 'John Doe', 30, 'Male'), (2, 'Jane Doe', 28, 'Female'); CREATE TABLE gyms (gym_id INT, name TEXT, city TEXT); INSERT INTO gyms (gym_id, name, city) VALUES (1, 'Park City', 'New York'), (2, 'Central Park', 'New York');", "sql": "DELETE FROM workouts WHERE member_id IN (SELECT member_id FROM members WHERE name = 'John Doe') AND gym_id IN (SELECT gym_id FROM gyms WHERE city = 'New York' AND name = 'Park City');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "dml_delete", "is_postgresql_specific": false, "sql_length": 183, "num_statements": 1} {"question": "What is the total CO2 emission in the Arctic regions for each year?", "schema": "CREATE TABLE CO2Emissions (region VARCHAR(255), year INT, CO2_emission FLOAT); INSERT INTO CO2Emissions (region, year, CO2_emission) VALUES ('Arctic Ocean', 2019, 120000), ('Arctic Ocean', 2020, 125000), ('Greenland', 2019, 150000), ('Greenland', 2020, 160000);", "sql": "SELECT region, year, SUM(CO2_emission) as total_emission FROM CO2Emissions GROUP BY year, region;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 97, "num_statements": 1} {"question": "What is the total number of workers in factories that have implemented fair labor practices in Spain and Italy?", "schema": "CREATE TABLE factories (factory_id INT, country VARCHAR(50), labor_practices VARCHAR(50)); CREATE TABLE workers (worker_id INT, factory_id INT, position VARCHAR(50)); INSERT INTO factories (factory_id, country, labor_practices) VALUES (1, 'Spain', 'fair'), (2, 'Italy', 'unfair'), (3, 'Spain', 'fair'), (4, 'Italy', 'fair'); INSERT INTO workers (worker_id, factory_id, position) VALUES (1, 1, 'manager'), (2, 1, 'engineer'), (3, 2, 'worker'), (4, 3, 'manager'), (5, 3, 'engineer'), (6, 4, 'manager');", "sql": "SELECT COUNT(workers.worker_id) FROM workers INNER JOIN factories ON workers.factory_id = factories.factory_id WHERE factories.country IN ('Spain', 'Italy') AND factories.labor_practices = 'fair';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 196, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: When did the New England Patriots play as the visiting team?", "schema": "CREATE TABLE table_name_23 (date VARCHAR, visiting_team VARCHAR)", "sql": "SELECT date FROM table_name_23 WHERE visiting_team = 'new england patriots';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 76, "num_statements": 1} {"question": "Add a new record for the 'Indian Ocean' to the 'marine_region' table.", "schema": "CREATE TABLE marine_region (id INT, region_name VARCHAR(50), min_ocean_ph FLOAT, max_ocean_ph FLOAT); INSERT INTO marine_region (id, region_name, min_ocean_ph, max_ocean_ph) VALUES (1, 'Atlantic Ocean', 7.5, 8.1), (2, 'Pacific Ocean', 7.6, 8.2);", "sql": "INSERT INTO marine_region (region_name, min_ocean_ph, max_ocean_ph) VALUES ('Indian Ocean', 7.4, 8.3);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 102, "num_statements": 1} {"question": "What is the number of volunteers and total volunteer hours for each program?", "schema": "CREATE TABLE volunteers (volunteer_id INT, program_id VARCHAR(20), hours INT); INSERT INTO volunteers (volunteer_id, program_id, hours) VALUES (1, 'Education', 50), (2, 'Health', 75), (3, 'Education', 100); CREATE TABLE donations (donor_id INT, program_id VARCHAR(20), amount DECIMAL(10,2)); INSERT INTO donations (donor_id, program_id, amount) VALUES (1, 'Education', 500.00), (2, 'Health', 300.00), (3, 'Education', 250.00);", "sql": "SELECT program_id, COUNT(DISTINCT volunteer_id) AS num_volunteers, SUM(hours) AS total_hours FROM volunteers GROUP BY program_id;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 129, "num_statements": 1} {"question": "What is the total revenue for each category in the last month?", "schema": "CREATE TABLE dishes (dish_id INT, dish VARCHAR(50), category VARCHAR(50), created_at TIMESTAMP);CREATE TABLE orders (order_id INT, dish_id INT, price DECIMAL(5,2));", "sql": "SELECT c.category, SUM(o.price) as total_revenue FROM dishes d JOIN orders o ON d.dish_id = o.dish_id WHERE d.created_at >= NOW() - INTERVAL '1 month' GROUP BY c.category;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 171, "num_statements": 1} {"question": "What is the total billing amount for cases with a favorable outcome in the 'California' region?", "schema": "CREATE TABLE cases (id INT, attorney_id INT, outcome TEXT, billing_amount INT); INSERT INTO cases (id, attorney_id, outcome, billing_amount) VALUES (1, 1, 'Favorable', 10000); CREATE TABLE attorneys (id INT, name TEXT, region TEXT, title TEXT); INSERT INTO attorneys (id, name, region, title) VALUES (1, 'Jim Smith', 'California', 'Associate');", "sql": "SELECT SUM(billing_amount) FROM cases JOIN attorneys ON cases.attorney_id = attorneys.id WHERE attorneys.region = 'California' AND cases.outcome = 'Favorable';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 159, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which surface has a Date of 5 november 2011?", "schema": "CREATE TABLE table_name_40 (surface VARCHAR, date VARCHAR)", "sql": "SELECT surface FROM table_name_40 WHERE date = '5 november 2011';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 65, "num_statements": 1} {"question": "What is the total mental health parity score for healthcare providers working in each state?", "schema": "CREATE TABLE healthcare_providers (id INT, name VARCHAR(100), state VARCHAR(50), mental_health_parity_score INT); INSERT INTO healthcare_providers (id, name, state, mental_health_parity_score) VALUES (1, 'Jack', 'California', 85), (2, 'Kate', 'Texas', 80), (3, 'Luke', 'New York', 90);", "sql": "SELECT state, SUM(mental_health_parity_score) FROM healthcare_providers GROUP BY state;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 87, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is 2nd Party, when Election is \"1865\"?", "schema": "CREATE TABLE table_name_82 (election VARCHAR)", "sql": "SELECT 2 AS nd_party FROM table_name_82 WHERE election = '1865';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 64, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What are the names of products whose availability equals to 1?", "schema": "CREATE TABLE view_product_availability (product_id VARCHAR, available_yn VARCHAR); CREATE TABLE products_for_hire (product_name VARCHAR, product_id VARCHAR)", "sql": "SELECT T2.product_name FROM view_product_availability AS T1 JOIN products_for_hire AS T2 ON T1.product_id = T2.product_id WHERE T1.available_yn = 1;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 148, "num_statements": 1} {"question": "What is the average age of users who prefer watching sports news?", "schema": "CREATE TABLE users (id INT, age INT, preference VARCHAR(20)); INSERT INTO users (id, age, preference) VALUES (1, 35, 'sports'), (2, 45, 'politics'), (3, 28, 'sports');", "sql": "SELECT AVG(age) FROM users WHERE preference = 'sports';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 55, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who is the director and what number is the episode for episode #1 of Are You Afraid of the Dark season 3?", "schema": "CREATE TABLE table_10470082_4 (director VARCHAR, _number VARCHAR)", "sql": "SELECT COUNT(director) FROM table_10470082_4 WHERE _number = 1;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 63, "num_statements": 1} {"question": "Write the SQL definition from PostgreSQL contrib extension 'contrib' (item 1).", "schema": null, "sql": "/* contrib/pageinspect/pageinspect--1.5.sql */\n\n-- complain if script is sourced in psql, rather than via CREATE EXTENSION\n\\echo Use \"CREATE EXTENSION pageinspect\" to load this file. \\quit\n\n--\n-- get_raw_page()\n--\nCREATE FUNCTION get_raw_page(text, int4)\nRETURNS bytea\nAS 'MODULE_PATHNAME', 'get_raw_page'\nLANGUAGE C STRICT PARALLEL SAFE;", "explanation": "SQL definition from the 'contrib' PostgreSQL contrib extension.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_advanced", "is_postgresql_specific": true, "sql_length": 338, "num_statements": 1} {"question": "pgTAP test for Resultset (assertion 105).", "schema": null, "sql": "INSERT INTO names (name) VALUES ('Lillian');", "explanation": "SQL assertion from pgTAP test suite for Resultset.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 44, "num_statements": 1} {"question": "Update the financial capability score of clients in Egypt to 1 point higher than their current score, if their score is currently between 5 and 8.", "schema": "CREATE TABLE financial_capability_eg (client_id INT, financial_capability_score INT, country VARCHAR(50)); INSERT INTO financial_capability_eg (client_id, financial_capability_score, country) VALUES (1, 5, 'Egypt'), (2, 3, 'Egypt'), (3, 6, 'Egypt');", "sql": "WITH updated_scores AS (UPDATE financial_capability_eg SET financial_capability_score = financial_capability_score + 1 WHERE country = 'Egypt' AND financial_capability_score >= 5 AND financial_capability_score <= 8) SELECT * FROM updated_scores;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 245, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who was the coach who had 215 losses?", "schema": "CREATE TABLE table_name_33 (coach VARCHAR, losses VARCHAR)", "sql": "SELECT coach FROM table_name_33 WHERE losses = 215;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 51, "num_statements": 1} {"question": "PostgreSQL regression test 'misc_functions': Write the SELECT query (example 47).", "schema": null, "sql": "SELECT test_canonicalize_path('../abc/..');", "explanation": "Regression test for Misc Functions in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT test_canonicalize_path('../abc/..')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 43, "num_statements": 1} {"question": "Write the PL/pgSQL object from PostgreSQL regression test 'opr_sanity' (example 8).", "schema": null, "sql": "-- probin should be non-empty for C functions, null everywhere else\nSELECT p1.oid, p1.proname\nFROM pg_proc as p1\nWHERE prolang = 13 AND (probin IS NULL OR probin = '' OR probin = '-');", "explanation": "PL/pgSQL object from PostgreSQL core test for Opr Sanity.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 184, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What are the completion dates of all the tests that have result \"Fail\"?", "schema": "CREATE TABLE Student_Tests_Taken (registration_id VARCHAR, test_result VARCHAR); CREATE TABLE Student_Course_Enrolment (date_of_completion VARCHAR, registration_id VARCHAR)", "sql": "SELECT T1.date_of_completion FROM Student_Course_Enrolment AS T1 JOIN Student_Tests_Taken AS T2 ON T1.registration_id = T2.registration_id WHERE T2.test_result = 'Fail';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 169, "num_statements": 1} {"question": "pgTAP test for Resultset (assertion 252).", "schema": null, "sql": "SELECT * FROM check_test(\n bag_eq(\n 'SELECT id, name FROM annames WHERE name NOT IN (''Anna'', ''Angelina'')',\n 'expect'\n ),\n false,\n 'bag_eq(select, prepared) fail missings',\n '',\n ' Missing records:\n [(](44,Anna|86,Angelina)[)]\n [(](44,Anna|86,Angelina)[)]',\n true\n);", "explanation": "SQL assertion from pgTAP test suite for Resultset.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 321, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What teams rank is higher than 4 with a speed of 104.574mph?", "schema": "CREATE TABLE table_name_91 (team VARCHAR, rank VARCHAR, speed VARCHAR)", "sql": "SELECT team FROM table_name_91 WHERE rank > 4 AND speed = '104.574mph';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 71, "num_statements": 1} {"question": "What is the average waste production per factory in India and China?", "schema": "CREATE TABLE waste_data (country VARCHAR(255), year INT, quantity INT); INSERT INTO waste_data (country, year, quantity) VALUES ('China', 2018, 5000), ('India', 2018, 3000), ('Bangladesh', 2018, 2000), ('China', 2019, 6000), ('India', 2019, 4000), ('Bangladesh', 2019, 3000);", "sql": "SELECT country, AVG(quantity) as avg_waste FROM waste_data WHERE country IN ('China', 'India') GROUP BY country;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 112, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the venue of the match with a score of 33-22?", "schema": "CREATE TABLE table_name_45 (venue VARCHAR, score VARCHAR)", "sql": "SELECT venue FROM table_name_45 WHERE score = '33-22';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 54, "num_statements": 1} {"question": "Find the total usage hours of autonomous vehicles in the autonomous_vehicles table", "schema": "CREATE TABLE autonomous_vehicle_usage (id INT PRIMARY KEY, user_id INT, vehicle_id INT, usage_hours TIMESTAMP);", "sql": "SELECT SUM(usage_hours) FROM autonomous_vehicle_usage JOIN autonomous_vehicles ON autonomous_vehicle_usage.vehicle_id = autonomous_vehicles.id;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 143, "num_statements": 1} {"question": "List all vendors that the ethical fashion brand 'EcoFriendlyFashions' sources sustainable materials from, along with the total quantity of materials sourced from each vendor.", "schema": "CREATE TABLE vendor_materials (id INT PRIMARY KEY, brand VARCHAR(255), vendor VARCHAR(255), material_type VARCHAR(255), quantity INT); INSERT INTO vendor_materials (id, brand, vendor, material_type, quantity) VALUES (1, 'EcoFriendlyFashions', 'Vendor1', 'Organic Cotton', 2000), (2, 'EcoFriendlyFashions', 'Vendor2', 'Recycled Polyester', 1500), (3, 'GreenFashions', 'Vendor3', 'Organic Cotton', 1000), (4, 'GreenFashions', 'Vendor4', 'Tencel', 2000);", "sql": "SELECT vm.vendor, SUM(vm.quantity) as total_quantity FROM vendor_materials vm WHERE vm.brand = 'EcoFriendlyFashions' GROUP BY vm.vendor;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 136, "num_statements": 1} {"question": "What is the average citizen satisfaction score for public services in Tokyo in 2021?", "schema": "CREATE TABLE TokyoSatisfaction (service VARCHAR(30), score INT, year INT); INSERT INTO TokyoSatisfaction (service, score, year) VALUES ('Public Services', 80, 2021), ('Public Services', 85, 2021), ('Public Services', 75, 2021), ('Public Services', 90, 2021);", "sql": "SELECT AVG(score) FROM TokyoSatisfaction WHERE service = 'Public Services' AND year = 2021;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 91, "num_statements": 1} {"question": "PostgreSQL regression test 'stats_ext': Write the SELECT query (example 519).", "schema": null, "sql": "SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,20) IN (1, 2, 51, 52, NULL) AND mod(b::int,10) IN ( 1, 2, NULL)');", "explanation": "Regression test for Stats Ext in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,20) IN (1, 2, 51, 52, NULL) AND mod(b::int,10) IN ( 1, 2, NULL)')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 138, "num_statements": 1} {"question": "What is the percentage of prepaid mobile customers in each city who have broadband subscriptions?", "schema": "CREATE TABLE prepaid_city_customers (customer_id INT, city VARCHAR(50), prepaid BOOLEAN); INSERT INTO prepaid_city_customers (customer_id, city, prepaid) VALUES (1, 'Seattle', FALSE), (2, 'Bellevue', TRUE), (3, 'Seattle', FALSE); ALTER TABLE broadband_subscriptions ADD COLUMN city VARCHAR(50); UPDATE broadband_subscriptions SET city = (SELECT city FROM city_customers WHERE city_customers.customer_id = broadband_subscriptions.customer_id);", "sql": "SELECT pc.city, (COUNT(CASE WHEN bs.subscription = TRUE THEN 1 END) * 100.0 / COUNT(bs.customer_id)) AS percentage FROM prepaid_city_customers pc JOIN broadband_subscriptions bs ON pc.customer_id = bs.customer_id GROUP BY pc.city;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 230, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which Score has Opponents in the final of john bromwich frank sedgman?", "schema": "CREATE TABLE table_name_92 (score VARCHAR, opponents_in_the_final VARCHAR)", "sql": "SELECT score FROM table_name_92 WHERE opponents_in_the_final = 'john bromwich frank sedgman';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 93, "num_statements": 1} {"question": "Create a table for tracking volunteer hours, with columns for VolunteerID, Program, Hours, and Date.", "schema": "CREATE TABLE VolunteerHours (VolunteerID INT, Program VARCHAR(50), Hours INT, VolunteerDate DATE);", "sql": "CREATE TABLE VolunteerHours (VolunteerID INT, Program VARCHAR(50), Hours INT, VolunteerDate DATE);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_table", "is_postgresql_specific": false, "sql_length": 98, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: When the winning team of mathiasen motorsports has a pole position of jonathan bomarito, who has the fastest lap?", "schema": "CREATE TABLE table_name_97 (fastest_lap VARCHAR, winning_team VARCHAR, pole_position VARCHAR)", "sql": "SELECT fastest_lap FROM table_name_97 WHERE winning_team = 'mathiasen motorsports' AND pole_position = 'jonathan bomarito';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 123, "num_statements": 1} {"question": "What is the maximum labor cost in the affordable housing sector in 2021?", "schema": "CREATE TABLE labor_costs (project_id INT, sector VARCHAR(50), labor_cost FLOAT, year INT); INSERT INTO labor_costs (project_id, sector, labor_cost, year) VALUES (1, 'Affordable Housing', 28000, 2021), (2, 'Conventional', 30000, 2021), (3, 'Affordable Housing', 32000, 2021);", "sql": "SELECT MAX(labor_cost) FROM labor_costs WHERE sector = 'Affordable Housing' AND year = 2021;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 92, "num_statements": 1} {"question": "Show the number of military equipment maintenance requests for 'Fighter Jet' type in 2020", "schema": "CREATE TABLE maintenance_requests (request_id INT, equipment_type VARCHAR(255), date DATE); INSERT INTO maintenance_requests (request_id, equipment_type, date) VALUES (4, 'Fighter Jet', '2020-02-05'), (5, 'Fighter Jet', '2020-06-10'), (6, 'Fighter Jet', '2020-12-15');", "sql": "SELECT equipment_type, COUNT(*) FROM maintenance_requests WHERE equipment_type = 'Fighter Jet' AND date BETWEEN '2020-01-01' AND '2020-12-31' GROUP BY equipment_type;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 166, "num_statements": 1} {"question": "PostgreSQL regression test 'int8': Write the SELECT query (example 46).", "schema": null, "sql": "SELECT q1, q2, q1 * q2 AS multiply FROM INT8_TBL;", "explanation": "Regression test for Int8 in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT q1, q2, q1 * q2 AS multiply FROM INT8_TBL) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 49, "num_statements": 1} {"question": "What is the maximum number of tickets sold for any event in the 'events' table?", "schema": "CREATE TABLE events (id INT PRIMARY KEY, event_name VARCHAR(100), event_type VARCHAR(50), num_tickets_sold INT);", "sql": "SELECT MAX(num_tickets_sold) AS max_tickets_sold FROM events;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 61, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What number is the player that played 1998-2001", "schema": "CREATE TABLE table_10015132_21 (no INTEGER, years_in_toronto VARCHAR)", "sql": "SELECT MIN(no) FROM table_10015132_21 WHERE years_in_toronto = '1998-2001';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 75, "num_statements": 1} {"question": "What is the average square footage of co-owned properties in the state of New York?", "schema": "CREATE TABLE properties (id INT, state VARCHAR(20), size INT, co_owned BOOLEAN); INSERT INTO properties (id, state, size, co_owned) VALUES (1, 'New York', 1200, TRUE), (2, 'New York', 1500, FALSE), (3, 'New York', 1800, TRUE);", "sql": "SELECT AVG(size) FROM properties WHERE state = 'New York' AND co_owned = TRUE;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 78, "num_statements": 1} {"question": "pgTAP test for Pgtap--Unpackaged--0.91.0 (assertion 654).", "schema": null, "sql": "ALTER EXTENSION pgtap ADD FUNCTION throws_like ( TEXT, TEXT, TEXT );", "explanation": "SQL assertion from pgTAP test suite for Pgtap--Unpackaged--0.91.0.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 68, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the away score when the home team was Melbourne?", "schema": "CREATE TABLE table_name_84 (away_team VARCHAR, home_team VARCHAR)", "sql": "SELECT away_team AS score FROM table_name_84 WHERE home_team = 'melbourne';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 75, "num_statements": 1} {"question": "What is the maximum number of public participations in a single initiative?", "schema": "CREATE TABLE participations (initiative_id INT, num_participants INT); INSERT INTO participations (initiative_id, num_participants) VALUES (1, 500), (2, 700), (3, 300), (4, 800), (5, 600);", "sql": "SELECT MAX(num_participants) FROM participations;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 49, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: On what surface was Ivan Lendl a partner with a runner-up outcome and final score of 2–6, 6–7?", "schema": "CREATE TABLE table_name_66 (surface VARCHAR, score_in_the_final VARCHAR, outcome VARCHAR, partner VARCHAR)", "sql": "SELECT surface FROM table_name_66 WHERE outcome = 'runner-up' AND partner = 'ivan lendl' AND score_in_the_final = '2–6, 6–7';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 125, "num_statements": 1} {"question": "What is the sum of all flood resilience investments in the South and their respective average costs?", "schema": "CREATE TABLE FloodResilience (id INT, project VARCHAR(20), region VARCHAR(20), cost FLOAT); INSERT INTO FloodResilience (id, project, region, cost) VALUES (1, 'FloodGate', 'South', 2000000.0), (2, 'Levee', 'South', 3000000.0), (3, 'FloodGate', 'South', 2500000.0);", "sql": "SELECT project, SUM(cost) as total_cost FROM FloodResilience WHERE region = 'South' GROUP BY project;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 101, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the position of the player who's hometown is North Babylon, NY?", "schema": "CREATE TABLE table_20785990_2 (position VARCHAR, home_town VARCHAR)", "sql": "SELECT position FROM table_20785990_2 WHERE home_town = 'North Babylon, NY';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 76, "num_statements": 1} {"question": "Find the number of icebergs in the Arctic ocean larger than 10 km²", "schema": "CREATE TABLE icebergs (id INT, name VARCHAR(255), size_km2 FLOAT);", "sql": "SELECT COUNT(*) FROM icebergs WHERE size_km2 > 10 AND region = 'Arctic Ocean';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 78, "num_statements": 1} {"question": "Delete all unused autonomous buses in Boston.", "schema": "CREATE TABLE public.buses (id SERIAL PRIMARY KEY, name TEXT, in_use BOOLEAN, city TEXT); INSERT INTO public.buses (name, in_use, city) VALUES ('Autonomous Bus 1', FALSE, 'Boston'), ('Autonomous Bus 2', TRUE, 'Boston');", "sql": "DELETE FROM public.buses WHERE city = 'Boston' AND name LIKE 'Autonomous Bus%' AND in_use = FALSE;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_delete", "is_postgresql_specific": true, "sql_length": 98, "num_statements": 1} {"question": "Find the number of articles published on investigative journalism topics in the last 6 months, grouped by month.", "schema": "CREATE TABLE articles (id INT, title VARCHAR(255), publish_date DATE, topic VARCHAR(255)); INSERT INTO articles (id, title, publish_date, topic) VALUES (1, 'Investigative Article 1', '2022-01-01', 'investigative');", "sql": "SELECT COUNT(*), DATE_FORMAT(publish_date, '%Y-%m') AS Month FROM articles WHERE topic = 'investigative' AND publish_date >= DATE_SUB(NOW(), INTERVAL 6 MONTH) GROUP BY Month;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 174, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What venue had less than 7.4 extra and the result of 4th?", "schema": "CREATE TABLE table_name_58 (venue VARCHAR, extra VARCHAR, result VARCHAR)", "sql": "SELECT venue FROM table_name_58 WHERE extra < 7.4 AND result = '4th';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 69, "num_statements": 1} {"question": "Find the minimum age of all animals in the 'vulnerable' status", "schema": "CREATE TABLE animals (id INT, name VARCHAR(50), status VARCHAR(20), age INT); INSERT INTO animals (id, name, status, age) VALUES (1, 'Tiger', 'Endangered', 10); INSERT INTO animals (id, name, status, age) VALUES (2, 'Elephant', 'Vulnerable', 30); INSERT INTO animals (id, name, status, age) VALUES (3, 'Rhino', 'Critically Endangered', 5);", "sql": "SELECT MIN(age) FROM animals WHERE status = 'Vulnerable';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 57, "num_statements": 1} {"question": "PL/pgSQL test: Pltcl Queries (example 5).", "schema": null, "sql": "select tcl_record_arg(row('tkey', 42, 'ref2')::T_comp1, 'ref1');", "explanation": "PL/pgSQL example from PostgreSQL source test for Pltcl Queries.", "validation_query": null, "source": "plpgsql_source", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 64, "num_statements": 1} {"question": "What's the total number of communication satellites launched by the United States?", "schema": "CREATE TABLE Satellite (Id INT, Name VARCHAR(100), LaunchDate DATETIME, Country VARCHAR(50), Function VARCHAR(50)); INSERT INTO Satellite (Id, Name, LaunchDate, Country, Function) VALUES (2, 'Intelsat 702', '1997-04-03', 'United States', 'Communications');", "sql": "SELECT COUNT(*) FROM Satellite WHERE Country = 'United States' AND Function = 'Communications';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 95, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many positions is player Tom Glavine?", "schema": "CREATE TABLE table_21721351_18 (position VARCHAR, player VARCHAR)", "sql": "SELECT COUNT(position) FROM table_21721351_18 WHERE player = 'Tom Glavine';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 75, "num_statements": 1} {"question": "Write the PL/pgSQL object from PostgreSQL regression test 'drop_if_exists' (example 148).", "schema": null, "sql": "-- cleanup\nDROP FUNCTION test_ambiguous_funcname(int);", "explanation": "PL/pgSQL object from PostgreSQL core test for Drop If Exists.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 54, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: When has a cause of gas explosion, a Death toll smaller than 63, and a Location of penygraig?", "schema": "CREATE TABLE table_name_60 (date VARCHAR, location VARCHAR, cause VARCHAR, death_toll VARCHAR)", "sql": "SELECT date FROM table_name_60 WHERE cause = 'gas explosion' AND death_toll < 63 AND location = 'penygraig';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 108, "num_statements": 1} {"question": "What is the average property size in the sustainable_urbanism table?", "schema": "CREATE TABLE sustainable_urbanism (property_id INT, size FLOAT, location VARCHAR(255)); INSERT INTO sustainable_urbanism (property_id, size, location) VALUES (1, 1200, 'Eco City'), (2, 1500, 'Green Valley');", "sql": "SELECT AVG(size) FROM sustainable_urbanism;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 43, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'cube' (example 176).", "schema": null, "sql": "SELECT cube_enlarge('(0)'::cube, 1, 1);", "explanation": "Example query from the 'cube' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "query_select", "is_postgresql_specific": true, "sql_length": 39, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the opponent which has a home/away of home and date of july 27", "schema": "CREATE TABLE table_name_9 (opponent VARCHAR, home_away VARCHAR, date VARCHAR)", "sql": "SELECT opponent FROM table_name_9 WHERE home_away = 'home' AND date = 'july 27';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 80, "num_statements": 1} {"question": "What are the average safety scores for vehicles released in 2018 and 2019?", "schema": "CREATE TABLE vehicle_safety_scores (make VARCHAR(100), model VARCHAR(100), safety_score INT, year INT);", "sql": "SELECT AVG(safety_score) FROM vehicle_safety_scores WHERE year IN (2018, 2019) GROUP BY year;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 93, "num_statements": 1} {"question": "What is the minimum budget for a single public works project in the state of California?", "schema": "CREATE TABLE project (id INT PRIMARY KEY, name TEXT, budget INT, status TEXT, city_id INT, FOREIGN KEY (city_id) REFERENCES city(id));", "sql": "SELECT MIN(budget) FROM project WHERE city_id IN (SELECT id FROM city WHERE state = 'CA') AND status = 'Open';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 110, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What Per capita income has a Median family income of $50,755?", "schema": "CREATE TABLE table_name_38 (per_capita_income VARCHAR, median_family_income VARCHAR)", "sql": "SELECT per_capita_income FROM table_name_38 WHERE median_family_income = '$50,755';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 83, "num_statements": 1} {"question": "What is the average donation size by individual donors in the healthcare sector?", "schema": "CREATE TABLE donors (id INT, name VARCHAR(50), amount INT, sector VARCHAR(20)); INSERT INTO donors (id, name, amount, sector) VALUES (1, 'John', 75, 'education'), (2, 'Jane', 120, 'health'), (3, 'Mike', 30, 'education'), (4, 'Olivia', 80, 'healthcare'), (5, 'Patrick', 180, 'healthcare');", "sql": "SELECT AVG(amount) FROM donors WHERE sector = 'healthcare' AND id NOT IN (SELECT DISTINCT org_id FROM grants);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 110, "num_statements": 1} {"question": "Add a new wind turbine with id 5 and capacity 2000 in the 'renewable_energy' table", "schema": "CREATE TABLE renewable_energy (id INT, type VARCHAR(50), capacity INT);", "sql": "INSERT INTO renewable_energy (id, type, capacity) VALUES (5, 'wind turbine', 2000);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 83, "num_statements": 1} {"question": "Increase the rating of product 3 by 0.5.", "schema": "CREATE TABLE products (product_id INT, rating FLOAT); INSERT INTO products (product_id, rating) VALUES (1, 4.5), (2, 3.2), (3, 4.8);", "sql": "UPDATE products SET rating = rating + 0.5 WHERE product_id = 3;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 63, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Tell me the average final rank for loe more than 10 and point less than 43", "schema": "CREATE TABLE table_name_8 (final_rank INTEGER, lose VARCHAR, point VARCHAR)", "sql": "SELECT AVG(final_rank) FROM table_name_8 WHERE lose > 10 AND point < 43;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 72, "num_statements": 1} {"question": "Calculate the percentage of employees who received a promotion in the last 6 months, and display the result with two decimal places.", "schema": "CREATE TABLE Employees (EmployeeID INT, PromotionDate DATE);", "sql": "SELECT ROUND(COUNT(*) * 100.0 / (SELECT COUNT(*) FROM Employees) , 2) AS PromotionPercentage FROM Employees WHERE PromotionDate >= DATEADD(month, -6, GETDATE());", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 161, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which organisation type hires most research staff?", "schema": "CREATE TABLE Research_Staff (employer_organisation_id VARCHAR); CREATE TABLE Organisations (organisation_type VARCHAR, organisation_id VARCHAR)", "sql": "SELECT T1.organisation_type FROM Organisations AS T1 JOIN Research_Staff AS T2 ON T1.organisation_id = T2.employer_organisation_id GROUP BY T1.organisation_type ORDER BY COUNT(*) DESC LIMIT 1;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 192, "num_statements": 1} {"question": "PostgreSQL regression test 'window': Write the SELECT query (example 70).", "schema": null, "sql": "SELECT sum(unique1) over (w range between unbounded preceding and current row exclude group),\n\tunique1, four\nFROM tenk1 WHERE unique1 < 10 WINDOW w AS (order by four);", "explanation": "Regression test for Window in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT sum(unique1) over (w range between unbounded preceding and current row exclude group),\n\tunique1, four\nFROM tenk1 WHERE unique1 < 10 WINDOW w AS (order by four)) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "advanced", "category": "query_window_function", "is_postgresql_specific": true, "sql_length": 167, "num_statements": 1} {"question": "What is the maximum number of simultaneous high severity vulnerabilities that have been discovered in the past month?", "schema": "CREATE TABLE vulnerability_timeline(id INT, severity VARCHAR(50), vulnerability_date DATE, vulnerabilities INT);", "sql": "SELECT severity, MAX(vulnerabilities) as max_simultaneous_vulnerabilities FROM vulnerability_timeline WHERE severity = 'high' AND vulnerability_date > DATE(NOW()) - INTERVAL 30 DAY;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 181, "num_statements": 1} {"question": "pgTAP test for Privs (assertion 20).", "schema": null, "sql": "CREATE OR REPLACE FUNCTION run_extra_fails() RETURNS SETOF TEXT LANGUAGE plpgsql AS $$\nDECLARE\n allowed_privs TEXT[];\n test_privs TEXT[] := '{}';\n missing_privs TEXT[] := '{}';\n tap record;\n last_index INTEGER;\nBEGIN\n -- Test table failure.\n allowed_privs := _table_privs();\n last_index := array_upper(allowed_privs, 1);\n FOR i IN 1..last_index - 2 LOOP\n test_privs := test_privs || allowed_privs[i];\n END LOOP;\n FOR i IN last_index - 1..last_index LOOP\n missing_privs := missing_privs || allowed_privs[i];\n END LOOP;\n\n FOR tap IN SELECT * FROM check_test(\n table_privs_are( 'ha', 'sometab', current_user, test_privs, 'whatever' ),\n false,\n 'table_privs_are(sch, tab, role, some privs, desc)',\n 'whatever',\n ' Extra privileges:\n ' || array_to_string(missing_privs, E'\\n ')\n ) AS b LOOP RETURN NEXT tap.b; END LOOP;\n\n FOR tap IN SELECT * FROM check_test(\n table_privs_are( 'sometab', current_user, test_privs, 'whatever' ),\n false,\n 'table_privs_are(tab, role, some privs, desc)',\n 'whatever',\n ' Extra privileges:\n ' || array_to_string(missing_privs, E'\\n ')\n ) AS b LOOP RETURN NEXT tap.b; END LOOP;\nEND;\n$$;", "explanation": "SQL assertion from pgTAP test suite for Privs.", "validation_query": null, "source": "pgtap_tests", "difficulty": "advanced", "category": "plpgsql_function", "is_postgresql_specific": true, "sql_length": 1330, "num_statements": 17} {"question": "List the number of mental health appointments for each cultural competency level, in descending order.", "schema": "CREATE TABLE CulturalCompetency (ID INT, Level TEXT); INSERT INTO CulturalCompetency (ID, Level) VALUES (1, 'Beginner'); INSERT INTO CulturalCompetency (ID, Level) VALUES (2, 'Intermediate'); INSERT INTO CulturalCompetency (ID, Level) VALUES (3, 'Advanced'); CREATE TABLE MentalHealthAppointment (AppointmentID INT, CulturalCompetencyID INT);", "sql": "SELECT CulturalCompetencyID, COUNT(AppointmentID) as NumAppointments FROM MentalHealthAppointment GROUP BY CulturalCompetencyID ORDER BY NumAppointments DESC;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 158, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What 2007-08 season has marcianise as the city?", "schema": "CREATE TABLE table_name_60 (city VARCHAR)", "sql": "SELECT 2007 AS _08_season FROM table_name_60 WHERE city = 'marcianise';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 71, "num_statements": 1} {"question": "What percentage of sustainable fabric sourcing is done from African countries?", "schema": "CREATE TABLE FabricSourcing (Brand VARCHAR(255), Country VARCHAR(255), FabricType VARCHAR(255), Quantity INT); INSERT INTO FabricSourcing (Brand, Country, FabricType, Quantity) VALUES ('BrandD', 'EG', 'Organic Cotton', 5000), ('BrandE', 'NG', 'Recycled Polyester', 7000), ('BrandF', 'KE', 'Tencel', 6000);", "sql": "SELECT (COUNT(*) * 100.0 / (SELECT COUNT(*) FROM FabricSourcing)) AS Percentage FROM FabricSourcing WHERE Country IN ('EG', 'NG', 'KE');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 136, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which member is from the state of SA and the grey electorate?", "schema": "CREATE TABLE table_name_74 (member VARCHAR, state VARCHAR, electorate VARCHAR)", "sql": "SELECT member FROM table_name_74 WHERE state = 'sa' AND electorate = 'grey';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 76, "num_statements": 1} {"question": "What is the distribution of artists by the medium they work in?", "schema": "CREATE TABLE artists (id INT, name VARCHAR(255), birth_date DATE, medium VARCHAR(50));", "sql": "SELECT medium, COUNT(*) as artist_count FROM artists GROUP BY medium;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 69, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What county is the township of Osborn in?", "schema": "CREATE TABLE table_18600760_15 (county VARCHAR, township VARCHAR)", "sql": "SELECT county FROM table_18600760_15 WHERE township = 'Osborn';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 63, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the location of the 200m backstroke?", "schema": "CREATE TABLE table_name_75 (location VARCHAR, event VARCHAR)", "sql": "SELECT location FROM table_name_75 WHERE event = '200m backstroke';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 67, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: List all opponents from the 4-4 scoring game.", "schema": "CREATE TABLE table_24561550_1 (opponent VARCHAR, record VARCHAR)", "sql": "SELECT opponent FROM table_24561550_1 WHERE record = '4-4';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 59, "num_statements": 1} {"question": "Which fair trade organizations are involved in shoe production in Portugal?", "schema": "CREATE TABLE Shoes (id INT, name VARCHAR(255), style VARCHAR(255), price DECIMAL(10, 2), country VARCHAR(255), fair_trade_org VARCHAR(255)); INSERT INTO Shoes (id, name, style, price, country, fair_trade_org) VALUES (1, 'Sneakers', 'Casual', 79.99, 'Portugal', 'FTA Portugal'); INSERT INTO Shoes (id, name, style, price, country, fair_trade_org) VALUES (2, 'Sandals', 'Summer', 49.99, 'Portugal', 'FTA Iberia');", "sql": "SELECT DISTINCT fair_trade_org FROM Shoes WHERE country = 'Portugal' AND fair_trade_org IS NOT NULL;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 100, "num_statements": 1} {"question": "How many startups have been founded by individuals with disabilities in the last 3 years?", "schema": "CREATE TABLE founders(id INT, startup_id INT, founder_name TEXT, founder_identity TEXT); INSERT INTO founders VALUES (1, 1, 'John Doe', 'White Male'); INSERT INTO founders VALUES (2, 1, 'Jane Smith', 'Female Asian'); INSERT INTO founders VALUES (3, 2, 'Alice Johnson', 'Black Female');", "sql": "SELECT COUNT(*) FROM founders JOIN startups ON founders.startup_id = startups.id WHERE founders.founder_identity = 'Individual with Disability' AND startups.founding_year >= YEAR(CURRENT_DATE) - 3;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 197, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the result of the europe/africa group i, round robin game in Murcia (esp) before 1999?", "schema": "CREATE TABLE table_name_9 (result VARCHAR, competition VARCHAR, location VARCHAR, year VARCHAR)", "sql": "SELECT result FROM table_name_9 WHERE location = 'murcia (esp)' AND year < 1999 AND competition = 'europe/africa group i, round robin';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 135, "num_statements": 1} {"question": "Populate the games table with new data", "schema": "CREATE TABLE games (game_id INT PRIMARY KEY, name VARCHAR(50), genre VARCHAR(50), rating DECIMAL(3,2));", "sql": "INSERT INTO games (game_id, name, genre, rating) VALUES (1, 'Apex Legends', 'Battle Royale', 8.5), (2, 'Valorant', 'First Person Shooter', 8.3), (3, 'Among Us', 'Party Game', 8.8);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 180, "num_statements": 1} {"question": "How many fans are from each region in the fan_demographics table?", "schema": "CREATE TABLE fan_demographics (fan_id INT, fan_name VARCHAR(50), region VARCHAR(50)); INSERT INTO fan_demographics (fan_id, fan_name, region) VALUES (1, 'FanA', 'North America'), (2, 'FanB', 'South America'), (3, 'FanC', 'Asia'), (4, 'FanD', 'Europe');", "sql": "SELECT region, COUNT(*) as num_fans FROM fan_demographics GROUP BY region;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 74, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Show names for all employees who have certificates on both Boeing 737-800 and Airbus A340-300.", "schema": "CREATE TABLE Employee (name VARCHAR, eid VARCHAR); CREATE TABLE Certificate (eid VARCHAR, aid VARCHAR); CREATE TABLE Aircraft (aid VARCHAR, name VARCHAR)", "sql": "SELECT T1.name FROM Employee AS T1 JOIN Certificate AS T2 ON T1.eid = T2.eid JOIN Aircraft AS T3 ON T3.aid = T2.aid WHERE T3.name = 'Boeing 737-800' INTERSECT SELECT T1.name FROM Employee AS T1 JOIN Certificate AS T2 ON T1.eid = T2.eid JOIN Aircraft AS T3 ON T3.aid = T2.aid WHERE T3.name = 'Airbus A340-300';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 309, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is Moondancer's Primary Military Speciality?", "schema": "CREATE TABLE table_name_39 (primary_military_speciality VARCHAR, code_name VARCHAR)", "sql": "SELECT primary_military_speciality FROM table_name_39 WHERE code_name = 'moondancer';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 85, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the content of the Television service of Vesti?", "schema": "CREATE TABLE table_name_58 (content VARCHAR, television_service VARCHAR)", "sql": "SELECT content FROM table_name_58 WHERE television_service = 'vesti';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 69, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which highest Overall has a Pick # of 4, and a Round larger than 7?", "schema": "CREATE TABLE table_name_65 (overall INTEGER, pick__number VARCHAR, round VARCHAR)", "sql": "SELECT MAX(overall) FROM table_name_65 WHERE pick__number = 4 AND round > 7;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 76, "num_statements": 1} {"question": "Count the number of patients who improved after therapy in the United States?", "schema": "CREATE TABLE patient_outcomes (patient_id INT, improvement_status VARCHAR(255), country VARCHAR(255)); INSERT INTO patient_outcomes (patient_id, improvement_status, country) VALUES (1, 'Improved', 'USA'); INSERT INTO patient_outcomes (patient_id, improvement_status, country) VALUES (2, 'Not Improved', 'USA');", "sql": "SELECT COUNT(*) FROM patient_outcomes WHERE improvement_status = 'Improved' AND country = 'USA';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 96, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which title's genre was jazz when it was nominated in 1996?", "schema": "CREATE TABLE table_name_15 (title VARCHAR, year VARCHAR, genre VARCHAR, result VARCHAR)", "sql": "SELECT title FROM table_name_15 WHERE genre = 'jazz' AND result = 'nominated' AND year = '1996';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 96, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: WHat is the Valency change of associative type?", "schema": "CREATE TABLE table_name_1 (valency_change VARCHAR, type VARCHAR)", "sql": "SELECT valency_change FROM table_name_1 WHERE type = 'associative';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 67, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'cube' (example 123).", "schema": null, "sql": "SELECT '0,0,1'::cube @> '0,0,0'::cube AS bool;", "explanation": "Example query from the 'cube' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "query_select", "is_postgresql_specific": true, "sql_length": 77, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many registed students do each course have? List course name and the number of their registered students?", "schema": "CREATE TABLE students (student_id VARCHAR); CREATE TABLE courses (course_name VARCHAR, course_id VARCHAR); CREATE TABLE student_course_registrations (course_id VARCHAR, student_id VARCHAR)", "sql": "SELECT T3.course_name, COUNT(*) FROM students AS T1 JOIN student_course_registrations AS T2 ON T1.student_id = T2.student_id JOIN courses AS T3 ON T2.course_id = T3.course_id GROUP BY T2.course_id;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 197, "num_statements": 1} {"question": "How many impact investments were made by Blue Capital in regions with high poverty rates?", "schema": "CREATE TABLE Blue_Capital (id INT, region VARCHAR(20), impact_investment FLOAT); INSERT INTO Blue_Capital (id, region, impact_investment) VALUES (1, 'Africa', 200000), (2, 'Asia', 300000);", "sql": "SELECT SUM(impact_investment) FROM Blue_Capital WHERE region IN ('Africa', 'Asia');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 83, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: In 1900 with 62 wins and a win percentage less than 0.457, what was the GB [c]?", "schema": "CREATE TABLE table_name_98 (gb_ VARCHAR, c_ VARCHAR, reds_season VARCHAR, win_percentage VARCHAR, wins VARCHAR)", "sql": "SELECT gb_[c_] FROM table_name_98 WHERE win_percentage < 0.457 AND wins = 62 AND reds_season = 1900;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 100, "num_statements": 1} {"question": "What is the maximum calories burned in a single workout by members aged 40-50?", "schema": "CREATE TABLE workout_data (member_id INT, calories FLOAT, workout_date DATE); INSERT INTO workout_data (member_id, calories, workout_date) VALUES (1, 300, '2021-01-15'), (2, 450, '2022-03-28');", "sql": "SELECT MAX(calories) FROM workout_data JOIN members ON workout_data.member_id = members.member_id WHERE members.age BETWEEN 40 AND 50;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 134, "num_statements": 1} {"question": "PostgreSQL regression test 'jsonb_jsonpath': Write the SELECT query (example 534).", "schema": null, "sql": "select jsonb_path_query('\"12:34:56\"', '$.time().string()');", "explanation": "Regression test for Jsonb Jsonpath in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select jsonb_path_query('\"12:34:56\"', '$.time().string()')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "advanced", "category": "query_select", "is_postgresql_specific": false, "sql_length": 59, "num_statements": 1} {"question": "What is the total waste generation by material type in Tokyo?", "schema": "CREATE TABLE waste_generation (location VARCHAR(50), material_type VARCHAR(50), quantity INT); INSERT INTO waste_generation (location, material_type, quantity) VALUES ('Tokyo', 'Plastic', 1200), ('Tokyo', 'Paper', 1800), ('Tokyo', 'Metal', 1400);", "sql": "SELECT material_type, SUM(quantity) FROM waste_generation WHERE location = 'Tokyo' GROUP BY material_type;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 106, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What date has dallas cowboys as the opponent?", "schema": "CREATE TABLE table_name_91 (date VARCHAR, opponent VARCHAR)", "sql": "SELECT date FROM table_name_91 WHERE opponent = 'dallas cowboys';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 65, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'multirangetypes' (example 528).", "schema": null, "sql": "insert into float8multirange_test values(float8multirange(float8range(-100.00007, '1.111113e9')), 42);", "explanation": "DML from PostgreSQL core regression test for Multirangetypes.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 102, "num_statements": 1} {"question": "What is the average length of dams built between 1950 and 1960?", "schema": "CREATE TABLE Dams (name TEXT, year INT, length FLOAT, location TEXT);", "sql": "SELECT AVG(length) FROM Dams WHERE year BETWEEN 1950 AND 1960;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 62, "num_statements": 1} {"question": "What is the average cost of materials for bridge construction projects in the state of California?", "schema": "CREATE TABLE bridge (id INT, name TEXT, state TEXT, cost FLOAT); INSERT INTO bridge (id, name, state, cost) VALUES (1, 'Bridge A', 'California', 5000000); INSERT INTO bridge (id, name, state, cost) VALUES (2, 'Bridge B', 'California', 7000000);", "sql": "SELECT AVG(cost) FROM bridge WHERE state = 'California' AND name LIKE '%bridge%';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 81, "num_statements": 1} {"question": "Write the SQL definition from PostgreSQL contrib extension 'contrib' (item 6).", "schema": null, "sql": "CREATE FUNCTION heap_page_item_attrs(\n IN page bytea,\n IN rel_oid regclass,\n OUT lp smallint,\n OUT lp_off smallint,\n OUT lp_flags smallint,\n OUT lp_len smallint,\n OUT t_xmin xid,\n OUT t_xmax xid,\n OUT t_field3 int4,\n OUT t_ctid tid,\n OUT t_infomask2 integer,\n OUT t_infomask integer,\n OUT t_hoff smallint,\n OUT t_bits text,\n OUT t_oid oid,\n OUT t_attrs bytea[]\n )\nRETURNS SETOF record AS $$\nSELECT * from heap_page_item_attrs(page, rel_oid, false);\n$$ LANGUAGE SQL;", "explanation": "SQL definition from the 'contrib' PostgreSQL contrib extension.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 516, "num_statements": 2} {"question": "What is the total number of visitors from North America who attended all exhibitions?", "schema": "CREATE TABLE Exhibitions (ExhibitionID INT, ExhibitionName VARCHAR(255), Country VARCHAR(255)); INSERT INTO Exhibitions (ExhibitionID, ExhibitionName, Country) VALUES (1, 'Contemporary Art Exhibition', 'USA'), (2, 'Modern Art Exhibition', 'Canada'), (3, 'Impressionist Exhibition', 'Mexico');", "sql": "SELECT COUNT(DISTINCT Country) FROM Exhibitions WHERE Country IN ('USA', 'Canada', 'Mexico');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 93, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many movies were made before 2000?", "schema": "CREATE TABLE Movie (YEAR INTEGER)", "sql": "SELECT COUNT(*) FROM Movie WHERE YEAR < 2000;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 45, "num_statements": 1} {"question": "How many individuals have been incarcerated in the past month, broken down by the type of facility and the number of prior offenses?", "schema": "CREATE TABLE incarceration_records (id INT, facility_type TEXT, num_prior_offenses INT, incarceration_date DATE);", "sql": "SELECT facility_type, num_prior_offenses, COUNT(*) FROM incarceration_records WHERE incarceration_date >= DATE_SUB(CURRENT_DATE, INTERVAL 1 MONTH) GROUP BY facility_type, num_prior_offenses;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 190, "num_statements": 1} {"question": "What is the total drug approval cost for 'DrugE' in '2020'?", "schema": "CREATE TABLE approvals (drug_name TEXT, year INTEGER, cost INTEGER); INSERT INTO approvals (drug_name, year, cost) VALUES ('DrugE', 2020, 3000000);", "sql": "SELECT cost FROM approvals WHERE drug_name = 'DrugE' AND year = 2020;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 69, "num_statements": 1} {"question": "What was the total number of tickets sold for theater performances in Q1 2022?", "schema": "CREATE TABLE Events (EventID INT, EventType VARCHAR(50), StartDate DATE, EndDate DATE); INSERT INTO Events (EventID, EventType, StartDate, EndDate) VALUES (1, 'Dance Performance', '2022-04-01', '2022-04-03'), (2, 'Theater Performance', '2022-01-01', '2022-01-31'); CREATE TABLE Tickets (TicketID INT, EventID INT, Quantity INT); INSERT INTO Tickets (TicketID, EventID, Quantity) VALUES (1, 1, 100), (2, 2, 200);", "sql": "SELECT SUM(Quantity) FROM Events INNER JOIN Tickets ON Events.EventID = Tickets.EventID WHERE Events.EventType = 'Theater Performance' AND QUARTER(StartDate) = 1;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "plpgsql", "is_postgresql_specific": false, "sql_length": 162, "num_statements": 1} {"question": "What is the average monetary aid received per family in Central America, grouped by disaster type, ordered by the highest average?", "schema": "CREATE TABLE disaster_response (family_id INT, region VARCHAR(20), disaster_type VARCHAR(20), amount_aid FLOAT); INSERT INTO disaster_response (family_id, region, disaster_type, amount_aid) VALUES (1, 'Central America', 'Flood', 5000), (2, 'Central America', 'Earthquake', 7000), (3, 'Central America', 'Flood', 6000);", "sql": "SELECT disaster_type, AVG(amount_aid) as avg_aid FROM disaster_response WHERE region = 'Central America' GROUP BY disaster_type ORDER BY avg_aid DESC;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 150, "num_statements": 1} {"question": "Delete the garment records with a price less than 20.00 or greater than 40.00 in the Garment table.", "schema": "CREATE TABLE Garment (garment_id INT PRIMARY KEY, garment_name VARCHAR(50), category VARCHAR(50), price DECIMAL(10,2)); INSERT INTO Garment (garment_id, garment_name, category, price) VALUES (1, 'Cotton T-Shirt', 'Tops', 20.00), (2, 'Jeans Pants', 'Bottoms', 40.00), (3, 'Linen Blouse', 'Tops', 30.00);", "sql": "DELETE FROM Garment WHERE price < 20.00 OR price > 40.00;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_delete", "is_postgresql_specific": false, "sql_length": 57, "num_statements": 1} {"question": "Find the total number of marine species recorded in the North Atlantic and South Atlantic.", "schema": "CREATE TABLE marine_species (id INT, species_name VARCHAR(255), region VARCHAR(255)); INSERT INTO marine_species (id, species_name, region) VALUES (1, 'Oceanic whitetip shark', 'North Atlantic');", "sql": "(SELECT COUNT(*) FROM marine_species WHERE region IN ('North Atlantic', 'South Atlantic'));", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 91, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many feet in length are there when the length is 106.1 meters?", "schema": "CREATE TABLE table_16226584_1 (length_feet VARCHAR, length_meters VARCHAR)", "sql": "SELECT COUNT(length_feet) FROM table_16226584_1 WHERE length_meters = '106.1';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 78, "num_statements": 1} {"question": "What is the total number of users who have posted at least one post and have at least one follower in the 'social_media' and 'user_followers' tables?", "schema": "CREATE TABLE social_media (user_id INT, posts_count INT); CREATE TABLE user_followers (user_id INT, followers_count INT);", "sql": "SELECT COUNT(DISTINCT sm.user_id) FROM social_media sm INNER JOIN user_followers uf ON sm.user_id = uf.user_id WHERE sm.posts_count > 0 AND uf.followers_count > 0;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 163, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which Away team played when they had a Crowd of over 29,000 people?", "schema": "CREATE TABLE table_name_14 (away_team VARCHAR, crowd INTEGER)", "sql": "SELECT away_team FROM table_name_14 WHERE crowd > 29 OFFSET 000;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 64, "num_statements": 1} {"question": "Show the number of trips taken by each passenger on the 'Green Line' during the month of January 2023", "schema": "CREATE TABLE passengers (passenger_id INT, passenger_name VARCHAR(20)); CREATE TABLE passenger_trips (trip_id INT, passenger_id INT, route_id INT, trip_date DATE);", "sql": "SELECT passengers.passenger_name, COUNT(passenger_trips.trip_id) FROM passengers JOIN passenger_trips ON passengers.passenger_id = passenger_trips.passenger_id WHERE passenger_trips.route_id = 2 AND passenger_trips.trip_date BETWEEN '2023-01-01' AND '2023-01-31' GROUP BY passengers.passenger_id, passengers.passenger_name;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 323, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Can you tell me the sum of Goals against that has the Goals for larger than 10, and the Position of 3, and the Wins smaller than 6?", "schema": "CREATE TABLE table_name_14 (goals_against INTEGER, wins VARCHAR, goals_for VARCHAR, position VARCHAR)", "sql": "SELECT SUM(goals_against) FROM table_name_14 WHERE goals_for > 10 AND position = 3 AND wins < 6;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 96, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the sum of FA Cup goals when there are 19 league goals?", "schema": "CREATE TABLE table_name_5 (fa_cup_goals INTEGER, league_goals VARCHAR)", "sql": "SELECT SUM(fa_cup_goals) FROM table_name_5 WHERE league_goals = '19';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 69, "num_statements": 1} {"question": "Insert a new record into the \"MarinePollution\" table with values (1, 'Asia', 'Oil Spill')", "schema": "CREATE TABLE MarinePollution (Id INT, Region VARCHAR(20), Type VARCHAR(10));", "sql": "INSERT INTO MarinePollution (Id, Region, Type) VALUES (1, 'Asia', 'Oil Spill');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 79, "num_statements": 1} {"question": "Find the youngest female patient in the 'patients' table.", "schema": "CREATE TABLE patients (patient_id INT, name VARCHAR(50), age INT, gender VARCHAR(10), condition VARCHAR(50)); INSERT INTO patients (patient_id, name, age, gender, condition) VALUES (1, 'John Doe', 30, 'Male', 'Anxiety Disorder'); INSERT INTO patients (patient_id, name, age, gender, condition) VALUES (2, 'Jane Smith', 35, 'Female', 'Depression'); INSERT INTO patients (patient_id, name, age, gender, condition) VALUES (4, 'Bob Brown', 45, 'Male', 'Depression');", "sql": "SELECT * FROM patients WHERE gender = 'Female' ORDER BY age ASC LIMIT 1;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 72, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What's the least enrolled when the mascot was the Trojans?", "schema": "CREATE TABLE table_name_8 (enrollment INTEGER, mascot VARCHAR)", "sql": "SELECT MIN(enrollment) FROM table_name_8 WHERE mascot = 'trojans';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "Update the organic_produce to false for record with restaurant_id 123 in the sustainable_sourcing table", "schema": "CREATE TABLE sustainable_sourcing (restaurant_id INT, organic_produce BOOLEAN);", "sql": "UPDATE sustainable_sourcing SET organic_produce = false WHERE restaurant_id = 123;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 82, "num_statements": 1} {"question": "What is the average rating for public safety and waste management in CityX?", "schema": "CREATE TABLE Feedback (City VARCHAR(20), Category VARCHAR(20), Rating INT); INSERT INTO Feedback (City, Category, Rating) VALUES ('CityX', 'Public Safety', 7); INSERT INTO Feedback (City, Category, Rating) VALUES ('CityX', 'Waste Management', 8);", "sql": "SELECT City, AVG(CASE WHEN Category = 'Public Safety' THEN Rating ELSE 0 END) AS 'Public Safety Avg Rating', AVG(CASE WHEN Category = 'Waste Management' THEN Rating ELSE 0 END) AS 'Waste Management Avg Rating' FROM Feedback WHERE City = 'CityX' GROUP BY City;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 259, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which position has new Jersey Devils as the nhl team?", "schema": "CREATE TABLE table_2897457_3 (position VARCHAR, nhl_team VARCHAR)", "sql": "SELECT position FROM table_2897457_3 WHERE nhl_team = 'New Jersey Devils';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 74, "num_statements": 1} {"question": "Show the average population size for each species in the species table.", "schema": "CREATE TABLE species (id INT PRIMARY KEY, name VARCHAR(50), population INT);", "sql": "SELECT name, AVG(population) as avg_population FROM species GROUP BY name;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 74, "num_statements": 1} {"question": "What is the total production quantity (in metric tons) of Europium from the mine with the ID 2 for the year 2018?", "schema": "CREATE TABLE production (id INT, mine_id INT, year INT, element TEXT, production_quantity INT); INSERT INTO production (id, mine_id, year, element, production_quantity) VALUES (1, 2, 2018, 'Europium', 120), (2, 3, 2018, 'Europium', 180), (3, 4, 2018, 'Europium', 240), (4, 2, 2018, 'Gadolinium', 300), (5, 3, 2018, 'Gadolinium', 420), (6, 4, 2018, 'Gadolinium', 540);", "sql": "SELECT SUM(production_quantity) FROM production WHERE mine_id = 2 AND year = 2018 AND element = 'Europium';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 107, "num_statements": 1} {"question": "What is the average cultural competency score for mental health providers in each state?", "schema": "CREATE TABLE mental_health_providers (id INT, name VARCHAR(50), state VARCHAR(50), cultural_competency_score DECIMAL(3,2)); INSERT INTO mental_health_providers (id, name, state, cultural_competency_score) VALUES (1, 'Dr. Sarah Johnson', 'California', 4.75), (2, 'Dr. Michael Davis', 'Texas', 4.50), (3, 'Dr. Emily Garcia', 'Florida', 4.25);", "sql": "SELECT state, AVG(cultural_competency_score) FROM mental_health_providers GROUP BY state;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 89, "num_statements": 1} {"question": "PostgreSQL regression test 'pg_ndistinct': Write the SELECT query (example 58).", "schema": null, "sql": "SELECT * FROM pg_input_error_info('[{\"attributes\" : [2,3], \"ndistinct\" : [null]}]', 'pg_ndistinct');", "explanation": "Regression test for Pg Ndistinct in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT * FROM pg_input_error_info('[{\"attributes\" : [2,3], \"ndistinct\" : [null]}]', 'pg_ndistinct')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 100, "num_statements": 1} {"question": "Which investment strategies have a return on investment (ROI) greater than 5% and have at least one client utilizing them?", "schema": "CREATE TABLE InvestmentStrategies (StrategyID int, StrategyName varchar(50), ROI decimal(5,2)); INSERT INTO InvestmentStrategies (StrategyID, StrategyName, ROI) VALUES (1, 'Conservative', 2), (2, 'Moderate', 3), (3, 'Aggressive', 5), (4, 'High Risk', 10); CREATE TABLE ClientStrategies (ClientID int, StrategyID int); INSERT INTO ClientStrategies (ClientID, StrategyID) VALUES (10, 1), (11, 1), (12, 2), (13, 3), (14, 2), (15, 4);", "sql": "SELECT i.StrategyName, i.ROI FROM InvestmentStrategies i INNER JOIN ClientStrategies cs ON i.StrategyID = cs.StrategyID WHERE i.ROI > 5 GROUP BY i.StrategyName, i.ROI HAVING COUNT(cs.ClientID) > 0;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 197, "num_statements": 1} {"question": "What is the total number of cultural heritage sites in Spain that were built before the year 1800?", "schema": "CREATE TABLE CulturalHeritageSites (name VARCHAR(50), location VARCHAR(20), year INT);", "sql": "SELECT COUNT(*) FROM CulturalHeritageSites WHERE location = 'Spain' AND year < 1800;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 84, "num_statements": 1} {"question": "What's the average age of players who play sports games in South America?", "schema": "CREATE TABLE Players (PlayerID INT, Age INT, Gender VARCHAR(10), Location VARCHAR(20)); INSERT INTO Players (PlayerID, Age, Gender, Location) VALUES (1, 22, 'Female', 'Brazil'); INSERT INTO Players (PlayerID, Age, Gender, Location) VALUES (2, 35, 'Male', 'Argentina'); CREATE TABLE Games (GameID INT, GameName VARCHAR(20), Genre VARCHAR(20)); INSERT INTO Games (GameID, GameName, Genre) VALUES (1, 'Soccer Star', 'Sports');", "sql": "SELECT AVG(Players.Age) FROM Players INNER JOIN Games ON Players.Location = Games.GameName WHERE Games.Genre = 'Sports' AND Players.Location IN ('Brazil', 'Argentina');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 168, "num_statements": 1} {"question": "how many wildlife habitats are there in each region?", "schema": "CREATE TABLE wildlife_habitats (id INT, region VARCHAR(255), habitat_type VARCHAR(255));", "sql": "SELECT region, COUNT(DISTINCT id) as num_habitats FROM wildlife_habitats GROUP BY region;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 89, "num_statements": 1} {"question": "PostgreSQL regression test 'copy2': Write the SELECT query (example 240).", "schema": null, "sql": "SELECT * FROM check_ign_err2;", "explanation": "Regression test for Copy2 in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT * FROM check_ign_err2) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 29, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the home city for the team whose home ground is dnb nor arena?", "schema": "CREATE TABLE table_2522473_1 (home_city VARCHAR, home_ground VARCHAR)", "sql": "SELECT home_city FROM table_2522473_1 WHERE home_ground = 'DnB Nor Arena';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 74, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: When did they play at the wachovia center?", "schema": "CREATE TABLE table_name_75 (date VARCHAR, location_attendance VARCHAR)", "sql": "SELECT date FROM table_name_75 WHERE location_attendance = 'wachovia center';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 77, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'indexing' (example 17).", "schema": null, "sql": "create index concurrently on idxpart (a);", "explanation": "DDL from PostgreSQL core regression test for Indexing.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "ddl_index", "is_postgresql_specific": true, "sql_length": 41, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the attendance when North Melbourne was the away team?", "schema": "CREATE TABLE table_name_77 (crowd INTEGER, away_team VARCHAR)", "sql": "SELECT AVG(crowd) FROM table_name_77 WHERE away_team = 'north melbourne';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 73, "num_statements": 1} {"question": "What is the total number of climate finance projects in Southeast Asia, and how many of them were successful?", "schema": "CREATE TABLE climate_finance (region VARCHAR(50), project VARCHAR(50), success BOOLEAN); INSERT INTO climate_finance (region, project, success) VALUES ('Southeast Asia', 'Solar Power Plant', TRUE), ('Southeast Asia', 'Wind Farm', FALSE);", "sql": "SELECT COUNT(*), SUM(success) FROM climate_finance WHERE region = 'Southeast Asia';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 83, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: what score has a location and attendance of montreal forum and the date of may 10?", "schema": "CREATE TABLE table_name_49 (score VARCHAR, location_attendance VARCHAR, date VARCHAR)", "sql": "SELECT score FROM table_name_49 WHERE location_attendance = 'montreal forum' AND date = 'may 10';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 97, "num_statements": 1} {"question": "What is the average duration of space missions for each astronaut?", "schema": "CREATE TABLE astronauts (id INT, name VARCHAR(50));CREATE TABLE missions (id INT, astronaut_id INT, duration INT); INSERT INTO astronauts VALUES (1, 'Melissa Lewis'); INSERT INTO missions VALUES (1, 1), (2, 1), (3, 1); INSERT INTO missions VALUES (1, 1), (2, 1), (3, 1), (4, 1);", "sql": "SELECT astronauts.name, AVG(missions.duration) as avg_duration FROM astronauts INNER JOIN missions ON astronauts.id = missions.astronaut_id GROUP BY astronauts.name;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 165, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the score of the home team that played Collingwood?", "schema": "CREATE TABLE table_name_70 (home_team VARCHAR, away_team VARCHAR)", "sql": "SELECT home_team AS score FROM table_name_70 WHERE away_team = 'collingwood';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 77, "num_statements": 1} {"question": "What is the average climate finance provided to Indigenous communities in Canada for climate communication initiatives between 2016 and 2020?", "schema": "CREATE TABLE climate_finance (year INT, community VARCHAR(50), initiative VARCHAR(50), amount FLOAT); INSERT INTO climate_finance (year, community, initiative, amount) VALUES (2016, 'Indigenous community in Canada', 'climate communication', 25000);", "sql": "SELECT AVG(amount) FROM climate_finance WHERE initiative = 'climate communication' AND community LIKE '%Indigenous community in Canada%' AND year BETWEEN 2016 AND 2020;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 168, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the status of the institution that was founded in 1996?", "schema": "CREATE TABLE table_name_64 (status VARCHAR, founded VARCHAR)", "sql": "SELECT status FROM table_name_64 WHERE founded = 1996;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 54, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which player played 2004-05", "schema": "CREATE TABLE table_11545282_18 (player VARCHAR, years_for_jazz VARCHAR)", "sql": "SELECT player FROM table_11545282_18 WHERE years_for_jazz = '2004-05';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 70, "num_statements": 1} {"question": "pgTAP test for Pgtap--Unpackaged--0.91.0 (assertion 210).", "schema": null, "sql": "ALTER EXTENSION pgtap ADD FUNCTION has_check ( NAME, TEXT );", "explanation": "SQL assertion from pgTAP test suite for Pgtap--Unpackaged--0.91.0.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 60, "num_statements": 1} {"question": "Delete a soldier record from the 'soldiers' table with ID 102", "schema": "CREATE TABLE soldiers (id INT PRIMARY KEY, name VARCHAR(50), rank VARCHAR(50), branch VARCHAR(50)); INSERT INTO soldiers (id, name, rank, branch) VALUES (102, 'Jane Doe', 'Lieutenant', 'Navy');", "sql": "DELETE FROM soldiers WHERE id = 102;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_delete", "is_postgresql_specific": false, "sql_length": 36, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which English has Plautdietsch of aupel?", "schema": "CREATE TABLE table_name_64 (english VARCHAR, plautdietsch VARCHAR)", "sql": "SELECT english FROM table_name_64 WHERE plautdietsch = 'aupel';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 63, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Gold of 0, and a Silver smaller than 3, and a Rank larger than 9, and a Total of 1 has how many numbers of bronze?", "schema": "CREATE TABLE table_name_1 (bronze VARCHAR, total VARCHAR, rank VARCHAR, gold VARCHAR, silver VARCHAR)", "sql": "SELECT COUNT(bronze) FROM table_name_1 WHERE gold = 0 AND silver < 3 AND rank > 9 AND total = 1;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 96, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What Album has a Year that's larger than 2001?", "schema": "CREATE TABLE table_name_39 (album VARCHAR, year INTEGER)", "sql": "SELECT album FROM table_name_39 WHERE year > 2001;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 50, "num_statements": 1} {"question": "Which artists have released music under multiple genres?", "schema": "CREATE TABLE artist_genre (artist_id INT, artist_name VARCHAR(255), genre VARCHAR(255)); CREATE TABLE artist (artist_id INT, artist_name VARCHAR(255));", "sql": "SELECT artist_name FROM artist_genre GROUP BY artist_name HAVING COUNT(DISTINCT genre) > 1;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 91, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What year did the term end for those elected in 1990", "schema": "CREATE TABLE table_1602620_1 (term_ended VARCHAR, elected VARCHAR)", "sql": "SELECT term_ended FROM table_1602620_1 WHERE elected = 1990;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 60, "num_statements": 1} {"question": "Find the maximum quantity of products in the 'furniture' category", "schema": "CREATE TABLE products (product_id INT, category VARCHAR(20), quantity INT); INSERT INTO products (product_id, category, quantity) VALUES (1, 'furniture', 25), (2, 'furniture', 50), (3, 'furniture', 75);", "sql": "SELECT MAX(quantity) FROM products WHERE category = 'furniture';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 64, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the pole position for belgian grand prix", "schema": "CREATE TABLE table_name_75 (pole_position VARCHAR, grand_prix VARCHAR)", "sql": "SELECT pole_position FROM table_name_75 WHERE grand_prix = 'belgian grand prix';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 80, "num_statements": 1} {"question": "What is the average waiting time of cargo ships from African countries at the Port of Los Angeles?", "schema": "CREATE TABLE ports (port_id INT, port_name VARCHAR(100), country VARCHAR(100)); INSERT INTO ports (port_id, port_name, country) VALUES (1, 'Port of Los Angeles', 'USA'); CREATE TABLE cargo_ships (ship_id INT, ship_name VARCHAR(100), port_id INT, waiting_time INT); INSERT INTO cargo_ships (ship_id, ship_name, port_id, waiting_time) VALUES (1, 'African Ship 1', 1, 200), (2, 'African Ship 2', 1, 250), (3, 'African Ship 3', 1, 300);", "sql": "SELECT AVG(waiting_time) FROM cargo_ships WHERE country = 'Africa';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 67, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: what is the owner of the c501", "schema": "CREATE TABLE table_name_2 (owner VARCHAR, locomotive VARCHAR)", "sql": "SELECT owner FROM table_name_2 WHERE locomotive = 'c501';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 57, "num_statements": 1} {"question": "How many crimes were committed in each neighborhood for the year 2020?", "schema": "CREATE TABLE neighborhoods (id INT, name TEXT, district TEXT); INSERT INTO neighborhoods (id, name, district) VALUES (1, 'Downtown', 'City Center'), (2, 'Richfield', 'North District'); CREATE TABLE crimes (id INT, neighborhood_id INT, type TEXT, year INT, month INT, day INT); INSERT INTO crimes (id, neighborhood_id, type, year, month, day) VALUES (1, 1, 'Theft', 2020, 1, 1), (2, 1, 'Assault', 2019, 12, 31), (3, 2, 'Burglary', 2020, 2, 14);", "sql": "SELECT n.name, c.type, COUNT(c.id) as total_crimes FROM neighborhoods n JOIN crimes c ON n.id = c.neighborhood_id WHERE c.year = 2020 GROUP BY n.id, c.type;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 156, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the most draws when goals against are more than 33, losses are 13 and goals for is less than 51?", "schema": "CREATE TABLE table_name_1 (draws INTEGER, goals_for VARCHAR, goals_against VARCHAR, losses VARCHAR)", "sql": "SELECT MAX(draws) FROM table_name_1 WHERE goals_against > 33 AND losses = 13 AND goals_for < 51;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 96, "num_statements": 1} {"question": "What is the average rating of Gluten-free products?", "schema": "CREATE TABLE products (product_id INT, name VARCHAR(255), rating DECIMAL(2,1), gluten_free BOOLEAN);", "sql": "SELECT AVG(rating) FROM products WHERE gluten_free = TRUE;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 58, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'updatable_views' (example 40).", "schema": null, "sql": "UPDATE ro_view20 SET b=upper(b);", "explanation": "DML from PostgreSQL core regression test for Updatable Views.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 32, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: In what even was the compulsory dance scored 23.75?", "schema": "CREATE TABLE table_22644589_4 (event VARCHAR, compulsory_dance__cd_ VARCHAR)", "sql": "SELECT event FROM table_22644589_4 WHERE compulsory_dance__cd_ = '23.75';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 73, "num_statements": 1} {"question": "What is the average weight of packages shipped to Texas from the New York warehouse?", "schema": "CREATE TABLE Warehouse (id INT, name VARCHAR(255)); INSERT INTO Warehouse (id, name) VALUES (1, 'New York'), (2, 'Texas'); CREATE TABLE Packages (id INT, weight FLOAT, warehouse_id INT, shipment_date DATE); INSERT INTO Packages (id, weight, warehouse_id, shipment_date) VALUES (1, 5.6, 1, '2021-01-01'), (2, 7.2, 1, '2021-01-02'), (3, 3.1, 2, '2021-01-03');", "sql": "SELECT AVG(weight) FROM Packages WHERE warehouse_id = (SELECT id FROM Warehouse WHERE name = 'Texas') AND shipment_date BETWEEN '2021-01-01' AND '2021-12-31';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 158, "num_statements": 1} {"question": "What is the number of financial capability training sessions conducted per region?", "schema": "CREATE TABLE sessions (region VARCHAR(50), session_date DATE);", "sql": "SELECT region, COUNT(*) FROM sessions GROUP BY region;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 54, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many entries are shown for entered at 21:09?", "schema": "CREATE TABLE table_29692554_2 (entered VARCHAR, time VARCHAR)", "sql": "SELECT COUNT(entered) FROM table_29692554_2 WHERE time = '21:09';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 65, "num_statements": 1} {"question": "What is the total R&D expenditure for each disease area?", "schema": "CREATE TABLE rd_expenditure (drug_name TEXT, disease_area TEXT, rd_cost INTEGER); INSERT INTO rd_expenditure (drug_name, disease_area, rd_cost) VALUES ('DrugA', 'Oncology', 20000000), ('DrugB', 'Cardiovascular', 15000000), ('DrugC', 'Oncology', 30000000), ('DrugD', 'Neurology', 25000000);", "sql": "SELECT disease_area, SUM(rd_cost) FROM rd_expenditure GROUP BY disease_area;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 76, "num_statements": 1} {"question": "What is the average veteran unemployment rate for the last 12 months, rounded to the nearest integer?", "schema": "CREATE TABLE veteran_unemployment (unemployment_rate FLOAT, report_date DATE); INSERT INTO veteran_unemployment (unemployment_rate, report_date) VALUES (4.1, '2021-12-01'), (4.3, '2021-11-01'), (4.5, '2021-10-01');", "sql": "SELECT ROUND(AVG(unemployment_rate)) FROM veteran_unemployment WHERE report_date >= DATE_SUB(CURRENT_DATE, INTERVAL 12 MONTH);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 126, "num_statements": 1} {"question": "PostgreSQL regression test 'create_index': Write the SELECT query (example 309).", "schema": null, "sql": "SELECT count(*) FROM onek_with_null WHERE unique1 IS NULL AND unique1 > 500;", "explanation": "Regression test for Create Index in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT count(*) FROM onek_with_null WHERE unique1 IS NULL AND unique1 > 500) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 76, "num_statements": 1} {"question": "What is the total number of units with inclusive housing policies in the 'urban' area?", "schema": "CREATE TABLE incl_hsg_policy (policy_id INT, area VARCHAR(20), units INT); INSERT INTO incl_hsg_policy (policy_id, area, units) VALUES (1, 'urban', 50);", "sql": "SELECT SUM(units) FROM incl_hsg_policy WHERE area = 'urban';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 60, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: When turquoise is the map colour how many avg. trips per mile (×1000) are there?", "schema": "CREATE TABLE table_17839_1 (avg_trips_per_mile__ VARCHAR, map_colour VARCHAR)", "sql": "SELECT COUNT(avg_trips_per_mile__) AS ×1000_ FROM table_17839_1 WHERE map_colour = 'Turquoise';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 95, "num_statements": 1} {"question": "What was the total sales revenue for 'DrugE' in the 'Canada' region in Q3 2021?", "schema": "CREATE TABLE sales_data (drug VARCHAR(50), region VARCHAR(50), quarter INT, year INT, revenue FLOAT); INSERT INTO sales_data (drug, region, quarter, year, revenue) VALUES ('DrugE', 'Canada', 3, 2021, 4000000);", "sql": "SELECT SUM(revenue) FROM sales_data WHERE drug = 'DrugE' AND region = 'Canada' AND quarter = 3 AND year = 2021;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 111, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the others % for cleveland", "schema": "CREATE TABLE table_1733457_1 (others_percentage VARCHAR, county VARCHAR)", "sql": "SELECT others_percentage FROM table_1733457_1 WHERE county = 'Cleveland';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 73, "num_statements": 1} {"question": "What is the average preservation cost for each heritage site in Australia, considering the sites with the highest preservation costs?", "schema": "CREATE TABLE heritage_sites (id INT, name VARCHAR(50), location VARCHAR(50), type VARCHAR(50), PRIMARY KEY(id)); INSERT INTO heritage_sites (id, name, location, type) VALUES (1, 'Sydney Opera House', 'Australia', 'Modern'), (2, 'Uluru', 'Australia', 'Ancient'), (3, 'Port Arthur', 'Australia', 'Historic'); CREATE TABLE preservation_costs (id INT, heritage_site_id INT, cost INT, PRIMARY KEY(id)); INSERT INTO preservation_costs (id, heritage_site_id, cost) VALUES (1, 1, 2000000), (2, 2, 3000000), (3, 3, 1500000);", "sql": "SELECT hs.name, hs.location, hs.type, AVG(pr.cost) AS avg_cost FROM heritage_sites hs JOIN preservation_costs pr ON hs.id = pr.heritage_site_id WHERE hs.location = 'Australia' GROUP BY hs.name, hs.location, hs.type ORDER BY avg_cost DESC LIMIT 1;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 246, "num_statements": 1} {"question": "Which sustainable material has the least usage in garment production?", "schema": "CREATE TABLE Materials (material_id INT PRIMARY KEY, material VARCHAR(50), usage INT); INSERT INTO Materials (material_id, material, usage) VALUES (1, 'Organic Cotton', 500), (2, 'Recycled Polyester', 300), (3, 'Hemp', 100);", "sql": "SELECT material FROM (SELECT material, ROW_NUMBER() OVER (ORDER BY usage) AS rank FROM Materials) AS ranked_materials WHERE rank = 1;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_window_function", "is_postgresql_specific": true, "sql_length": 133, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the highest number of laps that also has a finish total of 8?", "schema": "CREATE TABLE table_name_98 (laps INTEGER, finish VARCHAR)", "sql": "SELECT MAX(laps) FROM table_name_98 WHERE finish = '8';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 55, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Find the distinct last names of all the students who have president votes and whose advisor is not 2192.", "schema": "CREATE TABLE STUDENT (LName VARCHAR, PRESIDENT_Vote VARCHAR, Advisor VARCHAR); CREATE TABLE STUDENT (LName VARCHAR, StuID VARCHAR); CREATE TABLE VOTING_RECORD (Id VARCHAR)", "sql": "SELECT DISTINCT T1.LName FROM STUDENT AS T1 JOIN VOTING_RECORD AS T2 ON T1.StuID = PRESIDENT_Vote EXCEPT SELECT DISTINCT LName FROM STUDENT WHERE Advisor = '2192';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 163, "num_statements": 1} {"question": "What is the total revenue generated from gluten-free dishes?", "schema": "CREATE TABLE Bakery (bakery_item VARCHAR(50), type VARCHAR(20), price DECIMAL(5,2), revenue DECIMAL(5,2)); INSERT INTO Bakery (bakery_item, type, price, revenue) VALUES ('Almond Croissant', 'Gluten-free', 2.99, 0), ('Chocolate Chip Cookie', 'Gluten-free', 2.49, 0), ('Blueberry Muffin', 'Gluten-free', 3.49, 0);", "sql": "SELECT SUM(price * revenue) FROM Bakery WHERE type = 'Gluten-free';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 67, "num_statements": 1} {"question": "Update the record of health equity metric for the county with id 001 to 0.85", "schema": "CREATE TABLE health_equity_metrics (county_id VARCHAR(255), health_equity_score FLOAT); INSERT INTO health_equity_metrics (county_id, health_equity_score) VALUES ('001', 0.82), ('002', 0.90), ('003', 0.75);", "sql": "UPDATE health_equity_metrics SET health_equity_score = 0.85 WHERE county_id = '001';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 84, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many clubs have a tries against count of 45 and a losing bonus of 4?", "schema": "CREATE TABLE table_12886178_5 (tries_for VARCHAR, tries_against VARCHAR, losing_bonus VARCHAR)", "sql": "SELECT COUNT(tries_for) FROM table_12886178_5 WHERE tries_against = '45' AND losing_bonus = '4';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 96, "num_statements": 1} {"question": "List all the clients who have invested in Shariah-compliant funds and their total investment amount.", "schema": "CREATE TABLE clients (client_id INT, name TEXT); CREATE TABLE shariah_compliant_funds (fund_id INT, client_id INT, investment_amount INT); INSERT INTO clients (client_id, name) VALUES (1, 'John Doe'), (2, 'Jane Doe'); INSERT INTO shariah_compliant_funds (fund_id, client_id, investment_amount) VALUES (1, 1, 5000), (2, 1, 7000), (3, 2, 8000);", "sql": "SELECT clients.name, SUM(shariah_compliant_funds.investment_amount) FROM clients JOIN shariah_compliant_funds ON clients.client_id = shariah_compliant_funds.client_id GROUP BY clients.name;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 189, "num_statements": 1} {"question": "Which country has the most factories in the 'renewable energy' sector?", "schema": "CREATE TABLE factories (id INT, name VARCHAR(50), country VARCHAR(50), sector VARCHAR(50)); INSERT INTO factories (id, name, country, sector) VALUES (1, 'Solar Factory', 'Germany', 'renewable energy'), (2, 'Wind Factory', 'China', 'renewable energy'), (3, 'Coal Factory', 'USA', 'non-renewable energy');", "sql": "SELECT country, COUNT(*) as factory_count FROM factories WHERE sector = 'renewable energy' GROUP BY country ORDER BY factory_count DESC LIMIT 1;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 144, "num_statements": 1} {"question": "Find the average number of weeks patients with depression are hospitalized in France.", "schema": "CREATE TABLE patients (patient_id INT, patient_name VARCHAR(50), condition VARCHAR(50), country VARCHAR(50), hospitalization_date DATE, discharge_date DATE); INSERT INTO patients (patient_id, patient_name, condition, country, hospitalization_date, discharge_date) VALUES (1, 'Jean Dupont', 'Depression', 'France', '2021-02-01', '2021-02-14');", "sql": "SELECT AVG(DATEDIFF(day, patients.hospitalization_date, patients.discharge_date)/7.0) FROM patients WHERE patients.condition = 'Depression' AND patients.country = 'France';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 172, "num_statements": 1} {"question": "List the exhibitions that have the same city", "schema": "CREATE TABLE exhibitions (exhibition_id INT PRIMARY KEY, exhibition_name VARCHAR(255), city VARCHAR(255), country VARCHAR(255));", "sql": "SELECT exhibition_name FROM exhibitions GROUP BY city HAVING COUNT(exhibition_id) > 1;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 86, "num_statements": 1} {"question": "Show an example of PostgreSQL CREATE COLLATION (example 3).", "schema": null, "sql": "CREATE COLLATION german FROM \"de_DE\";", "explanation": "PostgreSQL CREATE COLLATION command.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 37, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the total number of Total with more than 16 rank for the nation of Romania with more than 0 for silver?", "schema": "CREATE TABLE table_name_36 (total VARCHAR, silver VARCHAR, rank VARCHAR, nation VARCHAR)", "sql": "SELECT COUNT(total) FROM table_name_36 WHERE rank > 16 AND nation = 'romania' AND silver > 0;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 93, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: The person that was born in Sydney died in what year?", "schema": "CREATE TABLE table_name_47 (born___died VARCHAR, connection_with_australia VARCHAR)", "sql": "SELECT born___died FROM table_name_47 WHERE connection_with_australia = 'born in sydney';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 89, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: what is the court surface when the tournament is paris masters?", "schema": "CREATE TABLE table_name_47 (court_surface VARCHAR, tournament VARCHAR)", "sql": "SELECT court_surface FROM table_name_47 WHERE tournament = 'paris masters';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 75, "num_statements": 1} {"question": "Which digital divide initiatives were completed in 2020?", "schema": "CREATE TABLE Digital_Divide (project_id INT, project_name VARCHAR(100), completion_year INT); INSERT INTO Digital_Divide (project_id, project_name, completion_year) VALUES (1, 'Project X', 2019), (2, 'Project Y', 2020), (3, 'Project Z', 2018);", "sql": "SELECT project_name FROM Digital_Divide WHERE completion_year = 2020;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 69, "num_statements": 1} {"question": "What is the minimum pollution level in the Mediterranean Sea?", "schema": "CREATE TABLE pollution_levels (id INT, location TEXT, pollution_level FLOAT); INSERT INTO pollution_levels (id, location, pollution_level) VALUES (1, 'Mediterranean Sea', 5.0), (2, 'Baltic Sea', 3.0);", "sql": "SELECT MIN(pollution_level) FROM pollution_levels WHERE location = 'Mediterranean Sea';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 87, "num_statements": 1} {"question": "List all oil spills that have occurred in the Gulf of Mexico since 2010", "schema": "CREATE TABLE oil_spills(spill_name VARCHAR(255), location VARCHAR(255), year INT);INSERT INTO oil_spills(spill_name, location, year) VALUES('Deepwater Horizon','Gulf of Mexico',2010),('Ixtoc I','Gulf of Mexico',1979),('Montara','Timor Sea',2009),('Kuwait', 'Persian Gulf', 1991),('Atlantic Empress','Caribbean Sea',1979);", "sql": "SELECT spill_name FROM oil_spills WHERE location = 'Gulf of Mexico' AND year >= 2010;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 85, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What country is Paul Runyan from?", "schema": "CREATE TABLE table_name_6 (country VARCHAR, name VARCHAR)", "sql": "SELECT country FROM table_name_6 WHERE name = 'paul runyan';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 60, "num_statements": 1} {"question": "List the names of organizations that have made social impact investments in Africa.", "schema": "CREATE TABLE social_impact_investments (investment_id INT, organization_id INT, region VARCHAR(50)); CREATE TABLE organizations (organization_id INT, organization_name VARCHAR(100)); INSERT INTO social_impact_investments (investment_id, organization_id, region) VALUES (1, 1, 'Africa'), (2, 2, 'Europe'), (3, 3, 'Asia'); INSERT INTO organizations (organization_id, organization_name) VALUES (1, 'Global Impact Fund'), (2, 'Renewable Energy Foundation'), (3, 'Community Housing Initiative');", "sql": "SELECT o.organization_name FROM social_impact_investments i INNER JOIN organizations o ON i.organization_id = o.organization_id WHERE i.region = 'Africa';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 154, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What are the market city/market(s) for Rapid City Alternative format?", "schema": "CREATE TABLE table_134987_3 (target_city__market VARCHAR, city_of_license VARCHAR, format VARCHAR)", "sql": "SELECT target_city__market FROM table_134987_3 WHERE city_of_license = 'Rapid City' AND format = 'Alternative';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 111, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Find the names of the trains that do not pass any station located in London.", "schema": "CREATE TABLE station (station_id VARCHAR); CREATE TABLE train_station (station_id VARCHAR); CREATE TABLE train_station (train_id VARCHAR, station_id VARCHAR); CREATE TABLE train (name VARCHAR, train_id VARCHAR)", "sql": "SELECT T2.name FROM train_station AS T1 JOIN train AS T2 ON T1.train_id = T2.train_id WHERE NOT T1.station_id IN (SELECT T4.station_id FROM train_station AS T3 JOIN station AS T4 ON T3.station_id = T4.station_id WHERE t4.location = 'London');", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 242, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which mountain range includes Mount Hubbard?", "schema": "CREATE TABLE table_name_64 (mountain_range VARCHAR, mountain_peak VARCHAR)", "sql": "SELECT mountain_range FROM table_name_64 WHERE mountain_peak = 'mount hubbard';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 79, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Tell me the date for 2004", "schema": "CREATE TABLE table_name_35 (date VARCHAR, year VARCHAR)", "sql": "SELECT date FROM table_name_35 WHERE year = '2004';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 51, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the name of the site with a CERCLIS ID of prd980512362?", "schema": "CREATE TABLE table_name_77 (name VARCHAR, cerclis_id VARCHAR)", "sql": "SELECT name FROM table_name_77 WHERE cerclis_id = 'prd980512362';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 65, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the Essendon Home Team's venue?", "schema": "CREATE TABLE table_name_61 (venue VARCHAR, home_team VARCHAR)", "sql": "SELECT venue FROM table_name_61 WHERE home_team = 'essendon';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 61, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'rowsecurity' (example 469).", "schema": null, "sql": "CREATE TABLE z2 (a int, b text);", "explanation": "DDL from PostgreSQL core regression test for Rowsecurity.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_table", "is_postgresql_specific": false, "sql_length": 32, "num_statements": 1} {"question": "What is the highest daily trading volume for each digital asset category?", "schema": "CREATE TABLE digital_asset_categories (id INT, name VARCHAR(255)); CREATE TABLE digital_assets (id INT, category_id INT, name VARCHAR(255), daily_trading_volume DECIMAL(10,2)); INSERT INTO digital_asset_categories (id, name) VALUES (1, 'CategoryA'), (2, 'CategoryB'), (3, 'CategoryC'); INSERT INTO digital_assets (id, category_id, name, daily_trading_volume) VALUES (1, 1, 'Asset1', 5000), (2, 1, 'Asset2', 3000), (3, 2, 'Asset3', 2000), (4, 2, 'Asset4', 1000), (5, 3, 'Asset5', 500);", "sql": "SELECT category_id, MAX(daily_trading_volume) AS Highest_Daily_Trading_Volume FROM digital_assets GROUP BY category_id;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 119, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the ICAO when the IATA is ika?", "schema": "CREATE TABLE table_name_19 (icao VARCHAR, iata VARCHAR)", "sql": "SELECT icao FROM table_name_19 WHERE iata = 'ika';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 50, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'partition_info' (example 77).", "schema": null, "sql": "CREATE UNIQUE INDEX ON ONLY p1 (a);", "explanation": "DDL from PostgreSQL core regression test for Partition Info.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_index", "is_postgresql_specific": false, "sql_length": 35, "num_statements": 1} {"question": "PostgreSQL regression test 'jsonb': Write the SELECT query (example 128).", "schema": null, "sql": "SELECT jsonb_exists_any('{\"a\":null, \"b\":\"qq\"}', ARRAY['b','a']);", "explanation": "Regression test for Jsonb in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT jsonb_exists_any('{\"a\":null, \"b\":\"qq\"}', ARRAY['b','a'])) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 64, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the to par that has ernie els as the player?", "schema": "CREATE TABLE table_name_43 (to_par VARCHAR, player VARCHAR)", "sql": "SELECT to_par FROM table_name_43 WHERE player = 'ernie els';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 60, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What opponent has euro '84 qualifying as the type and split as the city?", "schema": "CREATE TABLE table_name_54 (opponent VARCHAR, type_of_game VARCHAR, city VARCHAR)", "sql": "SELECT opponent FROM table_name_54 WHERE type_of_game = 'euro '84 qualifying' AND city = 'split';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 97, "num_statements": 1} {"question": "What is the average funding per visual art program, excluding programs with a fundingAmount less than 10000?", "schema": "CREATE TABLE VisualArtPrograms (programID INT, fundingAmount DECIMAL(10,2)); INSERT INTO VisualArtPrograms (programID, fundingAmount) VALUES (1, 12000.00), (2, 8000.00), (3, 15000.00);", "sql": "SELECT AVG(fundingAmount) FROM VisualArtPrograms WHERE fundingAmount >= 10000;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 78, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'pgcrypto' (example 21).", "schema": null, "sql": "SELECT digest('abcdefghbcdefghicdefghijdefghijkefghijklfghijklmghijklmnhijklmnoijklmnopjklmnopqklmnopqrlmnopqrsmnopqrstnopqrstu', 'sha512');", "explanation": "Example query from the 'pgcrypto' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 140, "num_statements": 1} {"question": "What is the minimum, maximum, and average explainability score of the models developed by different organizations?", "schema": "CREATE TABLE models_explainability (model_id INT, org_id INT, explainability_score FLOAT); INSERT INTO models_explainability (model_id, org_id, explainability_score) VALUES (101, 1, 0.85), (102, 1, 0.92), (103, 2, 0.88), (104, 2, 0.9), (105, 3, 0.95);", "sql": "SELECT org_id, MIN(explainability_score) as min_score, MAX(explainability_score) as max_score, AVG(explainability_score) as avg_score FROM models_explainability GROUP BY org_id;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 177, "num_statements": 1} {"question": "What is the total quantity of eco-friendly fabric used by each textile supplier in the last year?", "schema": "CREATE TABLE FabricData (FabricID INT, SupplierID INT, FabricType TEXT, Quantity FLOAT, Sustainable BOOLEAN); INSERT INTO FabricData (FabricID, SupplierID, FabricType, Quantity, Sustainable) VALUES (1001, 1, 'Cotton', 500, true), (1002, 1, 'Polyester', 700, false), (1003, 2, 'Hemp', 800, true);", "sql": "SELECT SupplierID, SUM(Quantity) FROM FabricData WHERE Sustainable = true AND FabricDate >= DATE_SUB(CURRENT_DATE, INTERVAL 1 YEAR) GROUP BY SupplierID;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 152, "num_statements": 1} {"question": "What is the total number of international tourists visiting Canada, grouped by continent of origin?", "schema": "CREATE TABLE visitors (visitor_country VARCHAR(50), continent VARCHAR(50), total_visits INT); INSERT INTO visitors (visitor_country, continent, total_visits) VALUES ('Canada', 'North America', 25000);", "sql": "SELECT continent, SUM(total_visits) FROM visitors WHERE visitor_country = 'Canada' GROUP BY continent;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 102, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'pg_stat_statements' (example 47).", "schema": null, "sql": "SELECT WHERE 1 IN (1, int4(1), int4(2));", "explanation": "Example query from the 'pg_stat_statements' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 40, "num_statements": 1} {"question": "What is the percentage of students in the \"Hillside\" school district who did not participate in lifelong learning programs last year?", "schema": "CREATE TABLE students (student_id INT, district VARCHAR(20), participated_in_llp BOOLEAN, year INT); INSERT INTO students (student_id, district, participated_in_llp, year) VALUES (1, 'Hillside', TRUE, 2021), (2, 'Hillside', FALSE, 2021), (3, 'Townside', TRUE, 2021);", "sql": "SELECT (COUNT(*) FILTER (WHERE NOT participated_in_llp)) * 100.0 / COUNT(*) FROM students WHERE district = 'Hillside' AND year = 2021;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": true, "sql_length": 134, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'cube' (example 199).", "schema": null, "sql": "SELECT cube_distance('(1,1)'::cube, '(4,5)'::cube);", "explanation": "Example query from the 'cube' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "query_select", "is_postgresql_specific": true, "sql_length": 51, "num_statements": 1} {"question": "List the names of rural healthcare centers in India that serve more than 150 patients.", "schema": "CREATE TABLE healthcare_centers_india_2 (name TEXT, location TEXT, patients_served INT); INSERT INTO healthcare_centers_india_2 (name, location, patients_served) VALUES ('HC A', 'Rural Tamil Nadu', 200), ('HC B', 'Rural Karnataka', 100), ('HC C', 'Rural Andhra Pradesh', 150);", "sql": "SELECT name FROM healthcare_centers_india_2 WHERE location LIKE 'Rural%' AND patients_served > 150;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 99, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is Illinois 13 District's Party?", "schema": "CREATE TABLE table_name_59 (party VARCHAR, district VARCHAR)", "sql": "SELECT party FROM table_name_59 WHERE district = 'illinois 13';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 63, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the L when the W is 4?", "schema": "CREATE TABLE table_29565120_2 (l INTEGER, w VARCHAR)", "sql": "SELECT MIN(l) FROM table_29565120_2 WHERE w = 4;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 48, "num_statements": 1} {"question": "List all unique departments, ordered from the fewest projects to the most.", "schema": "CREATE TABLE projects (id INT, engineer_id INT, department VARCHAR(20), cost DECIMAL(10,2)); INSERT INTO projects (id, engineer_id, department, cost) VALUES (1, 1001, 'civil', 5000), (2, 1002, 'civil', 6000), (3, 1003, 'structural', 4000), (4, 1001, 'civil', 7000), (5, 1002, 'civil', 3000), (6, 1003, 'structural', 6000);", "sql": "SELECT department FROM projects GROUP BY department ORDER BY COUNT(*) ASC;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 74, "num_statements": 1} {"question": "Find the total number of machines that were added in each year", "schema": "CREATE TABLE machines (id INT, name VARCHAR(50), added_date DATE); INSERT INTO machines (id, name, added_date) VALUES (1, 'Machine 1', '2021-01-15'), (2, 'Machine 2', '2022-02-20'), (3, 'Machine 3', '2022-01-05'), (4, 'Machine 4', '2023-03-12'), (5, 'Machine 5', '2023-02-01'), (6, 'Machine 6', '2023-04-15');", "sql": "SELECT YEAR(added_date) AS year, COUNT(*) AS total FROM machines GROUP BY year;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 79, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who are the villains in the episodes where Megan is the storyteller and Lorette LeBlanc is the director?", "schema": "CREATE TABLE table_10470082_7 (villains VARCHAR, storyteller VARCHAR, director VARCHAR)", "sql": "SELECT villains FROM table_10470082_7 WHERE storyteller = 'Megan' AND director = 'Lorette LeBlanc';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 99, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'timestamptz' (example 66).", "schema": null, "sql": "INSERT INTO TIMESTAMPTZ_TBL VALUES ('19970710 173201 America/Does_not_exist');", "explanation": "DML from PostgreSQL core regression test for Timestamptz.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 78, "num_statements": 1} {"question": "List all threat intelligence reports related to 'Cyber Warfare' in the 'Asia-Pacific' region.", "schema": "CREATE TABLE threat_reports (id INT, report_name TEXT, category TEXT, region TEXT); INSERT INTO threat_reports (id, report_name, category, region) VALUES (1, 'Report A', 'Cyber Warfare', 'Asia-Pacific'), (2, 'Report B', 'Cyber Warfare', 'Europe');", "sql": "SELECT report_name FROM threat_reports WHERE category = 'Cyber Warfare' AND region = 'Asia-Pacific';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 100, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'matview' (example 2).", "schema": null, "sql": "INSERT INTO mvtest_t VALUES\n (1, 'x', 2),\n (2, 'x', 3),\n (3, 'y', 5),\n (4, 'y', 7),\n (5, 'z', 11);", "explanation": "DML from PostgreSQL core regression test for Matview.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 103, "num_statements": 1} {"question": "What is the average transaction amount per customer in the 'Retail Banking' division?", "schema": "CREATE TABLE Customers (CustomerID INT, Division VARCHAR(20)); INSERT INTO Customers (CustomerID, Division) VALUES (1, 'Retail Banking'), (2, 'Retail Banking'), (3, 'Corporate Banking'); CREATE TABLE Transactions (TransactionID INT, CustomerID INT, Amount DECIMAL(10,2)); INSERT INTO Transactions (TransactionID, CustomerID, Amount) VALUES (1, 1, 500.00), (2, 1, 250.00), (3, 2, 750.00), (4, 3, 1500.00);", "sql": "SELECT AVG(Amount) FROM Transactions INNER JOIN Customers ON Transactions.CustomerID = Customers.CustomerID WHERE Customers.Division = 'Retail Banking';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 152, "num_statements": 1} {"question": "What is the minimum duration of a song in the pop genre?", "schema": "CREATE TABLE Song (Title VARCHAR(30), Genre VARCHAR(10), Duration FLOAT); INSERT INTO Song (Title, Genre, Duration) VALUES ('Song1', 'Pop', 3.15), ('Song2', 'Pop', 4.23), ('Song3', 'Pop', 2.87), ('Song4', 'Country', 3.56), ('Song5', 'Rock', 4.55);", "sql": "SELECT Genre, MIN(Duration) FROM Song WHERE Genre = 'Pop' GROUP BY Genre;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 73, "num_statements": 1} {"question": "What is the percentage of the population that is obese in each age group in the United States?", "schema": "CREATE TABLE obesity_rates (id INT, age_group TEXT, obesity_rate DECIMAL(4,2), country TEXT); INSERT INTO obesity_rates (id, age_group, obesity_rate, country) VALUES (1, '0-18', 15.3, 'United States'), (2, '19-34', 27.2, 'United States'), (3, '35-49', 36.6, 'United States'), (4, '50-64', 40.2, 'United States'), (5, '65+', 39.5, 'United States');", "sql": "SELECT age_group, obesity_rate FROM obesity_rates WHERE country = 'United States';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 82, "num_statements": 1} {"question": "What is the total mass of spacecraft manufactured by Galactic Inc, grouped by the continent of their origin?", "schema": "CREATE TABLE SpacecraftManufacturing (Manufacturer VARCHAR(255), Country VARCHAR(255), SpacecraftModel VARCHAR(255), SpacecraftMass INT); INSERT INTO SpacecraftManufacturing (Manufacturer, Country, SpacecraftModel, SpacecraftMass) VALUES ('SpaceTech Corp', 'USA', 'SpaceshipX', 10000), ('SpaceTech Corp', 'USA', 'SpaceshipY', 12000), ('Galactic Inc', 'Canada', 'SpaceshipA', 8000);", "sql": "SELECT SUM(SpacecraftMass) AS Total_Spacecraft_Mass, CONCAT(SUBSTRING(Country, 1, 2), '%') AS Continent FROM SpacecraftManufacturing WHERE Manufacturer = 'Galactic Inc' GROUP BY Continent;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 188, "num_statements": 1} {"question": "What are the names and maximum depths for submersibles certified for deep-sea use?", "schema": "CREATE TABLE submersibles (name VARCHAR(255), manufacturer VARCHAR(255), max_depth INT); INSERT INTO submersibles (name, manufacturer, max_depth) VALUES ('Sub1', 'Manufacturer1', 7000);", "sql": "SELECT name, max_depth FROM submersibles WHERE max_depth >= 6000;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 65, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which Score in the final has an Outcome of winner, and a Surface of hard (i)?", "schema": "CREATE TABLE table_name_12 (score_in_the_final VARCHAR, outcome VARCHAR, surface VARCHAR)", "sql": "SELECT score_in_the_final FROM table_name_12 WHERE outcome = 'winner' AND surface = 'hard (i)';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 95, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the Home with a Visitor of chicago, and a Series with 3 – 2?", "schema": "CREATE TABLE table_name_82 (home VARCHAR, visitor VARCHAR, series VARCHAR)", "sql": "SELECT home FROM table_name_82 WHERE visitor = 'chicago' AND series = '3 – 2';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 78, "num_statements": 1} {"question": "What is the number of legal aid clinics in each state in the US, and how many clients do they serve on average?", "schema": "CREATE TABLE us_legal_aid(id INT, state VARCHAR(255), clients_served INT);", "sql": "SELECT state, AVG(clients_served) AS average_clients_served FROM us_legal_aid GROUP BY state;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 93, "num_statements": 1} {"question": "Who are the investors who made investments in a specific country?", "schema": "CREATE TABLE Investments (InvestmentID INT, InvestorID INT, Country VARCHAR(20), Amount INT); INSERT INTO Investments (InvestmentID, InvestorID, Country, Amount) VALUES (1, 1, 'USA', 4000), (2, 1, 'Canada', 3000), (3, 2, 'Mexico', 5000), (4, 2, 'Brazil', 6000), (5, 3, 'USA', 7000), (6, 3, 'Canada', 8000); CREATE TABLE Investors (InvestorID INT, Name VARCHAR(20), Gender VARCHAR(10)); INSERT INTO Investors (InvestorID, Name, Gender) VALUES (1, 'John Doe', 'Male'), (2, 'Jane Smith', 'Female'), (3, 'Jim Brown', 'Male');", "sql": "SELECT Investors.Name FROM Investors JOIN Investments ON Investors.InvestorID = Investments.InvestorID WHERE Investments.Country = 'USA';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 137, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which week's game was attended by 65,272 people?", "schema": "CREATE TABLE table_name_18 (week VARCHAR, attendance VARCHAR)", "sql": "SELECT week FROM table_name_18 WHERE attendance = '65,272';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 59, "num_statements": 1} {"question": "PostgreSQL Query: show example 5.", "schema": null, "sql": "INSERT INTO weather (city, temp_lo, temp_hi, prcp, date) VALUES ('San Francisco', 43, 57, 0.0, '1994-11-29');", "explanation": "Example from PostgreSQL documentation on Query.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 109, "num_statements": 1} {"question": "Which countries have the highest hotel rating?", "schema": "CREATE TABLE Hotels (hotel_id INT, hotel_name TEXT, country TEXT, rating FLOAT); INSERT INTO Hotels (hotel_id, hotel_name, country, rating) VALUES (1, 'Hotel A', 'Spain', 4.3), (2, 'Hotel B', 'Spain', 4.5), (3, 'Hotel C', 'France', 4.7);", "sql": "SELECT country, MAX(rating) AS max_rating FROM Hotels GROUP BY country;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 71, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What position is there when the main contestant is Vishal Singh and the scores are 1 + 7 + 5 = 13?", "schema": "CREATE TABLE table_name_21 (position VARCHAR, main_contestant VARCHAR, scores_by_each_individual_judge VARCHAR)", "sql": "SELECT position FROM table_name_21 WHERE main_contestant = 'vishal singh' AND scores_by_each_individual_judge = 1 + 7 + 5 = 13;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 127, "num_statements": 1} {"question": "Which freight forwarders have handled more than 1000 shipments in total?", "schema": "CREATE TABLE freight_forwarder (id INT, name VARCHAR(25)); INSERT INTO freight_forwarder (id, name) VALUES (1, 'ABC Freight'), (2, 'XYZ Logistics'), (3, 'Global Shipping'); CREATE TABLE shipment (id INT, forwarder_id INT, weight INT); INSERT INTO shipment (id, forwarder_id, weight) VALUES (1, 1, 500), (2, 1, 800), (3, 2, 300), (4, 3, 1200), (5, 1, 900);", "sql": "SELECT f.name, COUNT(s.id) FROM freight_forwarder f JOIN shipment s ON f.id = s.forwarder_id GROUP BY f.name HAVING COUNT(s.id) > 1000;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 135, "num_statements": 1} {"question": "What is the difference between the average sustainable fabric cost per order for male and female customers?", "schema": "CREATE TABLE orders (order_id INT, customer_id INT, sustainable_fabric_cost DECIMAL(5,2));CREATE TABLE customers (customer_id INT, gender VARCHAR(10));", "sql": "SELECT gender, AVG(sustainable_fabric_cost) - LAG(AVG(sustainable_fabric_cost)) OVER (ORDER BY gender) FROM orders INNER JOIN customers ON orders.customer_id = customers.customer_id GROUP BY gender;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_window_function", "is_postgresql_specific": true, "sql_length": 198, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is Draws, when Losses is \"Did Not Qualify\"?", "schema": "CREATE TABLE table_name_76 (draws VARCHAR, losses VARCHAR)", "sql": "SELECT draws FROM table_name_76 WHERE losses = 'did not qualify';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 65, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the language for the movie \"Late Bloomers\"?", "schema": "CREATE TABLE table_22034853_1 (language_s_ VARCHAR, film_title_used_in_nomination VARCHAR)", "sql": "SELECT language_s_ FROM table_22034853_1 WHERE film_title_used_in_nomination = 'Late Bloomers';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 95, "num_statements": 1} {"question": "How many wells were drilled in the Gulf of Mexico in 2022?", "schema": "CREATE TABLE DrillingPlatforms (PlatformID int, PlatformName varchar(50), Location varchar(50), PlatformType varchar(50), NumberOfWells int); INSERT INTO DrillingPlatforms (PlatformID, PlatformName, Location, PlatformType, NumberOfWells) VALUES (1, 'A01', 'North Sea', 'Offshore', 10), (2, 'B02', 'Gulf of Mexico', 'Offshore', 15), (3, 'C03', 'Texas', 'Onshore', 6), (4, 'D04', 'Texas', 'Onshore', 20); CREATE TABLE DrillingLogs (DrillingLogID int, PlatformID int, WellID int, DrillingDate date); INSERT INTO DrillingLogs (DrillingLogID, PlatformID, WellID, DrillingDate) VALUES (1, 2, 101, '2022-01-01'), (2, 2, 102, '2022-01-02'), (3, 5, 201, '2022-01-03'), (4, 5, 202, '2022-01-04');", "sql": "SELECT COUNT(*) FROM DrillingLogs JOIN DrillingPlatforms ON DrillingLogs.PlatformID = DrillingPlatforms.PlatformID WHERE DrillingPlatforms.Location = 'Gulf of Mexico' AND YEAR(DrillingLogs.DrillingDate) = 2022;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 210, "num_statements": 1} {"question": "Which mines in California have an environmental impact score above 75?", "schema": "CREATE TABLE mines (id INT, name TEXT, state TEXT, environmental_score INT); INSERT INTO mines (id, name, state, environmental_score) VALUES (1, 'Delta Mine', 'CA', 85), (2, 'Echo Mine', 'CA', 65), (3, 'Foxtrot Mine', 'CA', 78);", "sql": "SELECT name, environmental_score FROM mines WHERE state = 'CA' AND environmental_score > 75;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 92, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'vacuum' (example 66).", "schema": null, "sql": "INSERT INTO past_parted VALUES (1),(1);", "explanation": "DML from PostgreSQL core regression test for Vacuum.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 39, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What's the change over same quarter the previous year in the period when the 89.6% of the trains arrive within 5 minutes of scheduled time (over three months)?", "schema": "CREATE TABLE table_171748_3 (change_over_same_quarter_the_previous_year VARCHAR, _percentage_trains_arriving_within_5_mins_of_scheduled_time__over_three_months_ VARCHAR)", "sql": "SELECT change_over_same_quarter_the_previous_year FROM table_171748_3 WHERE _percentage_trains_arriving_within_5_mins_of_scheduled_time__over_three_months_ = '89.6%';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 166, "num_statements": 1} {"question": "How many female employees work in the 'sustainable_practices' table?", "schema": "CREATE TABLE sustainable_practices (employee_id INT, first_name VARCHAR(50), last_name VARCHAR(50), gender VARCHAR(10), position VARCHAR(50), hours_worked INT); INSERT INTO sustainable_practices (employee_id, first_name, last_name, gender, position, hours_worked) VALUES (3, 'Alice', 'Johnson', 'Female', 'Analyst', 30); INSERT INTO sustainable_practices (employee_id, first_name, last_name, gender, position, hours_worked) VALUES (4, 'Bob', 'Williams', 'Male', 'Technician', 35); INSERT INTO sustainable_practices (employee_id, first_name, last_name, gender, position, hours_worked) VALUES (8, 'Olga', 'Ivanova', 'Female', 'Technician', 40); INSERT INTO sustainable_practices (employee_id, first_name, last_name, gender, position, hours_worked) VALUES (9, 'Sara', 'Lopez', 'Female', 'Analyst', 35);", "sql": "SELECT COUNT(*) FROM sustainable_practices WHERE gender = 'Female';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 67, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which Japanese Title has a TV Station of ntv?", "schema": "CREATE TABLE table_name_19 (japanese_title VARCHAR, tv_station VARCHAR)", "sql": "SELECT japanese_title FROM table_name_19 WHERE tv_station = 'ntv';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Show the id and star rating of each hotel, ordered by its price from low to high.", "schema": "CREATE TABLE HOTELS (hotel_id VARCHAR, star_rating_code VARCHAR, price_range VARCHAR)", "sql": "SELECT hotel_id, star_rating_code FROM HOTELS ORDER BY price_range;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 67, "num_statements": 1} {"question": "What was the number of disaster response operations in Pakistan in 2018?", "schema": "CREATE TABLE disaster_response (disaster_name VARCHAR(255), country VARCHAR(255), operation_start_date DATE, operation_end_date DATE); INSERT INTO disaster_response (disaster_name, country, operation_start_date, operation_end_date) VALUES ('Flood', 'Pakistan', '2018-01-01', '2018-04-30'), ('Earthquake', 'Pakistan', '2018-10-01', '2018-12-31');", "sql": "SELECT COUNT(*) FROM disaster_response WHERE country = 'Pakistan' AND YEAR(operation_start_date) = 2018 AND YEAR(operation_end_date) = 2018;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 140, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'fast_default' (example 34).", "schema": null, "sql": "INSERT INTO T VALUES (9), (10);", "explanation": "DML from PostgreSQL core regression test for Fast Default.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 31, "num_statements": 1} {"question": "What is the total cost of inventory for non-vegetarian menu items in the month of January 2022?", "schema": "CREATE TABLE Menu (menu_id INT, menu_name VARCHAR(20), is_vegetarian BOOLEAN); CREATE TABLE Inventory (inventory_id INT, menu_id INT, inventory_cost FLOAT); INSERT INTO Menu (menu_id, menu_name, is_vegetarian) VALUES (1, 'Breakfast', FALSE), (2, 'Lunch', FALSE), (3, 'Dinner', TRUE), (4, 'Steak', FALSE); INSERT INTO Inventory (inventory_id, menu_id, inventory_cost) VALUES (1, 1, 5.0), (2, 2, 3.5), (3, 4, 15.0), (4, 1, 8.0), (5, 3, 7.0);", "sql": "SELECT SUM(Inventory.inventory_cost) FROM Inventory INNER JOIN Menu ON Inventory.menu_id = Menu.menu_id WHERE Menu.is_vegetarian = FALSE AND MONTH(Inventory.inventory_date) = 1 AND YEAR(Inventory.inventory_date) = 2022;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 219, "num_statements": 1} {"question": "What is the maximum budget allocated for defense diplomacy by countries in Europe in 2018?", "schema": "CREATE TABLE DefenseDiplomacy (id INT, country VARCHAR(50), budget DECIMAL(10,2), year INT); INSERT INTO DefenseDiplomacy (id, country, budget, year) VALUES (1, 'France', 8000000, 2018), (2, 'Germany', 9000000, 2018), (3, 'Italy', 7000000, 2018);", "sql": "SELECT MAX(budget) FROM DefenseDiplomacy WHERE country IN ('France', 'Germany', 'Italy') AND year = 2018;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 105, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'pg_walinspect' (example 18).", "schema": null, "sql": "SELECT * FROM pg_get_wal_stats('FFFFFFFF/FFFFFFFE', 'FFFFFFFF/FFFFFFFF');", "explanation": "Example query from the 'pg_walinspect' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 73, "num_statements": 1} {"question": "Show the gas production of the well with ID 5 in the Eagle Ford region", "schema": "CREATE TABLE if not exists wells (well_id int, region varchar(50), production_year int, gas_production int);INSERT INTO wells (well_id, region, production_year, gas_production) VALUES (5, 'Eagle Ford', 2019, 550000), (6, 'Eagle Ford', 2020, 600000), (7, 'Marcellus Shale', 2018, 850000);", "sql": "SELECT gas_production FROM wells WHERE well_id = 5 AND region = 'Eagle Ford';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 77, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'pgcrypto' (example 2).", "schema": null, "sql": "select pgp_pub_decrypt(\n\t\tpgp_pub_encrypt('Secret msg', dearmor(pubkey)),\n\t\tdearmor(seckey))\nfrom keytbl where keytbl.id=2;", "explanation": "Example query from the 'pgcrypto' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 123, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Where was the third bridge over panama canal?", "schema": "CREATE TABLE table_name_83 (location VARCHAR, name VARCHAR)", "sql": "SELECT location FROM table_name_83 WHERE name = 'third bridge over panama canal';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 81, "num_statements": 1} {"question": "Count the number of startups founded by underrepresented minorities each year", "schema": "CREATE TABLE diversity (company_id INT, founder_minority TEXT); INSERT INTO diversity (company_id, founder_minority) VALUES (1, 'Latinx'); INSERT INTO diversity (company_id, founder_minority) VALUES (2, 'Asian');", "sql": "SELECT founding_year, COUNT(*) FROM company c INNER JOIN diversity d ON c.id = d.company_id WHERE founder_minority IS NOT NULL GROUP BY founding_year;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 150, "num_statements": 1} {"question": "What is the average safety score for each AI algorithm in the 'creative_ai' database?", "schema": "CREATE TABLE creative_ai.ai_algorithms (ai_algorithm_id INT PRIMARY KEY, ai_algorithm VARCHAR(255), safety_score FLOAT); INSERT INTO creative_ai.ai_algorithms (ai_algorithm_id, ai_algorithm, safety_score) VALUES (1, 'Generative Adversarial Networks', 0.75), (2, 'Transformers', 0.85), (3, 'Deep Reinforcement Learning', 0.65);", "sql": "SELECT ai_algorithm, AVG(safety_score) as avg_safety_score FROM creative_ai.ai_algorithms GROUP BY ai_algorithm;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 112, "num_statements": 1} {"question": "Show a SQL definition from the pg_partman project (pg_partman--3.2.1--4.0.0, item 47).", "schema": null, "sql": "CREATE OR REPLACE FUNCTION @extschema@.drop_partition_id(p_parent_table text, p_retention bigint DEFAULT NULL, p_keep_table boolean DEFAULT NULL, p_keep_index boolean DEFAULT NULL, p_retention_schema text DEFAULT NULL) RETURNS int\n LANGUAGE plpgsql\n AS $$\nDECLARE\n\nex_context text;\nex_detail text;\nex_hint text;\nex_message text;\nv_adv_lock boolean;\nv_control text;\nv_control_type text;\nv_count int;\nv_drop_count int := 0;\nv_index record;\nv_job_id bigint;\nv_jobmon boolean;\nv_jobmon_schema text;\nv_max bigint;\nv_new_search_path text := '@extschema@,pg_temp';\nv_old_search_path text;\nv_parent_schema text;\nv_parent_tablename text;\nv_partition_interval bigint;\nv_partition_id bigint;\nv_partition_type text;\nv_retention bigint;\nv_retention_keep_index boolean;\nv_retention_keep_table boolean;\nv_retention_schema text;\nv_row record;\nv_row_max_id record;\nv_step_id bigint;\n\nBEGIN\n/*\n * Function to drop child tables from an id-based partition set.\n * Options to move table to different schema, drop only indexes or actually drop the table from the database.\n */\n\nv_adv_lock := pg_try_advisory_xact_lock(hashtext('pg_partman drop_partition_id'));\nIF v_adv_lock = 'false' THEN\n RAISE NOTICE 'drop_partition_id already running.';\n RETURN 0;\nEND IF;\n\nIF p_retention IS NULL THEN\n SELECT\n partition_interval::bigint\n , partition_type\n , control\n , retention::bigint\n , retention_keep_table\n , retention_keep_index\n , retention_schema\n , jobmon\n INTO\n v_partition_interval\n , v_partition_type\n , v_control\n , v_retention\n , v_retention_keep_table\n , v_retention_keep_index\n , v_retention_schema\n , v_jobmon\n FROM @extschema@.part_config\n WHERE parent_table = p_parent_table\n AND retention IS NOT NULL;\n\n IF v_partition_interval IS NULL THEN\n RAISE EXCEPTION 'Configuration for given parent table with a retention period not found: %', p_parent_table;\n END IF;\nELSE -- Allow override of configuration options\n SELECT\n partition_interval::bigint\n , partition_type\n , control\n , retention_keep_table\n , retention_keep_index\n , retention_schema\n , jobmon\n INTO\n v_partition_interval\n , v_partition_type\n , v_control\n , v_retention_keep_table\n , v_retention_keep_index\n , v_retention_schema\n , v_jobmon\n FROM @extschema@.part_config\n WHERE parent_table = p_parent_table;\n v_retention := p_retention;\n\n IF v_partition_interval IS NULL THEN\n RAISE EXCEPTION 'Configuration for given parent table not found: %', p_parent_table;\n END IF;\nEND IF;\n\nSELECT general_type INTO v_control_type FROM @extschema@.check_control_type(v_parent_schema, v_parent_tablename, v_control);\nIF v_control_type <> 'id' THEN\n RAISE EXCEPTION 'Data type of control column in given partition set is not an integer type';\nEND IF;\n\nSELECT current_setting('search_path') INTO v_old_search_path;\nIF v_jobmon THEN\n SELECT nspname INTO v_jobmon_schema FROM pg_catalog.pg_namespace n, pg_catalog.pg_extension e WHERE e.extname = 'pg_jobmon'::name AND e.extnamespace = n.oid;\n IF v_jobmon_schema IS NOT NULL THEN\n v_new_search_path := '@extschema@,'||v_jobmon_schema||',pg_temp';\n END IF;\nEND IF;\nEXECUTE format('SELECT set_config(%L, %L, %L)', 'search_path', v_new_search_path, 'false');\n\nIF p_keep_table IS NOT NULL THEN\n v_retention_keep_table = p_keep_table;\nEND IF;\nIF p_keep_index IS NOT NULL THEN\n v_retention_keep_index = p_keep_index;\nEND IF;\nIF p_retention_schema IS NOT NULL THEN\n v_retention_schema = p_retention_schema;\nEND IF;\n\nSELECT schemaname, tablename INTO v_parent_schema, v_parent_tablename\nFROM pg_catalog.pg_tables\nWHERE schemaname = split_part(p_parent_table, '.', 1)::name\nAND tablename = split_part(p_parent_table, '.', 2)::name;\n\n-- Loop through child tables starting from highest to get current max value in partition set\n-- Avoids doing a scan on entire partition set and/or getting any values accidentally in parent.\nFOR v_row_max_id IN\n SELECT partition_schemaname, partition_tablename FROM @extschema@.show_partitions(p_parent_table, 'DESC')\nLOOP\n EXECUTE format('SELECT max(%I) FROM %I.%I', v_control, v_row_max_id.partition_schemaname, v_row_max_id.partition_tablename) INTO v_max;\n IF v_max IS NOT NULL THEN\n EXIT;\n END IF;\nEND LOOP;\n\n-- Loop through child tables of the given parent\nFOR v_row IN\n SELECT partition_schemaname, partition_tablename FROM @extschema@.show_partitions(p_parent_table, 'ASC')\nLOOP\n SELECT child_start_id INTO v_partition_id FROM @extschema@.show_partition_info(v_row.partition_schemaname||'.'||v_row.partition_tablename\n , v_partition_interval::text\n , p_parent_table);\n\n -- Add one interval since partition names contain the start of the constraint period\n IF v_retention <= (v_max - (v_partition_id + v_partition_interval)) THEN\n\n -- Do not allow final partition to be dropped\n SELECT count(*) INTO v_count FROM @extschema@.show_partitions(p_parent_table);\n IF v_count = 1 THEN\n RAISE WARNING 'Attempt to drop final partition in partition set % as part of retention policy. Advise reviewing retention policy and/or data entry into the partition set.', p_parent_table;\n CONTINUE;\n END IF;\n\n -- Only create a jobmon entry if there's actual retention work done\n IF v_jobmon_schema IS NOT NULL AND v_job_id IS NULL THEN\n v_job_id := add_job(format('PARTMAN DROP ID PARTITION: %s', p_parent_table));\n END IF;\n\n IF v_jobmon_schema IS NOT NULL THEN\n v_step_id := add_step(v_job_id, format('Detach/Uninherit table %s.%s from %s', v_row.partition_schemaname, v_row.partition_tablename, p_parent_table));\n END IF;\n IF v_partition_type = 'native' THEN\n EXECUTE format('ALTER TABLE %I.%I DETACH PARTITION %I.%I'\n , v_parent_schema\n , v_parent_tablename\n , v_row.partition_schemaname\n , v_row.partition_tablename);\n ELSE\n EXECUTE format('ALTER TABLE %I.%I NO INHERIT %I.%I'\n , v_row.partition_schemaname\n , v_row.partition_tablename\n , v_parent_schema\n , v_parent_tablename);\n IF v_jobmon_schema IS NOT NULL THEN\n PERFORM update_step(v_step_id, 'OK', 'Done');\n END IF;\n END IF;\n IF v_retention_schema IS NULL THEN\n IF v_retention_keep_table = false THEN\n IF v_jobmon_schema IS NOT NULL THEN\n v_step_id := add_step(v_job_id, format('Drop table %s.%s', v_row.partition_schemaname, v_row.partition_tablename));\n END IF;\n EXECUTE format('DROP TABLE %I.%I CASCADE', v_row.partition_schemaname, v_row.partition_tablename);\n IF v_jobmon_schema IS NOT NULL THEN\n PERFORM update_step(v_step_id, 'OK', 'Done');\n END IF;\n ELSIF v_retention_keep_index = false THEN\n FOR v_index IN\n WITH child_info AS (\n SELECT c1.oid\n FROM pg_catalog.pg_class c1\n JOIN pg_catalog.pg_namespace n1 ON c1.relnamespace = n1.oid\n WHERE c1.relname = v_row.partition_tablename::name\n AND n1.nspname = v_row.partition_schema::name\n )\n SELECT c.relname as name\n , con.conname\n FROM pg_catalog.pg_index i\n JOIN pg_catalog.pg_class c ON i.indexrelid = c.oid\n LEFT JOIN pg_catalog.pg_constraint con ON i.indexrelid = con.conindid\n JOIN child_info ON i.indrelid = child_info.oid\n LOOP\n IF v_jobmon_schema IS NOT NULL THEN\n v_step_id := add_step(v_job_id, format('Drop index %s from %s.%s'\n , v_index.name\n , v_row.partition_schemaname\n , v_row.partition_tablename));\n END IF;\n IF v_index.conname IS NOT NULL THEN\n EXECUTE format('ALTER TABLE %I.%I DROP CONSTRAINT %I', v_row.partition_schemaname, v_row.partition_tablename, v_index.conname);\n ELSE\n EXECUTE format('DROP INDEX %I.%I', v_row.partition_schemaname, v_index.name);\n END IF;\n IF v_jobmon_schema IS NOT NULL THEN\n PERFORM update_step(v_step_id, 'OK', 'Done');\n END IF;\n END LOOP;\n END IF;\n ELSE -- Move to new schema\n IF v_jobmon_schema IS NOT NULL THEN\n v_step_id := add_step(v_job_id, format('Moving table %s.%s to schema %s'\n , v_row.partition_schemaname\n , v_row.partition_tablename\n , v_retention_schema));\n END IF;\n\n EXECUTE format('ALTER TABLE %I.%I SET SCHEMA %I'\n , v_row.partition_schemaname\n , v_row.partition_tablename\n , v_retention_schema);\n\n IF v_jobmon_schema IS NOT NULL THEN\n PERFORM update_step(v_step_id, 'OK', 'Done');\n END IF;\n END IF; -- End retention schema if\n\n -- If child table is a subpartition, remove it from part_config & part_config_sub (should cascade due to FK)\n DELETE FROM @extschema@.part_config WHERE parent_table = v_row.partition_schemaname ||'.'||v_row.partition_tablename;\n\n v_drop_count := v_drop_count + 1;\n END IF; -- End retention check IF\n\nEND LOOP; -- End child table loop\n\nIF v_jobmon_schema IS NOT NULL THEN\n IF v_job_id IS NOT NULL THEN\n v_step_id := add_step(v_job_id, 'Finished partition drop maintenance');\n PERFORM update_step(v_step_id, 'OK', format('%s partitions dropped.', v_drop_count));\n PERFORM close_job(v_job_id);\n END IF;\nEND IF;\n\nEXECUTE format('SELECT set_config(%L, %L, %L)', 'search_path', v_old_search_path, 'false');\n\nRETURN v_drop_count;\n\nEXCEPTION\n WHEN OTHERS THEN\n GET STACKED DIAGNOSTICS ex_message = MESSAGE_TEXT,\n ex_context = PG_EXCEPTION_CONTEXT,\n ex_detail = PG_EXCEPTION_DETAIL,\n ex_hint = PG_EXCEPTION_HINT;\n IF v_jobmon_schema IS NOT NULL THEN\n IF v_job_id IS NULL THEN\n EXECUTE format('SELECT %I.add_job(''PARTMAN DROP ID PARTITION: %s'')', v_jobmon_schema, p_parent_table) INTO v_job_id;\n EXECUTE format('SELECT %I.add_step(%s, ''EXCEPTION before job logging started'')', v_jobmon_schema, v_job_id, p_parent_table) INTO v_step_id;\n ELSIF v_step_id IS NULL THEN\n EXECUTE format('SELECT %I.add_step(%s, ''EXCEPTION before first step logged'')', v_jobmon_schema, v_job_id) INTO v_step_id;\n END IF;\n EXECUTE format('SELECT %I.update_step(%s, ''CRITICAL'', %L)', v_jobmon_schema, v_step_id, 'ERROR: '||coalesce(SQLERRM,'unknown'));\n EXECUTE format('SELECT %I.fail_job(%s)', v_jobmon_schema, v_job_id);\n END IF;\n RAISE EXCEPTION '%\nCONTEXT: %\nDETAIL: %\nHINT: %', ex_message, ex_context, ex_detail, ex_hint;\nEND\n$$;", "explanation": "SQL definition from the open-source pg_partman PostgreSQL project.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "plpgsql_function", "is_postgresql_specific": true, "sql_length": 12067, "num_statements": 115} {"question": "PostgreSQL regression test 'misc': Write the SELECT query (example 56).", "schema": null, "sql": "SELECT name(equipment_named_ambiguous_1b(hobby_construct_named(text 'skywalking', text 'mer')));", "explanation": "Regression test for Misc in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT name(equipment_named_ambiguous_1b(hobby_construct_named(text 'skywalking', text 'mer')))) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 96, "num_statements": 1} {"question": "What is the maximum number of events attended by a fan?", "schema": "CREATE TABLE fans (fan_id INT, state VARCHAR(255)); CREATE TABLE events (fan_id INT, event_id INT); INSERT INTO fans (fan_id) VALUES (1), (2), (3), (4), (5); INSERT INTO events (fan_id, event_id) VALUES (1, 101), (1, 102), (1, 103), (2, 101), (3, 102), (3, 103), (3, 104), (4, 101), (5, 101), (5, 102), (5, 103), (5, 104), (5, 105);", "sql": "SELECT MAX(event_count) as max_events FROM (SELECT fan_id, COUNT(*) as event_count FROM events GROUP BY fan_id) as subquery;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 124, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many field goals did Walter Shaw make?", "schema": "CREATE TABLE table_14342210_14 (field_goals__5_points_ VARCHAR, player VARCHAR)", "sql": "SELECT COUNT(field_goals__5_points_) FROM table_14342210_14 WHERE player = 'Walter Shaw';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 89, "num_statements": 1} {"question": "What is the success rate of cases with 'Murphy' as the lead attorney?", "schema": "CREATE TABLE attorneys (attorney_id INT, name VARCHAR(50)); INSERT INTO attorneys (attorney_id, name) VALUES (1, 'Smith'), (2, 'Johnson'), (3, 'Williams'), (4, 'Murphy'); CREATE TABLE cases (case_id INT, attorney_id INT, is_success BOOLEAN); INSERT INTO cases (case_id, attorney_id, is_success) VALUES (1, 2, TRUE), (2, 1, FALSE), (3, 3, TRUE), (4, 4, TRUE);", "sql": "SELECT COUNT(*) / (SELECT COUNT(*) FROM cases WHERE cases.attorney_id = attorneys.attorney_id) AS success_rate FROM attorneys INNER JOIN cases ON attorneys.attorney_id = cases.attorney_id WHERE attorneys.name = 'Murphy' AND is_success = TRUE;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 242, "num_statements": 1} {"question": "What was the total number of visitors to each continent in 2021?", "schema": "CREATE TABLE if not exists VisitorContinents (Continent VARCHAR(50), Country VARCHAR(50), Visitors INT); INSERT INTO VisitorContinents (Continent, Country, Visitors) VALUES ('Africa', 'Egypt', 120000), ('Asia', 'Japan', 240000), ('Europe', 'France', 300000), ('South America', 'Brazil', 140000), ('North America', 'Canada', 170000), ('Oceania', 'Australia', 270000);", "sql": "SELECT a.Continent, SUM(a.Visitors) AS TotalVisitors FROM VisitorContinents a WHERE a.Year = 2021 GROUP BY a.Continent;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 119, "num_statements": 1} {"question": "Find total expenses and income for each month in the 'expenses' and 'income' tables", "schema": "CREATE TABLE expenses (expense_id INT, expense_amount DECIMAL, expense_date DATE); CREATE TABLE income (income_id INT, income_amount DECIMAL, income_date DATE);", "sql": "SELECT DATE_FORMAT(expenses.expense_date, '%Y-%m') as month, SUM(expenses.expense_amount) as total_expenses, SUM(income.income_amount) as total_income FROM expenses FULL OUTER JOIN income ON DATE_FORMAT(expenses.expense_date, '%Y-%m') = DATE_FORMAT(income.income_date, '%Y-%m') GROUP BY month;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 293, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: When the away team was Bolton Wanderers, what was the score?", "schema": "CREATE TABLE table_name_83 (score VARCHAR, away_team VARCHAR)", "sql": "SELECT score FROM table_name_83 WHERE away_team = 'bolton wanderers';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 69, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the constructor of the car that Hermann Lang drove after 1935?", "schema": "CREATE TABLE table_name_89 (constructor VARCHAR, year VARCHAR, driver VARCHAR)", "sql": "SELECT constructor FROM table_name_89 WHERE year > 1935 AND driver = 'hermann lang';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 84, "num_statements": 1} {"question": "What is the average donation amount in Q1 2023 from returning donors in Latin America?", "schema": "CREATE TABLE Donors (DonorID int, DonorType varchar(50), Country varchar(50), AmountDonated numeric(18,2), DonationDate date, IsReturningDonor bit); INSERT INTO Donors (DonorID, DonorType, Country, AmountDonated, DonationDate, IsReturningDonor) VALUES (1, 'Individual', 'Brazil', 3000, '2023-01-02', 1), (2, 'Organization', 'Argentina', 8000, '2023-01-03', 0);", "sql": "SELECT AVG(AmountDonated) FROM Donors WHERE DonorType = 'Individual' AND Country LIKE 'Latin America%' AND IsReturningDonor = 1 AND QUARTER(DonationDate) = 1 AND YEAR(DonationDate) = 2023;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 188, "num_statements": 1} {"question": "Add a new student named \"Jamie\" with a major in \"Computer Science\" and a GPA of 3.8 to the \"students\" table.", "schema": "CREATE TABLE students (student_id INT, name VARCHAR(255), major VARCHAR(255), gpa DECIMAL(3,2));", "sql": "INSERT INTO students (name, major, gpa) VALUES ('Jamie', 'Computer Science', 3.8);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 82, "num_statements": 1} {"question": "What is the minimum budget allocated for any service in Florida?", "schema": "CREATE TABLE service_budget (state VARCHAR(50), service VARCHAR(50), budget INT); INSERT INTO service_budget (state, service, budget) VALUES ('Florida', 'Education', 5000000), ('Florida', 'Highway Maintenance', 3000000);", "sql": "SELECT MIN(budget) FROM service_budget WHERE state = 'Florida';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 63, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What stake has 16 wards/branches in Arkansas?", "schema": "CREATE TABLE table_name_97 (stake VARCHAR, wards__branches_in_arkansas VARCHAR)", "sql": "SELECT stake FROM table_name_97 WHERE wards__branches_in_arkansas = 16;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 71, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'numeric_big' (example 172).", "schema": null, "sql": "INSERT INTO num_exp_mul VALUES (3,7,'49383414785234649002982046297226894664526726187218771083.0993243619030008310875293647868815940421844461627295157812843657782639833900543200310573708100000958929315945039020410482966753145208427035917753919085618457760620513481628641658765820294863970581642745379331727722585319163262763708386199720411053619449096019862596221607526610103408936214184850115071874430846697061554769773328338028749631552202705583855831155461651414320570061181212214810086436100771547030013079997847086');", "explanation": "DML from PostgreSQL core regression test for Numeric Big.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 521, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What opponent has l 27-7 as the result?", "schema": "CREATE TABLE table_name_83 (opponent VARCHAR, result VARCHAR)", "sql": "SELECT opponent FROM table_name_83 WHERE result = 'l 27-7';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 59, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which week was the game on December 14, 1967?", "schema": "CREATE TABLE table_name_27 (week VARCHAR, date VARCHAR)", "sql": "SELECT week FROM table_name_27 WHERE date = 'december 14, 1967';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 64, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the overall draft number for the running back drafted after round 1 from fresno state?", "schema": "CREATE TABLE table_name_68 (overall INTEGER, college VARCHAR, position VARCHAR, round VARCHAR)", "sql": "SELECT SUM(overall) FROM table_name_68 WHERE position = 'running back' AND round > 1 AND college = 'fresno state';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 114, "num_statements": 1} {"question": "What is the minimum and maximum number of art pieces created by artists from the Middle East and North Africa in the last decade?", "schema": "CREATE TABLE ArtPieces (creation_date DATE, artist_region VARCHAR(30), num_pieces INT); INSERT INTO ArtPieces (creation_date, artist_region, num_pieces) VALUES ('2012-01-01', 'Middle East', 120), ('2012-01-02', 'Middle East', 150), ('2013-01-03', 'Middle East', 80), ('2014-01-04', 'Middle East', 90), ('2015-02-01', 'North Africa', 120), ('2016-02-02', 'North Africa', 150), ('2017-02-03', 'North Africa', 80), ('2018-02-04', 'North Africa', 90), ('2019-03-01', 'Middle East', 120), ('2020-03-02', 'Middle East', 150), ('2021-03-03', 'Middle East', 80), ('2022-03-04', 'Middle East', 90);", "sql": "SELECT MIN(num_pieces), MAX(num_pieces) FROM ArtPieces WHERE artist_region IN ('Middle East', 'North Africa') AND creation_date >= DATEADD(YEAR, -10, GETDATE());", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 161, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who did the high points of game 5?", "schema": "CREATE TABLE table_11960196_3 (high_points VARCHAR, game VARCHAR)", "sql": "SELECT high_points FROM table_11960196_3 WHERE game = 5;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 56, "num_statements": 1} {"question": "How many garments were produced in each region using recycled materials?", "schema": "CREATE TABLE Recycled_Material_Garments (id INT, region VARCHAR, quantity INT);", "sql": "SELECT region, SUM(quantity) FROM Recycled_Material_Garments GROUP BY region;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 77, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: When Geelong is the Away team, what did the Home team score?", "schema": "CREATE TABLE table_name_2 (home_team VARCHAR, away_team VARCHAR)", "sql": "SELECT home_team AS score FROM table_name_2 WHERE away_team = 'geelong';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 72, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is SECOND, when SIXTH is Victoria?", "schema": "CREATE TABLE table_name_18 (second VARCHAR, sixth VARCHAR)", "sql": "SELECT second FROM table_name_18 WHERE sixth = 'victoria';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 58, "num_statements": 1} {"question": "What are the names and case IDs of all cases that have been resolved in the state of California?", "schema": "CREATE TABLE court_cases (case_id INT, case_status TEXT, case_state TEXT); INSERT INTO court_cases (case_id, case_status, case_state) VALUES (66666, 'Resolved', 'California');", "sql": "SELECT case_id, case_status FROM court_cases WHERE case_state = 'California' AND case_status = 'Resolved';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 106, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the result for the opponent being gerardi rinaldi in 2009?", "schema": "CREATE TABLE table_name_14 (result VARCHAR, date VARCHAR, opponent VARCHAR)", "sql": "SELECT result FROM table_name_14 WHERE date = 2009 AND opponent = 'gerardi rinaldi';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 84, "num_statements": 1} {"question": "What are the endangered languages in South America?", "schema": "CREATE TABLE EndangeredLanguages (id INT, language VARCHAR(50), status VARCHAR(50), country VARCHAR(50)); INSERT INTO EndangeredLanguages (id, language, status, country) VALUES (1, 'Mapudungun', 'Vulnerable', 'Chile'); INSERT INTO EndangeredLanguages (id, language, status, country) VALUES (2, 'Quechua', 'Vulnerable', 'Peru');", "sql": "SELECT EndangeredLanguages.language FROM EndangeredLanguages WHERE EndangeredLanguages.status = 'Vulnerable' AND EndangeredLanguages.country IN ('Argentina', 'Bolivia', 'Brazil', 'Chile', 'Colombia', 'Ecuador', 'Guyana', 'Paraguay', 'Peru', 'Suriname', 'Uruguay', 'Venezuela');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 277, "num_statements": 1} {"question": "What is the average salary of engineers in 'asia_mines'?", "schema": "CREATE SCHEMA if not exists asia_schema;CREATE TABLE asia_schema.asia_mines (id INT, name VARCHAR, role VARCHAR, salary DECIMAL);INSERT INTO asia_schema.asia_mines (id, name, role, salary) VALUES (1, 'A worker', 'Engineer', 65000.00), (2, 'B engineer', 'Engineer', 72000.00);", "sql": "SELECT AVG(salary) FROM asia_schema.asia_mines WHERE role = 'Engineer';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 71, "num_statements": 1} {"question": "What is the average quantity of gold mined per day by each mine?", "schema": "CREATE TABLE mine (mine_id INT, mine_name TEXT, location TEXT, daily_gold_mined INT); INSERT INTO mine VALUES (1, 'ABC Mine', 'Wyoming, USA', 150), (2, 'DEF Mine', 'West Virginia, USA', 120), (3, 'GHI Mine', 'Kentucky, USA', 80), (4, 'JKL Mine', 'Colorado, USA', 170);", "sql": "SELECT mine_name, AVG(daily_gold_mined) as avg_daily_gold_mined FROM mine GROUP BY mine_name;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 93, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the average Industry, when Agriculture is greater than 11?", "schema": "CREATE TABLE table_name_44 (industry INTEGER, agriculture INTEGER)", "sql": "SELECT AVG(industry) FROM table_name_44 WHERE agriculture > 11;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 63, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is Result, when Opponent is New England Patriots?", "schema": "CREATE TABLE table_name_8 (result VARCHAR, opponent VARCHAR)", "sql": "SELECT result FROM table_name_8 WHERE opponent = 'new england patriots';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 72, "num_statements": 1} {"question": "List the number of vessels that had a maintenance date in Q3 of 2020 and visited the Port of Shanghai in Q2 of 2019.", "schema": "CREATE TABLE vessels (vessel_id INT, vessel_name TEXT, last_maintenance_date DATE); INSERT INTO vessels VALUES (1, 'Vessel A', '2020-07-15'), (2, 'Vessel B', '2020-10-10'), (3, 'Vessel C', '2020-09-25'); CREATE TABLE port_visits (visit_id INT, vessel_id INT, port_id INT, visit_date DATE); INSERT INTO port_visits VALUES (1, 1, 8, '2019-04-01'), (2, 2, 8, '2019-05-01'), (3, 3, 8, '2019-06-01');", "sql": "SELECT COUNT(DISTINCT vessels.vessel_id) FROM vessels JOIN port_visits ON vessels.vessel_id = port_visits.vessel_id WHERE QUARTER(vessels.last_maintenance_date) = 3 AND port_visits.port_id = 8 AND QUARTER(port_visits.visit_date) = 2;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 233, "num_statements": 1} {"question": "What is the total fare collected on route 101?", "schema": "CREATE TABLE routes (route_id INT, name VARCHAR(255)); INSERT INTO routes (route_id, name) VALUES (101, 'Route 101'); CREATE TABLE fares (fare_id INT, route_id INT, amount DECIMAL(5,2)); INSERT INTO fares (fare_id, route_id, amount) VALUES (1, 101, 2.50), (2, 101, 3.75);", "sql": "SELECT SUM(amount) FROM fares WHERE route_id = 101;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 51, "num_statements": 1} {"question": "Find the total sales for each menu item from the sales_fact table, grouped by menu_item_id.", "schema": "CREATE TABLE sales_fact (sale_id INT, menu_item_id INT, sale_quantity INT, sale_price DECIMAL, sale_date DATE);", "sql": "SELECT menu_item_id, SUM(sale_quantity * sale_price) as total_sales FROM sales_fact GROUP BY menu_item_id;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 106, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which Market/Rank is associated with WCRN calls?", "schema": "CREATE TABLE table_10333757_1 (market_rank VARCHAR, calls VARCHAR)", "sql": "SELECT market_rank FROM table_10333757_1 WHERE calls = 'WCRN';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 62, "num_statements": 1} {"question": "What is the distribution of confidence scores for fairness-related predictions made by models 'Fairlearn' and 'AIF360' in the 'model_performance' table?", "schema": "CREATE TABLE model_performance (model_name VARCHAR(20), prediction VARCHAR(20), confidence FLOAT); INSERT INTO model_performance (model_name, prediction, confidence) VALUES ('Fairlearn', 'fairness', 0.85), ('Fairlearn', 'bias', 0.91), ('AIF360', 'fairness', 0.78), ('AIF360', 'explainability', 0.95);", "sql": "SELECT model_name, COUNT(*) as count, AVG(confidence) as avg_confidence FROM model_performance WHERE prediction LIKE '%fairness%' GROUP BY model_name;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "plpgsql", "is_postgresql_specific": false, "sql_length": 150, "num_statements": 1} {"question": "What is the average playtime, in hours, for players from the United Kingdom, for games in the 'Adventure' genre?", "schema": "CREATE TABLE games (game_id INT, game_genre VARCHAR(255), player_id INT, playtime_mins INT); CREATE TABLE players (player_id INT, player_country VARCHAR(255));", "sql": "SELECT AVG(playtime_mins / 60) FROM games JOIN players ON games.player_id = players.player_id WHERE players.player_country = 'United Kingdom' AND game_genre = 'Adventure';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 171, "num_statements": 1} {"question": "What is the average property price in eco-friendly neighborhoods with more than 5 sustainable urban projects?", "schema": "CREATE TABLE neighborhoods (neighborhood_id INT, name VARCHAR(255), avg_property_price DECIMAL(10,2), num_sustainable_projects INT); INSERT INTO neighborhoods (neighborhood_id, name, avg_property_price, num_sustainable_projects) VALUES (1, 'Central Park', 850000, 4), (2, 'Soho', 1200000, 2), (3, 'Greenwich Village', 1100000, 6), (4, 'Harlem', 600000, 7);", "sql": "SELECT avg_property_price FROM neighborhoods WHERE num_sustainable_projects > 5;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 80, "num_statements": 1} {"question": "Find the autonomous driving research projects that are conducted by companies from the USA and China.", "schema": "CREATE TABLE AutonomousResearch (project VARCHAR(20), company VARCHAR(20)); INSERT INTO AutonomousResearch (project, company) VALUES ('Tesla Autopilot', 'Tesla'); INSERT INTO AutonomousResearch (project, company) VALUES ('Baidu Apollo', 'Baidu');", "sql": "SELECT project FROM AutonomousResearch WHERE company IN ('Tesla', 'Baidu');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 75, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the UK broadcast date of the episode with Ben Okri as the presenter?", "schema": "CREATE TABLE table_name_87 (uk_broadcast_date VARCHAR, presenter VARCHAR)", "sql": "SELECT uk_broadcast_date FROM table_name_87 WHERE presenter = 'ben okri';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 73, "num_statements": 1} {"question": "Find the total number of concerts in each country for the 'Rock' genre.", "schema": "CREATE TABLE Concerts (ConcertId INT, Venue VARCHAR(255), Country VARCHAR(255), Genre VARCHAR(255), Attendees INT); INSERT INTO Concerts (ConcertId, Venue, Country, Genre, Attendees) VALUES (1, 'Wembley Stadium', 'UK', 'Rock', 50000), (2, 'Stade de France', 'France', 'Rock', 60000), (3, 'MetLife Stadium', 'USA', 'Rock', 40000), (4, 'Estadio Azteca', 'Mexico', 'Rock', 70000), (5, 'ANZ Stadium', 'Australia', 'Rock', 30000);", "sql": "SELECT Country, Genre, SUM(Attendees) AS TotalConcerts FROM Concerts WHERE Genre = 'Rock' GROUP BY Country;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 107, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: what is the overall number of times when the calendar showed october 6", "schema": "CREATE TABLE table_27733909_1 (record VARCHAR, date VARCHAR)", "sql": "SELECT COUNT(record) FROM table_27733909_1 WHERE date = 'October 6';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 68, "num_statements": 1} {"question": "PostgreSQL regression test 'arrays': Write the SELECT query (example 134).", "schema": null, "sql": "SELECT array_cat(ARRAY[1,2], ARRAY[[3,4],[5,6]]) AS \"{{1,2},{3,4},{5,6}}\";", "explanation": "Regression test for Arrays in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT array_cat(ARRAY[1,2], ARRAY[[3,4],[5,6]]) AS \"{{1,2},{3,4},{5,6}}\") AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": true, "sql_length": 74, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the rating for 3.79 viewers", "schema": "CREATE TABLE table_22822468_2 (rating VARCHAR, viewers__millions_ VARCHAR)", "sql": "SELECT rating / SHARE(18 AS –49) FROM table_22822468_2 WHERE viewers__millions_ = '3.79';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 89, "num_statements": 1} {"question": "What is the percentage of sustainable materials used in production for each item, ordered by the percentage in descending order?", "schema": "CREATE TABLE Production (item_id INT, material VARCHAR(255), weight DECIMAL(5,2), sustainable BOOLEAN); INSERT INTO Production (item_id, material, weight, sustainable) VALUES (1, 'Organic Cotton', 2.5, true), (2, 'Polyester', 1.5, false), (3, 'Recycled Wool', 3.0, true), (4, 'Polyester', 2.0, false), (5, 'Organic Cotton', 4.0, true);", "sql": "SELECT item_id, 100.0 * SUM(CASE WHEN sustainable THEN weight ELSE 0 END) / SUM(weight) OVER (PARTITION BY item_id) AS percentage FROM Production ORDER BY percentage DESC;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "query_window_function", "is_postgresql_specific": true, "sql_length": 171, "num_statements": 1} {"question": "What is the total number of successful satellite launches by Japanese and Brazilian space programs?", "schema": "CREATE TABLE Satellite_Launches (launch_date DATE, country VARCHAR(255), success BOOLEAN); INSERT INTO Satellite_Launches (launch_date, country, success) VALUES ('2020-01-01', 'Japan', TRUE), ('2020-02-01', 'Brazil', FALSE), ('2020-03-01', 'Japan', TRUE), ('2020-04-01', 'Brazil', TRUE), ('2020-05-01', 'Japan', FALSE);", "sql": "SELECT SUM(success) FROM (SELECT success FROM Satellite_Launches WHERE country IN ('Japan', 'Brazil')) AS subquery WHERE success = TRUE;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 136, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What year did anna thompson have a 7th place result?", "schema": "CREATE TABLE table_name_85 (year VARCHAR, result VARCHAR)", "sql": "SELECT year FROM table_name_85 WHERE result = '7th';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 52, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the build date of the locomotive with a fa-2 model?", "schema": "CREATE TABLE table_name_55 (build_date VARCHAR, model VARCHAR)", "sql": "SELECT build_date FROM table_name_55 WHERE model = 'fa-2';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 58, "num_statements": 1} {"question": "What is the total revenue generated from ticket sales for the NY Knicks?", "schema": "CREATE TABLE teams (team_id INT, team_name VARCHAR(50)); INSERT INTO teams (team_id, team_name) VALUES (1, 'NY Knicks'), (2, 'LA Lakers'); CREATE TABLE ticket_sales (id INT, team_id INT, revenue INT);", "sql": "SELECT SUM(ticket_sales.revenue) FROM ticket_sales JOIN teams ON ticket_sales.team_id = teams.team_id WHERE teams.team_name = 'NY Knicks';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 138, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who had the most laps led in monterey, california when scott rarick won the race?", "schema": "CREATE TABLE table_25668203_2 (most_laps_led VARCHAR, location VARCHAR, winning_driver VARCHAR)", "sql": "SELECT most_laps_led FROM table_25668203_2 WHERE location = 'Monterey, California' AND winning_driver = 'Scott Rarick';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 119, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'triggers' (example 90).", "schema": null, "sql": "UPDATE city_table SET city_name = NEW.city_name,\n population = NEW.population,\n country_id = ctry_id\n WHERE city_id = OLD.city_id;", "explanation": "DML from PostgreSQL core regression test for Triggers.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_update", "is_postgresql_specific": true, "sql_length": 199, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What are the names of people in ascending order of height?", "schema": "CREATE TABLE People (Name VARCHAR, Height VARCHAR)", "sql": "SELECT Name FROM People ORDER BY Height;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 40, "num_statements": 1} {"question": "What is the total quantity of product 4 in the circular supply chain?", "schema": "CREATE TABLE circular_supply_chain (product_id INT, supplier_id INT, retailer_id INT, quantity INT); INSERT INTO circular_supply_chain (product_id, supplier_id, retailer_id, quantity) VALUES (4, 4, 4, 120);", "sql": "SELECT SUM(quantity) AS total_quantity FROM circular_supply_chain WHERE product_id = 4;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 87, "num_statements": 1} {"question": "List the programs with a budget over $5000 but no volunteers.", "schema": "CREATE TABLE Programs (ProgramID INT, Name TEXT, Budget DECIMAL); CREATE TABLE Volunteers (VolunteerID INT, ProgramID INT); INSERT INTO Programs (ProgramID, Name, Budget) VALUES (1, 'Education', 6000.00), (2, 'Health', 4000.00), (3, 'Environment', 7000.00); INSERT INTO Volunteers (VolunteerID, ProgramID) VALUES (1, 1), (2, 2);", "sql": "SELECT Programs.Name FROM Programs LEFT JOIN Volunteers ON Programs.ProgramID = Volunteers.ProgramID WHERE Programs.Budget > 5000 AND Volunteers.ProgramID IS NULL;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 163, "num_statements": 1} {"question": "What is the maximum number of artifacts found in a single excavation session?", "schema": "CREATE TABLE excavation_sessions (session_id INT, site_id INT, year INT, session_date DATE, total_artifacts INT); INSERT INTO excavation_sessions (session_id, site_id, year, session_date, total_artifacts) VALUES (1, 1, 2018, '2018-01-01', 50), (2, 1, 2018, '2018-01-02', 75), (3, 1, 2018, '2018-01-03', 100), (4, 2, 2019, '2019-02-01', 70), (5, 2, 2019, '2019-02-02', 80), (6, 2, 2019, '2019-02-03', 90), (7, 3, 2020, '2020-03-01', 150), (8, 3, 2020, '2020-03-02', 200), (9, 3, 2020, '2020-03-03', 250);", "sql": "SELECT MAX(total_artifacts) FROM excavation_sessions;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 53, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What college picked someone from The Episcopal Academy?", "schema": "CREATE TABLE table_name_47 (college VARCHAR, school VARCHAR)", "sql": "SELECT college FROM table_name_47 WHERE school = 'episcopal academy';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 69, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the highest Attendance, when Date is \"Oct. 26\"?", "schema": "CREATE TABLE table_name_25 (attendance INTEGER, date VARCHAR)", "sql": "SELECT MAX(attendance) FROM table_name_25 WHERE date = 'oct. 26';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 65, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many roller coasters are there?", "schema": "CREATE TABLE roller_coaster (Id VARCHAR)", "sql": "SELECT COUNT(*) FROM roller_coaster;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 36, "num_statements": 1} {"question": "What is the property size of the smallest co-owned property in London?", "schema": "CREATE TABLE properties (id INT, size FLOAT, co_owned BOOLEAN, city VARCHAR(20)); INSERT INTO properties (id, size, co_owned, city) VALUES (1, 1500, TRUE, 'London'), (2, 2000, FALSE, 'London'), (3, 1000, TRUE, 'London');", "sql": "SELECT size FROM properties WHERE city = 'London' AND co_owned = TRUE ORDER BY size ASC LIMIT 1;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 96, "num_statements": 1} {"question": "Compare the wastewater treatment capacities in New York and Florida.", "schema": "CREATE TABLE wastewater_treatment_capacities(state VARCHAR(20), capacity INT); INSERT INTO wastewater_treatment_capacities(state, capacity) VALUES ('New York', 5000000), ('Florida', 3000000);", "sql": "SELECT capacity FROM wastewater_treatment_capacities WHERE state IN ('New York', 'Florida') ORDER BY capacity DESC;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 115, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: what's the party with district being alabama 3", "schema": "CREATE TABLE table_1342292_2 (party VARCHAR, district VARCHAR)", "sql": "SELECT party FROM table_1342292_2 WHERE district = 'Alabama 3';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 63, "num_statements": 1} {"question": "Insert a new record of equipment maintenance for a Jeep on Apr 30, 2021 with labor cost 200 and parts cost 100.", "schema": "CREATE TABLE Maintenance (id INT, equipment VARCHAR(255), date DATE, labor INT, parts INT);", "sql": "INSERT INTO Maintenance (equipment, date, labor, parts) VALUES ('Jeep', '2021-04-30', 200, 100);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 96, "num_statements": 1} {"question": "What was the average number of strikeouts per game for Team G in the 2018 season?", "schema": "CREATE TABLE games (id INT, team TEXT, location TEXT, strikeouts INT); INSERT INTO games (id, team, location, strikeouts) VALUES (1, 'Team G', 'Home', 12), (2, 'Team G', 'Away', 15);", "sql": "SELECT AVG(strikeouts) FROM games WHERE team = 'Team G' AND year = 2018;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 72, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the highest top-25 with more than 9 top-10 but less than 29 events?", "schema": "CREATE TABLE table_name_17 (top_25 INTEGER, top_10 VARCHAR, events VARCHAR)", "sql": "SELECT MAX(top_25) FROM table_name_17 WHERE top_10 > 9 AND events < 29;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 71, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many long range shots did tonya edwards make.", "schema": "CREATE TABLE table_24906653_5 (field_goals VARCHAR, player VARCHAR)", "sql": "SELECT COUNT(field_goals) FROM table_24906653_5 WHERE player = 'Tonya Edwards';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 79, "num_statements": 1} {"question": "Which menu items have the highest cost?", "schema": "CREATE TABLE menu_items (menu_item_id INT, item_name VARCHAR(255), category VARCHAR(255), price INT); INSERT INTO menu_items (menu_item_id, item_name, category, price) VALUES (1, 'Steak', 'Entree', 25), (2, 'Fries', 'Side', 5), (3, 'Burger', 'Entree', 15), (4, 'Salad', 'Side', 8);", "sql": "SELECT item_name, price FROM menu_items ORDER BY price DESC LIMIT 2;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 68, "num_statements": 1} {"question": "Identify the number of spacecraft missions per day, and rank them in descending order?", "schema": "CREATE TABLE spacecraft_missions (spacecraft_name TEXT, mission_date DATE);", "sql": "SELECT DATE_TRUNC('day', mission_date) as mission_day, COUNT(*) as mission_count, RANK() OVER (ORDER BY COUNT(*) DESC) as mission_rank FROM spacecraft_missions GROUP BY mission_day ORDER BY mission_rank;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_window_function", "is_postgresql_specific": true, "sql_length": 203, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What team defeated the Capital City Giants?", "schema": "CREATE TABLE table_name_11 (team VARCHAR, runner_up VARCHAR)", "sql": "SELECT team FROM table_name_11 WHERE runner_up = 'capital city giants';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 71, "num_statements": 1} {"question": "What is the total quantity of each product in the Sydney warehouse?", "schema": "CREATE TABLE Warehouses (WarehouseID int, WarehouseName varchar(255), City varchar(255), Country varchar(255)); INSERT INTO Warehouses (WarehouseID, WarehouseName, City, Country) VALUES (4, 'Sydney Warehouse', 'Sydney', 'Australia'); CREATE TABLE Inventory (InventoryID int, WarehouseID int, ProductName varchar(255), Quantity int); INSERT INTO Inventory (InventoryID, WarehouseID, ProductName, Quantity) VALUES (4, 4, 'Pears', 100);", "sql": "SELECT ProductName, SUM(Quantity) AS TotalQuantity FROM Inventory WHERE WarehouseID = 4 GROUP BY ProductName;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 109, "num_statements": 1} {"question": "What is the average calorie count per dish for dishes that have more than 5 reviews?", "schema": "CREATE TABLE dishes (dish_id INT, name VARCHAR(255), calories INT, reviews INT); INSERT INTO dishes (dish_id, name, calories, reviews) VALUES (1, 'Pizza', 300, 7), (2, 'Pasta', 400, 3), (3, 'Salad', 200, 8), (4, 'Burger', 500, 10);", "sql": "SELECT AVG(calories) FROM (SELECT calories, ROW_NUMBER() OVER (PARTITION BY name ORDER BY reviews DESC) rn FROM dishes) t WHERE t.rn > 1;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "query_window_function", "is_postgresql_specific": true, "sql_length": 137, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many wins does the club with 565 points for have?", "schema": "CREATE TABLE table_1676073_13 (won VARCHAR, points_for VARCHAR)", "sql": "SELECT won FROM table_1676073_13 WHERE points_for = '565';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 58, "num_statements": 1} {"question": "What is the total quantity of vegetarian items sold per store?", "schema": "CREATE TABLE Stores (StoreID INT, StoreName VARCHAR(50)); INSERT INTO Stores (StoreID, StoreName) VALUES (1, 'Store A'), (2, 'Store B'); CREATE TABLE Menu (MenuID INT, MenuItem VARCHAR(50), Type VARCHAR(20), StoreID INT); INSERT INTO Menu (MenuID, MenuItem, Type, StoreID) VALUES (1, 'Falafel', 'Vegetarian', 1), (2, 'Chicken Shawarma', 'Non-Vegetarian', 1), (3, 'Vegetable Curry', 'Vegetarian', 2), (4, 'Steak Platter', 'Non-Vegetarian', 2); CREATE TABLE Sales (SaleID INT, MenuID INT, Quantity INT, OrderDate DATE); INSERT INTO Sales (SaleID, MenuID, Quantity, OrderDate) VALUES (1, 1, 25, '2022-01-01'), (2, 2, 15, '2022-01-01'), (3, 3, 30, '2022-01-02'), (4, 4, 10, '2022-01-02');", "sql": "SELECT StoreID, SUM(Quantity) as TotalQuantity FROM Sales s JOIN Menu m ON s.MenuID = m.MenuID WHERE Type = 'Vegetarian' GROUP BY StoreID;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 138, "num_statements": 1} {"question": "pgTAP test for Roletap (assertion 5).", "schema": null, "sql": "SELECT * FROM check_test(\n has_role('aoijaoisjfaoidfjaisjdfosjf', 'desc'),\n false,\n 'has_role(nonexistent role, desc)',\n 'desc',\n ''\n);", "explanation": "SQL assertion from pgTAP test suite for Roletap.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 150, "num_statements": 1} {"question": "Delete transactions over $1000 for customers with 'Gold' status", "schema": "CREATE TABLE transactions (id INT PRIMARY KEY, customer_id INT, amount DECIMAL(10,2), transaction_date DATE, customer_status VARCHAR(50)); INSERT INTO transactions (id, customer_id, amount, transaction_date, customer_status) VALUES (1, 1, 500.00, '2022-01-01', 'Gold'); INSERT INTO transactions (id, customer_id, amount, transaction_date, customer_status) VALUES (2, 2, 750.00, '2022-01-02', 'Silver');", "sql": "DELETE t FROM transactions t WHERE t.amount > 1000 AND t.customer_status = 'Gold';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_delete", "is_postgresql_specific": false, "sql_length": 82, "num_statements": 1} {"question": "pgTAP test for Inheritance (assertion 94).", "schema": null, "sql": "SELECT * FROM check_test(\n is_descendent_of( 'hide', 'h_child1', 'hide', 'h_parent', 1 ),\n true,\n 'is_descendent_of(csch, ctab, psch, ptab, 1)',\n 'Table hide.h_child1 should be descendent 1 from hide.h_parent',\n ''\n);", "explanation": "SQL assertion from pgTAP test suite for Inheritance.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 232, "num_statements": 1} {"question": "What is the total CO2 emissions from manufacturing cosmetics in the last 12 months?", "schema": "CREATE TABLE manufacturing_emissions (emission_id INT, product_id INT, co2_emissions FLOAT, emission_date DATE);", "sql": "SELECT SUM(co2_emissions) FROM manufacturing_emissions WHERE emission_date BETWEEN DATE_SUB(CURRENT_DATE, INTERVAL 12 MONTH) AND CURRENT_DATE;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 142, "num_statements": 1} {"question": "Write the SQL definition from PostgreSQL contrib extension 'contrib' (item 48).", "schema": null, "sql": "CREATE FUNCTION subarray(_int4, int4)\nRETURNS _int4\nAS 'MODULE_PATHNAME'\nLANGUAGE C STRICT IMMUTABLE PARALLEL SAFE;", "explanation": "SQL definition from the 'contrib' PostgreSQL contrib extension.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 115, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What points for have 30 as the game, and 73 as the total points?", "schema": "CREATE TABLE table_name_92 (points_for VARCHAR, games VARCHAR, total_points VARCHAR)", "sql": "SELECT points_for FROM table_name_92 WHERE games = '30' AND total_points = '73';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 80, "num_statements": 1} {"question": "PostgreSQL regression test 'domain': Write the SELECT query (example 341).", "schema": null, "sql": "select array[1,2]::orderedpair;", "explanation": "Regression test for Domain in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select array[1,2]::orderedpair) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": true, "sql_length": 31, "num_statements": 1} {"question": "What is the average number of satellites launched per year by each country, ordered by the country with the highest average?", "schema": "CREATE TABLE SatelliteMissions (MissionID INT, Name VARCHAR(50), LaunchCountry VARCHAR(50), LaunchDate DATE); INSERT INTO SatelliteMissions VALUES (1, 'GSAT-12', 'India', '2011-07-15'); INSERT INTO SatelliteMissions VALUES (2, 'GSAT-11', 'India', '2018-12-04');", "sql": "SELECT LaunchCountry, AVG(DATEDIFF(YEAR, LaunchDate, LEAD(LaunchDate) OVER (PARTITION BY LaunchCountry ORDER BY LaunchDate))) as avg_launches_per_year FROM SatelliteMissions GROUP BY LaunchCountry ORDER BY avg_launches_per_year DESC;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "query_window_function", "is_postgresql_specific": true, "sql_length": 233, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which 2007 has a Tournament of grand slam sr?", "schema": "CREATE TABLE table_name_32 (tournament VARCHAR)", "sql": "SELECT 2007 FROM table_name_32 WHERE tournament = 'grand slam sr';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which charts had debut sales of of more than 339333.011497678?", "schema": "CREATE TABLE table_23180638_1 (oricon_albums_chart VARCHAR, debut_sales__copies_ INTEGER)", "sql": "SELECT oricon_albums_chart FROM table_23180638_1 WHERE debut_sales__copies_ > 339333.011497678;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 95, "num_statements": 1} {"question": "What is the minimum number of reviews for hotels in Canada?", "schema": "CREATE TABLE hotels (id INT, name TEXT, country TEXT, reviews INT); INSERT INTO hotels (id, name, country, reviews) VALUES (1, 'Hotel A', 'Canada', 120), (2, 'Hotel B', 'Canada', 80), (3, 'Hotel C', 'Canada', 50);", "sql": "SELECT MIN(reviews) FROM hotels WHERE country = 'Canada';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 57, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the highest number listed?", "schema": "CREATE TABLE table_12113888_1 (number INTEGER)", "sql": "SELECT MAX(number) FROM table_12113888_1;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 41, "num_statements": 1} {"question": "How many workplaces in Canada have no labor rights violations?", "schema": "CREATE TABLE workplaces (id INT, country VARCHAR(50), has_lrv BOOLEAN); INSERT INTO workplaces (id, country, has_lrv) VALUES (1, 'Canada', true), (2, 'Canada', false), (3, 'Canada', false);", "sql": "SELECT COUNT(*) FROM workplaces WHERE country = 'Canada' AND has_lrv = false;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 77, "num_statements": 1} {"question": "How many size 14 dresses were sold in the last 3 months?", "schema": "CREATE TABLE sales (sale_id INTEGER, product_id INTEGER, size INTEGER, date DATE); INSERT INTO sales (sale_id, product_id, size, date) VALUES (1, 1001, 12, '2022-01-05'), (2, 1002, 8, '2022-02-10'), (3, 1003, 14, '2022-03-20');", "sql": "SELECT COUNT(*) FROM sales WHERE size = 14 AND date >= DATEADD(month, -3, CURRENT_DATE);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 88, "num_statements": 1} {"question": "PostgreSQL regression test 'jsonb': Write the SELECT query (example 105).", "schema": null, "sql": "SELECT jsonb_array_length('[]');", "explanation": "Regression test for Jsonb in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT jsonb_array_length('[]')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 32, "num_statements": 1} {"question": "PL/pgSQL test: Plpython Error (example 49).", "schema": null, "sql": "/* raising plpy.spiexception.* from python code should preserve sqlstate\n */\nCREATE FUNCTION plpy_raise_spiexception() RETURNS void AS $$\nraise plpy.spiexceptions.DivisionByZero()\n$$ LANGUAGE plpython3u;", "explanation": "PL/pgSQL example from PostgreSQL source test for Plpython Error.", "validation_query": null, "source": "plpgsql_source", "difficulty": "intermediate", "category": "plpgsql_function", "is_postgresql_specific": false, "sql_length": 203, "num_statements": 1} {"question": "Delete all records of ticket sales for a specific salesperson, identified by their salesperson ID.", "schema": "CREATE TABLE salesperson (salesperson_id INT, name VARCHAR(50), position VARCHAR(50)); CREATE TABLE tickets (ticket_id INT, salesperson_id INT, event_id INT, price DECIMAL(5,2), quantity INT); INSERT INTO salesperson VALUES (1, 'John Doe', 'Senior Salesperson'); INSERT INTO tickets VALUES (1, 1, 1, 50, 100);", "sql": "DELETE t FROM tickets t WHERE t.salesperson_id = 1;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_delete", "is_postgresql_specific": false, "sql_length": 51, "num_statements": 1} {"question": "Create a view to display the job titles and departments of applicants", "schema": "CREATE TABLE TalentAcquisition (ApplicantID INT PRIMARY KEY, JobTitle VARCHAR(30), Department VARCHAR(20), ApplicationDate DATE);", "sql": "CREATE VIEW JobTitlesDepartments AS SELECT JobTitle, Department FROM TalentAcquisition;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "ddl_view", "is_postgresql_specific": false, "sql_length": 87, "num_statements": 1} {"question": "Find the number of movies released each year, by studios located in California, that have a runtime of over 2 hours.", "schema": "CREATE TABLE movies (id INT, title VARCHAR(255), release_year INT, runtime INT, studio_location VARCHAR(255));", "sql": "SELECT release_year, COUNT(*) as num_movies FROM movies WHERE studio_location = 'California' AND runtime > 120 GROUP BY release_year;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 133, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which episode is number 3 in the season?", "schema": "CREATE TABLE table_29960651_5 (episode VARCHAR, no_for_season VARCHAR)", "sql": "SELECT episode FROM table_29960651_5 WHERE no_for_season = 3;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 61, "num_statements": 1} {"question": "Show a SQL definition from the zombodb project (test-analyzers, item 3).", "schema": null, "sql": "CREATE INDEX idxanalyzers_test ON analyzers_test USING zombodb ((analyzers_test));", "explanation": "SQL definition from the open-source zombodb PostgreSQL project.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "ddl_index", "is_postgresql_specific": false, "sql_length": 82, "num_statements": 1} {"question": "What is the total sales of all drugs in India?", "schema": "CREATE TABLE sales (drug_id VARCHAR(10), country VARCHAR(10), sales_amount NUMERIC(12,2));", "sql": "SELECT SUM(sales_amount) FROM sales WHERE country = 'India';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 60, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many records were there when opponents were 9?", "schema": "CREATE TABLE table_22815265_1 (record VARCHAR, opponents VARCHAR)", "sql": "SELECT COUNT(record) FROM table_22815265_1 WHERE opponents = 9;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 63, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the 1999 tournament that has a Q1 in 2003 and a Q3 in 2002?", "schema": "CREATE TABLE table_name_2 (Id VARCHAR)", "sql": "SELECT 1999 FROM table_name_2 WHERE 2003 = 'q1' AND 2002 = 'q3';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 64, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'privileges' (example 20).", "schema": null, "sql": "CREATE USER regress_priv_user8;", "explanation": "DDL from PostgreSQL core regression test for Privileges.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 31, "num_statements": 1} {"question": "What is the average age of patients with diabetes in rural Texas who are male?", "schema": "CREATE TABLE patients (id INT, name VARCHAR(50), age INT, gender VARCHAR(10), diagnosis VARCHAR(50), location VARCHAR(50)); INSERT INTO patients (id, name, age, gender, diagnosis, location) VALUES (1, 'Jane Doe', 65, 'Female', 'Diabetes', 'Texas'), (2, 'John Doe', 45, 'Male', 'Diabetes', 'Texas'), (3, 'Jim Brown', 55, 'Male', 'Diabetes', 'Texas');", "sql": "SELECT AVG(age) FROM patients WHERE diagnosis = 'Diabetes' AND gender = 'Male' AND location = 'Texas';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 102, "num_statements": 1} {"question": "Delete a customer record from the 'customers' table", "schema": "CREATE TABLE customers (customer_id INT, first_name VARCHAR(255), last_name VARCHAR(255), email VARCHAR(255), address VARCHAR(255));", "sql": "DELETE FROM customers WHERE customer_id = 1001;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_delete", "is_postgresql_specific": false, "sql_length": 47, "num_statements": 1} {"question": "What is the average media literacy score for users in each country who have completed at least one disinformation detection course?", "schema": "CREATE TABLE users (id INT, country VARCHAR(50), media_literacy_score INT, courses_completed INT);", "sql": "SELECT country, AVG(media_literacy_score) FROM users WHERE courses_completed > 0 GROUP BY country;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 98, "num_statements": 1} {"question": "Find the number of attendees by age group and program type in 2021", "schema": "CREATE TABLE attendee_program_info (id INT, attendee_age INT, program_type VARCHAR(255), visit_year INT);", "sql": "SELECT attendee_age, program_type, COUNT(*) OVER (PARTITION BY attendee_age, program_type) AS attendees_by_age_and_program FROM attendee_program_info WHERE visit_year = 2021 ORDER BY attendee_age, program_type;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "query_window_function", "is_postgresql_specific": true, "sql_length": 211, "num_statements": 1} {"question": "Which biotech startups received funding in Canada and have a female founder?", "schema": "CREATE TABLE funding(startup VARCHAR(50), country VARCHAR(20), founder_gender VARCHAR(10));INSERT INTO funding(startup, country, founder_gender) VALUES('StartupA', 'Canada', 'Female'), ('StartupB', 'US', 'Male'), ('StartupC', 'Canada', 'Female');", "sql": "SELECT startup FROM funding WHERE country = 'Canada' AND founder_gender = 'Female';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 83, "num_statements": 1} {"question": "How many cultural heritage sites are there in Japan, and what is the average number of virtual tours per site?", "schema": "CREATE TABLE cultural_sites(site_id INT, site_name TEXT, country TEXT, num_virtual_tours INT); INSERT INTO cultural_sites(site_id, site_name, country, num_virtual_tours) VALUES (1, 'Temple Mount', 'Japan', 3), (2, 'Kinkaku-ji', 'Japan', 5), (3, 'Himeji Castle', 'Japan', 4);", "sql": "SELECT COUNT(*), AVG(num_virtual_tours) FROM cultural_sites WHERE country = 'Japan';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 84, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the D 48 √ with a D 46 √ with r 33 o?", "schema": "CREATE TABLE table_name_87 (d_48_√ VARCHAR, d_46_√ VARCHAR)", "sql": "SELECT d_48_√ FROM table_name_87 WHERE d_46_√ = 'r 33 o';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 57, "num_statements": 1} {"question": "Calculate the average home value for socially responsible loans in New York", "schema": "CREATE TABLE socially_responsible_loans (id INT, home_value FLOAT, state VARCHAR(255)); CREATE TABLE states (id INT, state VARCHAR(255), region VARCHAR(255));", "sql": "SELECT AVG(home_value) FROM socially_responsible_loans INNER JOIN states ON socially_responsible_loans.state = states.state WHERE states.state = 'New York';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 156, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which biggest interview score had an evening gown stat of 9.286?", "schema": "CREATE TABLE table_name_56 (interview INTEGER, evening_gown VARCHAR)", "sql": "SELECT MAX(interview) FROM table_name_56 WHERE evening_gown = 9.286;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 68, "num_statements": 1} {"question": "PostgreSQL regression test 'rangetypes': Write the SELECT query (example 187).", "schema": null, "sql": "select count(*) from test_range_gist where ir && int4range(10,20);", "explanation": "Regression test for Rangetypes in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select count(*) from test_range_gist where ir && int4range(10,20)) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What state is 25% Democrats has a ratio of 6/2 of Republicans to Democrats?", "schema": "CREATE TABLE table_name_66 (state_ranked_in_partisan_order VARCHAR, percentage_democrats VARCHAR, republican__democratic VARCHAR)", "sql": "SELECT state_ranked_in_partisan_order FROM table_name_66 WHERE percentage_democrats = '25%' AND republican__democratic = '6/2';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 127, "num_statements": 1} {"question": "How many public libraries were opened in each year from 2018 to 2021?", "schema": "CREATE TABLE Facilities (Year INT, FacilityType TEXT); INSERT INTO Facilities (Year, FacilityType) VALUES (2018, 'PublicLibrary'), (2019, 'PublicLibrary'), (2020, 'PublicLibrary'), (2021, 'PublicLibrary');", "sql": "SELECT Year, COUNT(*) FROM Facilities WHERE FacilityType = 'PublicLibrary' GROUP BY Year;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 89, "num_statements": 1} {"question": "Calculate the total waste generated by each manufacturer in the North American region", "schema": "CREATE TABLE Manufacturers (manufacturer_id INT, manufacturer_name VARCHAR(50), region VARCHAR(50)); INSERT INTO Manufacturers (manufacturer_id, manufacturer_name, region) VALUES (1, 'ManufacturerA', 'North America'), (2, 'ManufacturerB', 'Europe'); CREATE TABLE WasteData (manufacturer_id INT, waste_quantity INT); INSERT INTO WasteData (manufacturer_id, waste_quantity) VALUES (1, 120), (1, 150), (2, 80);", "sql": "SELECT m.manufacturer_name, SUM(wd.waste_quantity) AS total_waste FROM Manufacturers m INNER JOIN WasteData wd ON m.manufacturer_id = wd.manufacturer_id WHERE m.region = 'North America' GROUP BY m.manufacturer_name;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 215, "num_statements": 1} {"question": "Update the sustainability_score of the supplier with supplier_id 2 to 4.5", "schema": "UPDATE suppliers SET sustainability_score = 4.5 WHERE supplier_id = 2;", "sql": "UPDATE suppliers SET sustainability_score = 4.5 WHERE supplier_id = 2;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 70, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'alter_generic' (example 18).", "schema": null, "sql": "CREATE AGGREGATE alt_agg2 (\n sfunc1 = int4mi, basetype = int4, stype1 = int4, initcond = 0\n);", "explanation": "DDL from PostgreSQL core regression test for Alter Generic.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "ddl_advanced", "is_postgresql_specific": true, "sql_length": 94, "num_statements": 1} {"question": "What is the success rate of therapy as a treatment approach for patients with anxiety in Japan?", "schema": "CREATE TABLE treatments (id INT, patient_id INT, approach VARCHAR(255), success BOOLEAN); INSERT INTO treatments (id, patient_id, approach, success) VALUES (1, 1, 'Medication', TRUE), (2, 2, 'Therapy', FALSE), (3, 3, 'Therapy', TRUE), (4, 4, 'Meditation', TRUE); INSERT INTO patients (id, age, condition, country) VALUES (1, 35, 'Anxiety', 'Japan'), (2, 42, 'Depression', 'Canada'), (3, 28, 'Anxiety', 'Japan'), (4, 31, 'Bipolar', 'Mexico');", "sql": "SELECT AVG(treatments.success) FROM treatments JOIN patients ON treatments.patient_id = patients.id WHERE patients.condition = 'Anxiety' AND patients.country = 'Japan' AND treatments.approach = 'Therapy';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 204, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: On what day was game 2 played?", "schema": "CREATE TABLE table_name_56 (date VARCHAR, game VARCHAR)", "sql": "SELECT date FROM table_name_56 WHERE game = 2;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 46, "num_statements": 1} {"question": "What is the minimum production rate for wells in the Marcellus Shale?", "schema": "CREATE TABLE well_prod (well_name VARCHAR(50), location VARCHAR(50), rate FLOAT); INSERT INTO well_prod (well_name, location, rate) VALUES ('Well A', 'Marcellus Shale', 1200), ('Well B', 'Marcellus Shale', 1800);", "sql": "SELECT MIN(rate) FROM well_prod WHERE location = 'Marcellus Shale';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 67, "num_statements": 1} {"question": "pgTAP test for Resultset (assertion 53).", "schema": null, "sql": "INSERT INTO names (name) VALUES ('Taylor');", "explanation": "SQL assertion from pgTAP test suite for Resultset.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 43, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What years was Lebanon the nationality?", "schema": "CREATE TABLE table_name_82 (years_in_orlando VARCHAR, nationality VARCHAR)", "sql": "SELECT years_in_orlando FROM table_name_82 WHERE nationality = 'lebanon';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 73, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the seasons and winners that airs 28 january 2007", "schema": "CREATE TABLE table_16884579_1 (seasons_and_winners VARCHAR, premiere VARCHAR)", "sql": "SELECT seasons_and_winners FROM table_16884579_1 WHERE premiere = '28 January 2007';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 84, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which game did New Jersey Devils played in?", "schema": "CREATE TABLE table_name_54 (game VARCHAR, opponent VARCHAR)", "sql": "SELECT game FROM table_name_54 WHERE opponent = 'new jersey devils';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 68, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'citext' (example 37).", "schema": null, "sql": "SELECT citext_smaller( 'aardvark'::citext, 'Aaba'::citext ) = 'Aaba' AS t;", "explanation": "Example query from the 'citext' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 74, "num_statements": 1} {"question": "What was the minimum donation amount from individual donors in the West region in 2020?", "schema": "CREATE TABLE DonorContributions (donor_id INT, donation_amount DECIMAL(10,2), donation_date DATE, region VARCHAR(50)); INSERT INTO DonorContributions (donor_id, donation_amount, donation_date, region) VALUES (4, 300, '2020-01-01', 'West'), (5, 400, '2020-02-01', 'West'), (6, 200, '2020-03-01', 'East');", "sql": "SELECT MIN(donation_amount) FROM DonorContributions WHERE region = 'West' AND YEAR(donation_date) = 2020 AND donor_id NOT IN (SELECT donor_id FROM DonorContributions WHERE donation_type = 'Corporate');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 201, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'merge' (example 471).", "schema": null, "sql": "INSERT INTO cj_source1 VALUES (3, 10, 400);", "explanation": "DML from PostgreSQL core regression test for Merge.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 43, "num_statements": 1} {"question": "Show the total revenue for the \"classical\" genre from all platforms, excluding any platforms that have a revenue lower than $10,000 for that genre.", "schema": "CREATE TABLE platformA (genre TEXT, revenue INT); CREATE TABLE platformB (genre TEXT, revenue INT); CREATE TABLE platformC (genre TEXT, revenue INT);", "sql": "SELECT genre, SUM(revenue) FROM (SELECT genre, revenue FROM platformA WHERE genre = 'classical' AND revenue >= 10000 UNION ALL SELECT genre, revenue FROM platformB WHERE genre = 'classical' AND revenue >= 10000 UNION ALL SELECT genre, revenue FROM platformC WHERE genre = 'classical' AND revenue >= 10000) AS combined_platforms GROUP BY genre;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 343, "num_statements": 1} {"question": "List all sculptures from the 20th century with their material and the name of the museum they are exhibited in, if available, sorted by the sculpture's creation date. If no museum is available, order by material.", "schema": "CREATE TABLE Sculptures (SculptureID INT, Title VARCHAR(50), CreationDate DATE, Material VARCHAR(50), MuseumID INT); CREATE TABLE Museums (MuseumID INT, Name VARCHAR(50)); INSERT INTO Sculptures VALUES (1, 'Bird in Space', '1923', 'Bronze', 1); INSERT INTO Museums VALUES (1, 'Museum of Modern Art');", "sql": "SELECT s.Title, s.CreationDate, s.Material, m.Name FROM Sculptures s LEFT JOIN Museums m ON s.MuseumID = m.MuseumID WHERE YEAR(s.CreationDate) >= 1900 ORDER BY s.CreationDate, s.Material;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 187, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many counties have people before profit as the party, and a borough greater than 0?", "schema": "CREATE TABLE table_name_16 (county INTEGER, party VARCHAR, borough VARCHAR)", "sql": "SELECT SUM(county) FROM table_name_16 WHERE party = 'people before profit' AND borough > 0;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 91, "num_statements": 1} {"question": "Count the number of streams for Latin songs in Spain during 2021.", "schema": "CREATE TABLE Streaming (country VARCHAR(50), genre VARCHAR(50), year INT, streams INT); INSERT INTO Streaming (country, genre, year, streams) VALUES ('Spain', 'Latin', 2021, 1500000); INSERT INTO Streaming (country, genre, year, streams) VALUES ('Spain', 'Latin', 2021, 1600000);", "sql": "SELECT COUNT(streams) FROM Streaming WHERE country = 'Spain' AND genre = 'Latin' AND year = 2021;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 97, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: what is the metlink code that opened in 1908?", "schema": "CREATE TABLE table_3005450_1 (metlink_code VARCHAR, opened VARCHAR)", "sql": "SELECT metlink_code FROM table_3005450_1 WHERE opened = '1908';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 63, "num_statements": 1} {"question": "Get the carrier_name and count of shipments for each warehouse_id from the shipment table where the shipped_date is between '2021-01-01' and '2021-12-31' grouped by carrier_name and warehouse_id", "schema": "CREATE TABLE shipment (shipment_id VARCHAR(10), status VARCHAR(20), warehouse_id VARCHAR(10), carrier_name VARCHAR(30), shipped_date DATE);", "sql": "SELECT carrier_name, warehouse_id, COUNT(*) as count FROM shipment WHERE shipped_date BETWEEN '2021-01-01' AND '2021-12-31' GROUP BY carrier_name, warehouse_id;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 160, "num_statements": 1} {"question": "Which minerals were extracted in quantities greater than 1000 tons by companies that have mining operations in South America?", "schema": "CREATE TABLE company (id INT, name VARCHAR(255), country VARCHAR(255));CREATE TABLE extraction (company_id INT, mineral VARCHAR(255), amount INT);", "sql": "SELECT DISTINCT e.mineral FROM extraction e JOIN company c ON e.company_id = c.id WHERE c.country LIKE '%South America%' AND e.amount > 1000;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 141, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What are the Notes of the Country of finland?", "schema": "CREATE TABLE table_name_16 (notes VARCHAR, country VARCHAR)", "sql": "SELECT notes FROM table_name_16 WHERE country = 'finland';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 58, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the Total when the Set 3 was 25-15?", "schema": "CREATE TABLE table_name_68 (total VARCHAR, set_3 VARCHAR)", "sql": "SELECT total FROM table_name_68 WHERE set_3 = '25-15';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 54, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: In which district is the residence Dublin?", "schema": "CREATE TABLE table_26131768_4 (district VARCHAR, residence VARCHAR)", "sql": "SELECT district FROM table_26131768_4 WHERE residence = 'Dublin';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 65, "num_statements": 1} {"question": "pgTAP test for Pgtap--0.91.0--0.92.0 (assertion 76).", "schema": null, "sql": "CREATE OR REPLACE FUNCTION has_index ( NAME, NAME, NAME, text )\nRETURNS TEXT AS $$\n SELECT CASE WHEN _is_schema( $1 ) THEN\n -- Looking for schema.table index.\n ok ( _have_index( $1, $2, $3 ), $4)\n ELSE\n -- Looking for particular columns.\n has_index( $1, $2, ARRAY[$3], $4 )\n END;\n$$ LANGUAGE sql;", "explanation": "SQL assertion from pgTAP test suite for Pgtap--0.91.0--0.92.0.", "validation_query": null, "source": "pgtap_tests", "difficulty": "advanced", "category": "plpgsql_function", "is_postgresql_specific": true, "sql_length": 347, "num_statements": 2} {"question": "What is the total billable hours for cases opened in the last year?", "schema": "CREATE TABLE TimeEntries (EntryID INT, CaseID INT, Hours DECIMAL(10,2)); INSERT INTO TimeEntries (EntryID, CaseID, Hours) VALUES (1, 1, 5.00), (2, 2, 7.50);", "sql": "SELECT SUM(Hours) FROM TimeEntries INNER JOIN Cases ON TimeEntries.CaseID = Cases.CaseID WHERE OpenDate >= DATEADD(year, -1, GETDATE());", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 136, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many items withdrawn had numbers over 5?", "schema": "CREATE TABLE table_name_93 (withdrawn VARCHAR, number INTEGER)", "sql": "SELECT COUNT(withdrawn) FROM table_name_93 WHERE number > 5;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 60, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What player from South Africa had a total less than 284?", "schema": "CREATE TABLE table_name_93 (player VARCHAR, total VARCHAR, country VARCHAR)", "sql": "SELECT player FROM table_name_93 WHERE total < 284 AND country = 'south africa';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 80, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many picks did Jay Bruchak have?", "schema": "CREATE TABLE table_name_31 (pick VARCHAR, player VARCHAR)", "sql": "SELECT COUNT(pick) FROM table_name_31 WHERE player = 'jay bruchak';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 67, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the sum of Year, when Result is 9th?", "schema": "CREATE TABLE table_name_10 (year INTEGER, result VARCHAR)", "sql": "SELECT SUM(year) FROM table_name_10 WHERE result = '9th';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 57, "num_statements": 1} {"question": "What is the name and location of all airports in the state of Texas with more than 100,000 annual passengers?", "schema": "CREATE TABLE Airports (id INT, name VARCHAR(100), location VARCHAR(100), annual_passengers INT, state VARCHAR(50)); INSERT INTO Airports (id, name, location, annual_passengers, state) VALUES (1, 'Dallas/Fort Worth International Airport', 'DFW', 69000000, 'Texas');", "sql": "SELECT name, location FROM Airports WHERE state = 'Texas' AND annual_passengers > 100000;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 89, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'earthdistance' (example 42).", "schema": null, "sql": "SELECT latitude(ll_to_earth(-90,90))::numeric(20,10);", "explanation": "Example query from the 'earthdistance' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 53, "num_statements": 1} {"question": "What is the total energy consumption for each mining site in Q2 2022, ordered by the highest consumption?", "schema": "CREATE TABLE mining_sites (site_id INT, site_name VARCHAR(50), energy_consumption INT); INSERT INTO mining_sites (site_id, site_name, energy_consumption) VALUES (1, 'Site D', 4000), (2, 'Site E', 6000), (3, 'Site F', 3000);", "sql": "SELECT site_name, SUM(energy_consumption) as total_energy_consumption FROM mining_sites WHERE energy_consumption >= 0 AND consumption_date >= '2022-04-01' AND consumption_date <= '2022-06-30' GROUP BY site_name ORDER BY total_energy_consumption DESC;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 250, "num_statements": 1} {"question": "List the number of factories in each state", "schema": "CREATE TABLE factories_state (id INT, name VARCHAR(50), state VARCHAR(50)); INSERT INTO factories_state (id, name, state) VALUES (1, 'Factory A', 'New York'), (2, 'Factory B', 'California'), (3, 'Factory C', 'Texas'), (4, 'Factory D', 'New York'), (5, 'Factory E', 'California');", "sql": "SELECT state, COUNT(*) FROM factories_state GROUP BY state;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 59, "num_statements": 1} {"question": "What is the total revenue generated from upcycling initiatives in Q1 2021?", "schema": "CREATE TABLE sales (sale_id INT, initiative_type TEXT, initiative_region TEXT, revenue DECIMAL, sale_date DATE); INSERT INTO sales (sale_id, initiative_type, initiative_region, revenue, sale_date) VALUES (1, 'Upcycling', 'Europe', 500, '2021-01-01'), (2, 'Recycling', 'North America', 1000, '2021-01-02'), (3, 'Donation', 'Asia', 250, '2021-01-03'), (4, 'Upcycling', 'Europe', 750, '2021-04-01');", "sql": "SELECT SUM(revenue) FROM sales WHERE initiative_type = 'Upcycling' AND initiative_region = 'Europe' AND sale_date BETWEEN '2021-01-01' AND '2021-03-31';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 152, "num_statements": 1} {"question": "What is the average speed of vessels arriving from Japan to the US west coast?", "schema": "CREATE TABLE vessels(id INT, name VARCHAR(50), country VARCHAR(50), speed FLOAT); INSERT INTO vessels(id, name, country, speed) VALUES (1, 'Vessel1', 'Japan', 25.3); INSERT INTO vessels(id, name, country, speed) VALUES (2, 'Vessel2', 'Japan', 27.1);", "sql": "SELECT AVG(speed) FROM vessels WHERE country = 'Japan' AND name IN ('Vessel1', 'Vessel2');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 90, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which To par has a Score larger than 68?", "schema": "CREATE TABLE table_name_42 (to_par VARCHAR, score INTEGER)", "sql": "SELECT to_par FROM table_name_42 WHERE score > 68;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 50, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: how many series have production code 8acx05", "schema": "CREATE TABLE table_26259391_1 (no_in_series VARCHAR, production_code VARCHAR)", "sql": "SELECT no_in_series FROM table_26259391_1 WHERE production_code = '8ACX05';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 75, "num_statements": 1} {"question": "Update the price of the Veggie Burger to $13.00 in the menu_items table", "schema": "CREATE TABLE menu_items (menu_item_id INT, name VARCHAR(255), description TEXT, price DECIMAL(5,2), category VARCHAR(255), sustainability_rating INT);", "sql": "UPDATE menu_items SET price = 13.00 WHERE name = 'Veggie Burger';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 65, "num_statements": 1} {"question": "What is the average time to resolve a security incident for each incident type?", "schema": "CREATE TABLE security_incidents (id INT, incident_type VARCHAR(50), resolution_time INT); INSERT INTO security_incidents (id, incident_type, resolution_time) VALUES (1, 'Malware', 120), (2, 'Phishing', 180);", "sql": "SELECT incident_type, AVG(resolution_time) as avg_resolution_time FROM security_incidents GROUP BY incident_type;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 113, "num_statements": 1} {"question": "Show the agroecology farmers' details and their offered produce in Asian regions.", "schema": "CREATE TABLE Agroecology_Farmers_4 (id INT PRIMARY KEY, name VARCHAR(50), age INT, location VARCHAR(50), sustainable_practices BOOLEAN); INSERT INTO Agroecology_Farmers_4 (id, name, age, location, sustainable_practices) VALUES (1, 'Hiroshi Tanaka', 45, 'Japanese Alps', true); INSERT INTO Agroecology_Farmers_4 (id, name, age, location, sustainable_practices) VALUES (2, 'Mei-Ling Chen', 50, 'Taiwan Highlands', true); CREATE TABLE Agroecology_Produce_4 (id INT PRIMARY KEY, product_name VARCHAR(50), price DECIMAL(5,2), farmer_id INT, location VARCHAR(50)); INSERT INTO Agroecology_Produce_4 (id, product_name, price, farmer_id, location) VALUES (1, 'Rice', 0.50, 1, 'Japanese Alps'); INSERT INTO Agroecology_Produce_4 (id, product_name, price, farmer_id, location) VALUES (2, 'Tea', 1.00, 2, 'Taiwan Highlands');", "sql": "SELECT af.name, af.location, ap.product_name, ap.price FROM Agroecology_Farmers_4 af INNER JOIN Agroecology_Produce_4 ap ON af.id = ap.farmer_id WHERE af.location IN ('Japanese Alps', 'Taiwan Highlands');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 204, "num_statements": 1} {"question": "What is the number of marine protected areas in each country in the 'marine_protected_areas' and 'countries' tables?\"", "schema": "CREATE TABLE marine_protected_areas (area_name VARCHAR(50), country_name VARCHAR(50)); CREATE TABLE countries (country_name VARCHAR(50), population INT);", "sql": "SELECT mpa.country_name, COUNT(*) as num_areas FROM marine_protected_areas mpa JOIN countries c ON mpa.country_name = c.country_name GROUP BY mpa.country_name;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 159, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the total rank of team roanne, which has less than 14 games?", "schema": "CREATE TABLE table_name_58 (rank VARCHAR, team VARCHAR, games VARCHAR)", "sql": "SELECT COUNT(rank) FROM table_name_58 WHERE team = 'roanne' AND games < 14;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 75, "num_statements": 1} {"question": "How many students have increased their mental health score by more than 10% in the last 6 months?", "schema": "CREATE TABLE student_mental_health_history (student_id INT, score INT, date DATE); INSERT INTO student_mental_health_history VALUES (1, 60, '2022-01-01'), (1, 66, '2022-07-01'), (2, 80, '2022-01-01'), (2, 88, '2022-07-01');", "sql": "SELECT COUNT(*) FROM (SELECT student_id, (score - LAG(score) OVER(PARTITION BY student_id ORDER BY date)) / LAG(score) OVER(PARTITION BY student_id ORDER BY date) * 100.0 as increase_percentage FROM student_mental_health_history WHERE date >= DATEADD(month, -6, GETDATE())) t WHERE increase_percentage > 10;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "query_window_function", "is_postgresql_specific": true, "sql_length": 307, "num_statements": 1} {"question": "List the total number of patients with diabetes in rural hospitals, grouped by hospital location state.", "schema": "CREATE TABLE patients (patient_id INT, age INT, gender TEXT, rural BOOLEAN, disease TEXT, hospital_id INT); INSERT INTO patients (patient_id, age, gender, rural, disease, hospital_id) VALUES (1, 50, 'Female', true, 'Diabetes', 1); CREATE TABLE hospitals (hospital_id INT, hospital_name TEXT, beds INT, rural BOOLEAN, state_id INT); INSERT INTO hospitals (hospital_id, hospital_name, beds, rural, state_id) VALUES (1, 'Hospital A', 100, true, 1); CREATE TABLE states (state_id INT, state TEXT); INSERT INTO states (state_id, state) VALUES (1, 'Alabama'), (2, 'Alaska');", "sql": "SELECT states.state, SUM(CASE WHEN patients.disease = 'Diabetes' THEN 1 ELSE 0 END) patient_count FROM patients JOIN hospitals ON patients.hospital_id = hospitals.hospital_id JOIN states ON hospitals.state_id = states.state_id WHERE hospitals.rural = true GROUP BY states.state;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 278, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'numeric' (example 128).", "schema": null, "sql": "INSERT INTO num_exp_add VALUES (2,9,'-59265296.260444467');", "explanation": "DML from PostgreSQL core regression test for Numeric.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 59, "num_statements": 1} {"question": "How many startups were founded in the healthcare sector in 2020?", "schema": "CREATE TABLE startup (id INT, name TEXT, founding_year INT, industry TEXT); INSERT INTO startup (id, name, founding_year, industry) VALUES (1, 'HealthcareOne', 2020, 'Healthcare'); INSERT INTO startup (id, name, founding_year, industry) VALUES (2, 'TechStart', 2018, 'Tech');", "sql": "SELECT COUNT(*) FROM startup WHERE founding_year = 2020 AND industry = 'Healthcare';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 84, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What date did Episode 22 originally air?", "schema": "CREATE TABLE table_23392257_4 (original_airdate VARCHAR, _number VARCHAR)", "sql": "SELECT COUNT(original_airdate) FROM table_23392257_4 WHERE _number = 22;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 72, "num_statements": 1} {"question": "What is the number of events per genre, for visual arts events in Paris, in descending order?", "schema": "CREATE TABLE Events (id INT, event_name VARCHAR(100), event_type VARCHAR(50), location VARCHAR(100), start_time TIMESTAMP, end_time TIMESTAMP, genre VARCHAR(50));", "sql": "SELECT genre, COUNT(*) as num_events FROM Events WHERE location LIKE '%Paris%' AND event_type = 'visual arts' GROUP BY genre ORDER BY num_events DESC;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 150, "num_statements": 1} {"question": "Update the \"price\" column in the \"carbon_prices\" table to 28 for records where the \"country\" is 'France'", "schema": "CREATE TABLE carbon_prices ( id INT PRIMARY KEY, country VARCHAR(50), price DECIMAL(5,2)); INSERT INTO carbon_prices (id, country, price) VALUES (1, 'Germany', 20), (2, 'France', 18), (3, 'Spain', 22);", "sql": "UPDATE carbon_prices SET price = 28 WHERE country = 'France';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 61, "num_statements": 1} {"question": "PostgreSQL regression test 'horology': Write the SELECT query (example 255).", "schema": null, "sql": "SELECT to_timestamp('97/Feb/16', 'YY:Mon:DD');", "explanation": "Regression test for Horology in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT to_timestamp('97/Feb/16', 'YY:Mon:DD')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 46, "num_statements": 1} {"question": "How many support programs were implemented in the Eastern region each month for the year 2021?", "schema": "CREATE TABLE SupportPrograms (ProgramID INT, ProgramName VARCHAR(50), Region VARCHAR(50), ImplementationMonth INT, ImplementationYear INT); INSERT INTO SupportPrograms (ProgramID, ProgramName, Region, ImplementationMonth, ImplementationYear) VALUES (1, 'Assistive Technology', 'Eastern', 1, 2021), (2, 'Sign Language Interpretation', 'Eastern', 2, 2021), (3, 'Accessible Furniture', 'Eastern', 3, 2021);", "sql": "SELECT ImplementationYear, ImplementationMonth, COUNT(ProgramID) FROM SupportPrograms WHERE Region = 'Eastern' GROUP BY ImplementationYear, ImplementationMonth;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 160, "num_statements": 1} {"question": "What is the average mass of space debris in different categories?", "schema": "CREATE TABLE space_debris (category TEXT, mass FLOAT); INSERT INTO space_debris (category, mass) VALUES ('Aluminum', 120.5), ('Titanium', 170.1), ('Copper', 110.1), ('Steel', 300.2), ('Other', 50.2);", "sql": "SELECT category, AVG(mass) AS avg_mass FROM space_debris GROUP BY category;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 75, "num_statements": 1} {"question": "How many cultural heritage sites are in each region?", "schema": "CREATE TABLE regions (region_id INT, region_name VARCHAR(255)); INSERT INTO regions (region_id, region_name) VALUES (1, 'North'), (2, 'South'), (3, 'East'), (4, 'West'); CREATE TABLE cultural_sites (site_id INT, site_name VARCHAR(255), region_id INT); INSERT INTO cultural_sites (site_id, site_name, region_id) VALUES (1, 'Museum A', 1), (2, 'Historic House B', 2), (3, 'Theater C', 3), (4, 'Gallery D', 4);", "sql": "SELECT region_name, COUNT(*) as num_sites FROM cultural_sites JOIN regions ON cultural_sites.region_id = regions.region_id GROUP BY region_name;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 144, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the Diameter of the Dog Sled (gold variant) Theme coin?", "schema": "CREATE TABLE table_name_74 (diameter VARCHAR, theme VARCHAR)", "sql": "SELECT diameter FROM table_name_74 WHERE theme = 'dog sled (gold variant)';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 75, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'earthdistance' (example 65).", "schema": null, "sql": "SELECT longitude(ll_to_earth(90,180))::numeric(20,10);", "explanation": "Example query from the 'earthdistance' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 54, "num_statements": 1} {"question": "PostgreSQL regression test 'interval': Write the SELECT query (example 143).", "schema": null, "sql": "SELECT interval '0' AS \"zero\",\n interval '1-2' year to month AS \"year-month\",\n interval '1 2:03:04' day to second AS \"day-time\",\n - interval '1-2' AS \"negative year-month\",\n - interval '1 2:03:04' AS \"negative day-time\";", "explanation": "Regression test for Interval in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT interval '0' AS \"zero\",\n interval '1-2' year to month AS \"year-month\",\n interval '1 2:03:04' day to second AS \"day-time\",\n - interval '1-2' AS \"negative year-month\",\n - interval '1 2:03:04' AS \"negative day-time\") AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 307, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the lowest no. of attendance on record?", "schema": "CREATE TABLE table_16119656_1 (attendance INTEGER)", "sql": "SELECT MIN(attendance) FROM table_16119656_1;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 45, "num_statements": 1} {"question": "What is the average number of marine species found in each protected marine area in Southeast Asia?", "schema": "CREATE TABLE marine_areas (area_id INT, name VARCHAR(50), region VARCHAR(50), num_marine_species INT); INSERT INTO marine_areas (area_id, name, region, num_marine_species) VALUES (1, 'Tubbataha Reefs Natural Park', 'Southeast Asia', 600), (2, 'Sundarbans National Park', 'Southeast Asia', 550), (3, 'Great Barrier Reef', 'Australia', 1500);", "sql": "SELECT AVG(num_marine_species) FROM marine_areas WHERE region = 'Southeast Asia';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 81, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is Original title, when Film title used in nomination is Train Without A Timetable?", "schema": "CREATE TABLE table_name_35 (original_title VARCHAR, film_title_used_in_nomination VARCHAR)", "sql": "SELECT original_title FROM table_name_35 WHERE film_title_used_in_nomination = 'train without a timetable';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 107, "num_statements": 1} {"question": "What is the average budget of cybersecurity exercises in the last 6 months?", "schema": "CREATE TABLE CybersecurityExercises (Id INT, Name VARCHAR(50), Budget FLOAT, Date DATE); INSERT INTO CybersecurityExercises (Id, Name, Budget, Date) VALUES (1, 'Exercise1', 5000, '2021-01-01'); INSERT INTO CybersecurityExercises (Id, Name, Budget, Date) VALUES (2, 'Exercise2', 7000, '2021-02-15');", "sql": "SELECT AVG(Budget) FROM CybersecurityExercises WHERE Date >= DATE_SUB(CURRENT_DATE, INTERVAL 6 MONTH);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 102, "num_statements": 1} {"question": "How many ports are available in the database?", "schema": "CREATE TABLE ports (port_code CHAR(3), port_name VARCHAR(20)); INSERT INTO ports (port_code, port_name) VALUES ('LA', 'Los Angeles'), ('NY', 'New York'), ('MIA', 'Miami'), ('HOU', 'Houston'), ('SFO', 'San Francisco');", "sql": "SELECT COUNT(DISTINCT ports.port_code) as port_count FROM ports;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 64, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'numeric' (example 31).", "schema": null, "sql": "INSERT INTO num_exp_div VALUES (0,4,'0');", "explanation": "DML from PostgreSQL core regression test for Numeric.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 41, "num_statements": 1} {"question": "What is the total production cost of garments produced in the last 12 months using sustainable materials?", "schema": "CREATE TABLE Sustainable_Material_Garments_Time (id INT, production_date DATE, production_cost DECIMAL);", "sql": "SELECT SUM(production_cost) FROM Sustainable_Material_Garments_Time WHERE production_date >= DATEADD(year, -1, GETDATE());", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 122, "num_statements": 1} {"question": "PostgreSQL Typeconv: show example 7.", "schema": null, "sql": "SELECT @ '-4.5e500' AS \"abs\"; ERROR: \"-4.5e500\" is out of range for type double precision;", "explanation": "Example from PostgreSQL documentation on Typeconv.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 90, "num_statements": 2} {"question": "Determine the number of unique donors who made donations in the last quarter from the 'donations' table.", "schema": "CREATE TABLE donations (id INT, donor_name VARCHAR(50), donation_date DATE, amount DECIMAL(10,2));", "sql": "SELECT COUNT(DISTINCT donor_name) FROM donations WHERE donation_date >= DATEADD(quarter, -1, GETDATE());", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 104, "num_statements": 1} {"question": "What is the total donation amount for each program in the 'ProgramDonations' table, and the average donation amount for each program?", "schema": "CREATE TABLE ProgramDonations (DonationID INT, ProgramName VARCHAR(50), DonationAmount DECIMAL(10, 2), DonationDate DATE);", "sql": "SELECT ProgramName, SUM(DonationAmount) AS TotalDonations, AVG(DonationAmount) AS AvgDonation FROM ProgramDonations GROUP BY ProgramName;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 137, "num_statements": 1} {"question": "What are the names of the factories with the highest waste generation?", "schema": "CREATE TABLE factories (name TEXT, id INTEGER, waste_generation FLOAT); INSERT INTO factories (name, id, waste_generation) VALUES ('Factory A', 1, 500), ('Factory B', 2, 700), ('Factory C', 3, 800); CREATE TABLE landfill_capacity (factory_id INTEGER, capacity INTEGER); INSERT INTO landfill_capacity (factory_id, capacity) VALUES (1, 1000), (2, 1500), (3, 2000);", "sql": "SELECT f.name, f.waste_generation FROM factories f ORDER BY f.waste_generation DESC LIMIT 1;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 92, "num_statements": 1} {"question": "What is the AI adoption percentage for 'boutique hotels'?", "schema": "CREATE TABLE hotels_ai (id INT, type TEXT, ai BOOLEAN); INSERT INTO hotels_ai (id, type, ai) VALUES (1, 'Boutique', true), (2, 'Luxury', false), (3, 'Boutique', false);", "sql": "SELECT 100.0 * COUNT(*) / (SELECT COUNT(*) FROM hotels_ai WHERE type = 'Boutique') FROM hotels_ai WHERE type = 'Boutique' AND ai = true;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 136, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the kurdistan democratic party for kurdistan list being 10", "schema": "CREATE TABLE table_24440361_1 (kurdistan_democratic_party INTEGER, total_kurdistan_list VARCHAR)", "sql": "SELECT MIN(kurdistan_democratic_party) FROM table_24440361_1 WHERE total_kurdistan_list = 10;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 93, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: what is the mascot when the county is 43 kosciusko?", "schema": "CREATE TABLE table_name_82 (mascot VARCHAR, county VARCHAR)", "sql": "SELECT mascot FROM table_name_82 WHERE county = '43 kosciusko';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 63, "num_statements": 1} {"question": "What is the average number of autonomous vehicle tests for each city?", "schema": "CREATE TABLE autonomous_vehicles (id INT, city_id INT, model VARCHAR(50), year INT, tests INT); INSERT INTO autonomous_vehicles (id, city_id, model, year, tests) VALUES (4, 3, 'Zoox', 2022, 60000); INSERT INTO autonomous_vehicles (id, city_id, model, year, tests) VALUES (5, 3, 'NVIDIA Drive', 2021, 50000);", "sql": "SELECT city_id, AVG(tests) FROM autonomous_vehicles GROUP BY city_id;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 69, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: When has Score of 4–5 and quebec nordiques as Visitor?", "schema": "CREATE TABLE table_name_92 (date VARCHAR, score VARCHAR, visitor VARCHAR)", "sql": "SELECT date FROM table_name_92 WHERE score = '4–5' AND visitor = 'quebec nordiques';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 84, "num_statements": 1} {"question": "What is the minimum fare for the commuter rail in Boston?", "schema": "CREATE TABLE fares (fare_id INT, route_id INT, fare DECIMAL(5,2), fare_type VARCHAR(20)); INSERT INTO fares (fare_id, route_id, fare, fare_type) VALUES (1, 1, 8.00, 'Commuter Rail'), (2, 2, 6.50, 'Subway'), (3, 3, 9.00, 'Commuter Rail');", "sql": "SELECT MIN(fare) FROM fares WHERE fare_type = 'Commuter Rail';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 62, "num_statements": 1} {"question": "Present the warehouse locations and their respective total storage capacity (in cubic meters) for the region 'Europe' as of 2022-02-01.", "schema": "CREATE TABLE Warehouses (WarehouseID INT, WarehouseLocation VARCHAR(100), Region VARCHAR(50), StorageCapacity DECIMAL(10,2)); INSERT INTO Warehouses VALUES (1, 'Warehouse B', 'Europe', 5000);", "sql": "SELECT Warehouses.WarehouseLocation, SUM(Warehouses.StorageCapacity) as TotalStorageCapacity FROM Warehouses WHERE Warehouses.Region = 'Europe' AND Warehouses.StorageCapacity IS NOT NULL GROUP BY Warehouses.WarehouseLocation;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 225, "num_statements": 1} {"question": "Show a SQL definition from the citus project (views_create, item 31).", "schema": null, "sql": "CREATE VIEW \"local regular view4\" as SELECT count(*) as \"my cny\" FROM dist_regular_view JOIN \"local regular view\" USING (id);", "explanation": "SQL definition from the open-source citus PostgreSQL project.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "ddl_view", "is_postgresql_specific": false, "sql_length": 125, "num_statements": 1} {"question": "Update the round type to 'Series D' in the \"investment_rounds\" table for the record with id 7", "schema": "CREATE TABLE investment_rounds (id INT, company_name VARCHAR(100), round_type VARCHAR(50), raised_amount FLOAT, round_date DATE); INSERT INTO investment_rounds (id, company_name, round_type, raised_amount, round_date) VALUES (6, 'Hotel Ltd.', 'Series B', 7000000, '2018-02-15'), (7, 'India Inc.', 'Series A', 5000000, '2018-01-01');", "sql": "UPDATE investment_rounds SET round_type = 'Series D' WHERE id = 7;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Tell me the circuit for 20 august", "schema": "CREATE TABLE table_name_99 (circuit VARCHAR, date VARCHAR)", "sql": "SELECT circuit FROM table_name_99 WHERE date = '20 august';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 59, "num_statements": 1} {"question": "What is the total number of mining accidents and the associated labor productivity metrics for each country in Africa?", "schema": "CREATE TABLE african_countries (id INT, country TEXT); INSERT INTO african_countries (id, country) VALUES (1, 'South Africa'), (2, 'Ghana'), (3, 'Zambia'), (4, 'Tanzania'), (5, 'Mali'); CREATE TABLE mines (id INT, country TEXT, accidents INT, productivity FLOAT); INSERT INTO mines (id, country, accidents, productivity) VALUES (1, 'South Africa', 5, 1.2), (2, 'Ghana', 3, 1.5), (3, 'Zambia', 7, 1.8), (4, 'Tanzania', 4, 1.3), (5, 'Mali', 6, 1.6);", "sql": "SELECT m.country, COUNT(m.id) AS total_accidents, AVG(m.productivity) AS avg_productivity FROM mines m GROUP BY m.country;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 122, "num_statements": 1} {"question": "What is the total number of vehicles sold in the Sales_Data table in the first half of 2021?", "schema": "CREATE TABLE Sales_Data (Sale_Date DATE, Vehicle_Type VARCHAR(20), Quantity_Sold INT);", "sql": "SELECT SUM(Quantity_Sold) FROM Sales_Data WHERE Sale_Date BETWEEN '2021-01-01' AND '2021-06-30';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 96, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who lost with a time of 0:58?", "schema": "CREATE TABLE table_name_44 (loser VARCHAR, time VARCHAR)", "sql": "SELECT loser FROM table_name_44 WHERE time = '0:58';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 52, "num_statements": 1} {"question": "Update email of players from 'US' to 'new_email@usa.com'", "schema": "player (player_id, name, email, age, gender, country, total_games_played)", "sql": "UPDATE player SET email = 'new_email@usa.com' WHERE country = 'US';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 67, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which opponent played a 1990 wcq type of game, where the results were 0:0?", "schema": "CREATE TABLE table_name_99 (opponent VARCHAR, type_of_game VARCHAR, results¹ VARCHAR)", "sql": "SELECT opponent FROM table_name_99 WHERE type_of_game = '1990 wcq' AND results¹ = '0:0';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 88, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'sequence' (example 17).", "schema": null, "sql": "CREATE SEQUENCE sequence_test13 AS smallint MINVALUE -32768;", "explanation": "DDL from PostgreSQL core regression test for Sequence.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 60, "num_statements": 1} {"question": "PostgreSQL regression test 'tsdicts': Write the SELECT query (example 91).", "schema": null, "sql": "SELECT to_tsvector('ispell_tst', 'Booking the skies after rebookings for footballklubber from a foot');", "explanation": "Regression test for Tsdicts in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT to_tsvector('ispell_tst', 'Booking the skies after rebookings for footballklubber from a foot')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "advanced", "category": "query_select", "is_postgresql_specific": false, "sql_length": 103, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Find the names of states that have some college students playing in goalie and mid positions.", "schema": "CREATE TABLE tryout (cName VARCHAR, pPos VARCHAR); CREATE TABLE college (state VARCHAR, cName VARCHAR)", "sql": "SELECT T1.state FROM college AS T1 JOIN tryout AS T2 ON T1.cName = T2.cName WHERE T2.pPos = 'goalie' INTERSECT SELECT T1.state FROM college AS T1 JOIN tryout AS T2 ON T1.cName = T2.cName WHERE T2.pPos = 'mid';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 209, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How much was the cost of in-county tuition per credit hour at the Gloucester College by the fall of 2009?", "schema": "CREATE TABLE table_22308881_2 (in_county_tuition_per_credit_hour__fall_2009_ VARCHAR, college VARCHAR)", "sql": "SELECT in_county_tuition_per_credit_hour__fall_2009_ FROM table_22308881_2 WHERE college = 'Gloucester';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 104, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: what is the minimum rank", "schema": "CREATE TABLE table_11174272_1 (rank INTEGER)", "sql": "SELECT MIN(rank) FROM table_11174272_1;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 39, "num_statements": 1} {"question": "How many fish are farmed in each country based on the fish_farming table?", "schema": "CREATE TABLE countries (country_id INT, country_name VARCHAR(50)); INSERT INTO countries (country_id, country_name) VALUES (1, 'Norway'), (2, 'Chile'), (3, 'Canada'); CREATE TABLE fish_farming (country_id INT, species VARCHAR(50), quantity INT); INSERT INTO fish_farming (country_id, species, quantity) VALUES (1, 'Salmon', 5000), (2, 'Tilapia', 3000), (3, 'Trout', 4000), (1, 'Cod', 2000);", "sql": "SELECT c.country_name, SUM(ff.quantity) as total_farmed_fish FROM countries c JOIN fish_farming ff ON c.country_id = ff.country_id GROUP BY c.country_name;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 155, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the lowest Frequency where the Hemholtz pitch is d?", "schema": "CREATE TABLE table_name_48 (frequency___hz__ INTEGER, helmholtz_pitch VARCHAR)", "sql": "SELECT MIN(frequency___hz__) FROM table_name_48 WHERE helmholtz_pitch = 'd';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 76, "num_statements": 1} {"question": "List all climate adaptation policies enacted by Pacific Island nations and their corresponding impact scores.", "schema": "CREATE TABLE policies (id INT, name VARCHAR(50), country VARCHAR(50), type VARCHAR(50), enactment_date DATE); INSERT INTO policies (id, name, country, type, enactment_date) VALUES (1, 'Coastal Protection Policy', 'Fiji', 'Adaptation', '2018-01-01'); INSERT INTO policies (id, name, country, type, enactment_date) VALUES (2, 'Climate-Resilient Infrastructure Policy', 'Marshall Islands', 'Adaptation', '2019-05-05'); INSERT INTO policies (id, name, country, type, enactment_date) VALUES (3, 'Water Management Policy', 'Palau', 'Adaptation', '2017-12-12'); INSERT INTO policies (id, name, country, type, enactment_date) VALUES (4, 'Disaster Risk Reduction Policy', 'Samoa', 'Adaptation', '2020-06-26'); INSERT INTO policies (id, name, country, type, enactment_date) VALUES (5, 'Agricultural Adaptation Policy', 'Vanuatu', 'Adaptation', '2021-09-15');", "sql": "SELECT p.name, p.country, p.type, p.enactment_date, coalesce(avg(i.impact_score), 0) as avg_impact_score FROM policies p LEFT JOIN impact_assessments i ON p.id = i.policy_id WHERE p.type = 'Adaptation' AND p.country LIKE 'Pacific%' GROUP BY p.name, p.country, p.type, p.enactment_date;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 285, "num_statements": 1} {"question": "pgTAP test for Pktap (assertion 25).", "schema": null, "sql": "SELECT * FROM check_test(\n col_is_pk( 'sometab', 'id' ),\n true,\n 'col_is_pk( table, column )',\n 'Column sometab(id) should be a primary key',\n ''\n);", "explanation": "SQL assertion from pgTAP test suite for Pktap.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 163, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the Score that has a Winnipeg Jets as Visitor on april 7?", "schema": "CREATE TABLE table_name_10 (score VARCHAR, visitor VARCHAR, date VARCHAR)", "sql": "SELECT score FROM table_name_10 WHERE visitor = 'winnipeg jets' AND date = 'april 7';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 85, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the fate and location of the Ringstad?", "schema": "CREATE TABLE table_name_3 (fate_and_location VARCHAR, ship VARCHAR)", "sql": "SELECT fate_and_location FROM table_name_3 WHERE ship = 'ringstad';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 67, "num_statements": 1} {"question": "What is the total number of successful and failed aid deliveries for each organization?", "schema": "CREATE TABLE aid_deliveries (delivery_id INT, organization VARCHAR(50), delivery_status VARCHAR(10)); INSERT INTO aid_deliveries (delivery_id, organization, delivery_status) VALUES (1, 'Org A', 'successful'), (2, 'Org B', 'failed'), (3, 'Org A', 'successful'), (4, 'Org C', 'successful'), (5, 'Org B', 'failed'), (6, 'Org A', 'successful'); CREATE TABLE organizations (org_id INT, name VARCHAR(50)); INSERT INTO organizations (org_id, name) VALUES (1, 'Org A'), (2, 'Org B'), (3, 'Org C');", "sql": "SELECT o.name, SUM(CASE WHEN ad.delivery_status = 'successful' THEN 1 ELSE 0 END) AS num_successful, SUM(CASE WHEN ad.delivery_status = 'failed' THEN 1 ELSE 0 END) AS num_failed FROM aid_deliveries ad JOIN organizations o ON ad.organization = o.name GROUP BY o.name;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 266, "num_statements": 1} {"question": "as of 2022", "schema": "INSERT INTO music_genres_ext (id, genre, popularity) VALUES (4, 'Hip Hop', 30000000);", "sql": "INSERT INTO music_genres_ext (id, genre, popularity, release_year);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 67, "num_statements": 1} {"question": "What is the total amount of funding received by graduate students from research grants in the Mathematics department?", "schema": "CREATE TABLE graduate_students (id INT, name VARCHAR(50), department VARCHAR(50)); CREATE TABLE research_grants (id INT, graduate_student_id INT, amount DECIMAL(10,2));", "sql": "SELECT SUM(rg.amount) FROM research_grants rg JOIN graduate_students gs ON rg.graduate_student_id = gs.id WHERE gs.department = 'Mathematics';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 142, "num_statements": 1} {"question": "What is the maximum budget allocated for ethical AI initiatives in a single year in Europe?", "schema": "CREATE TABLE Ethical_AI_Budget_Year (Year INT, Budget INT);", "sql": "SELECT MAX(Budget) FROM Ethical_AI_Budget_Year WHERE Year IN (SELECT Year FROM Ethical_AI_Budget_Year WHERE Country IN ('Germany', 'France', 'Spain', 'Italy', 'UK') GROUP BY Year);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 180, "num_statements": 1} {"question": "Create a view for active recycling initiatives", "schema": "CREATE TABLE recycling_initiatives ( id INT PRIMARY KEY, region VARCHAR(255), initiative_name VARCHAR(255), initiative_description TEXT, start_date DATE, end_date DATE); INSERT INTO recycling_initiatives (id, region, initiative_name, initiative_description, start_date, end_date) VALUES (1, 'Africa', 'Plastic Bottle Collection', 'Collecting plastic bottles in schools and parks.', '2020-01-01', '2020-12-31'), (2, 'South America', 'E-Waste Disposal', 'Establishing e-waste drop-off points in major cities.', '2019-06-15', '2021-06-14'), (3, 'Oceania', 'Composting Program', 'Implementing composting programs in households.', '2018-04-22', '2022-04-21'), (4, 'Antarctica', 'Research and Development', 'Researching new waste reduction methods.', '2023-07-04', '2026-07-03'); CREATE VIEW active_recycling_initiatives AS SELECT * FROM recycling_initiatives WHERE end_date >= CURDATE();", "sql": "CREATE VIEW active_recycling_initiatives AS SELECT * FROM recycling_initiatives WHERE end_date >= CURDATE();", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "ddl_view", "is_postgresql_specific": false, "sql_length": 108, "num_statements": 1} {"question": "How many renewable energy projects were completed in each region in the past 3 years?", "schema": "CREATE TABLE renewable_projects (id INT, region VARCHAR(50), completion_year INT);", "sql": "SELECT region, COUNT(*) FROM renewable_projects WHERE completion_year >= YEAR(CURRENT_DATE) - 3 GROUP BY region;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 112, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What location was the game on October 6?", "schema": "CREATE TABLE table_21761882_4 (location VARCHAR, date VARCHAR)", "sql": "SELECT location FROM table_21761882_4 WHERE date = 'October 6';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 63, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the 2010 for tournament of us open", "schema": "CREATE TABLE table_name_95 (tournament VARCHAR)", "sql": "SELECT 2010 FROM table_name_95 WHERE tournament = 'us open';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 60, "num_statements": 1} {"question": "Determine the total sales revenue for each month of the year", "schema": "CREATE TABLE sales_data_2 (sale_id INT, product_id INT, sale_date DATE, price DECIMAL(5,2), quantity INT); INSERT INTO sales_data_2 (sale_id, product_id, sale_date, price, quantity) VALUES (6, 1, '2021-02-01', 12.50, 10), (7, 2, '2021-03-02', 13.00, 15), (8, 3, '2021-04-03', 12.75, 12), (9, 4, '2021-05-04', 45.00, 5), (10, 5, '2021-06-05', 35.00, 3);", "sql": "SELECT DATE_FORMAT(sale_date, '%Y-%m') AS month, SUM(price * quantity) AS total_sales_revenue FROM sales_data_2 GROUP BY month;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 127, "num_statements": 1} {"question": "PostgreSQL regression test 'tsdicts': Write the SELECT query (example 18).", "schema": null, "sql": "SELECT ts_lexize('hunspell', 'skies');", "explanation": "Regression test for Tsdicts in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT ts_lexize('hunspell', 'skies')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 38, "num_statements": 1} {"question": "Number of community development initiatives, by region and gender, for the year 2018?", "schema": "CREATE TABLE community_development (id INT, region VARCHAR(255), gender VARCHAR(255), initiative_count INT, year INT); INSERT INTO community_development (id, region, gender, initiative_count, year) VALUES (1, 'Caribbean', 'Female', 120, 2018), (2, 'Caribbean', 'Male', 85, 2018), (3, 'Central America', 'Female', 160, 2018);", "sql": "SELECT region, gender, SUM(initiative_count) as total_initiative_count FROM community_development WHERE year = 2018 GROUP BY region, gender;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 140, "num_statements": 1} {"question": "What is the total number of humanitarian assistance missions conducted by each military branch in 2020?", "schema": "CREATE TABLE military_branch (id INT PRIMARY KEY, name VARCHAR(255)); INSERT INTO military_branch (id, name) VALUES (1, 'Army'), (2, 'Navy'), (3, 'Air Force'), (4, 'Marines'), (5, 'Coast Guard'); CREATE TABLE missions (id INT PRIMARY KEY, military_branch_id INT, type VARCHAR(255), year INT, FOREIGN KEY (military_branch_id) REFERENCES military_branch(id)); INSERT INTO missions (id, military_branch_id, type, year) VALUES (1, 1, 'Humanitarian Assistance', 2020), (2, 2, 'Humanitarian Assistance', 2020), (3, 3, 'Humanitarian Assistance', 2020), (4, 4, 'Humanitarian Assistance', 2020), (5, 5, 'Humanitarian Assistance', 2020);", "sql": "SELECT m.name, COUNT(missions.id) as total_missions FROM missions JOIN military_branch m ON missions.military_branch_id = m.id WHERE missions.type = 'Humanitarian Assistance' AND missions.year = 2020 GROUP BY m.name;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 216, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the total number of record for 61", "schema": "CREATE TABLE table_27722408_10 (record VARCHAR, game VARCHAR)", "sql": "SELECT COUNT(record) FROM table_27722408_10 WHERE game = 61;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 60, "num_statements": 1} {"question": "What's the minimum ESG score for companies in the 'healthcare' or 'pharmaceutical' sectors?", "schema": "CREATE TABLE companies_esg_3 (id INT, sector VARCHAR(20), ESG_score FLOAT); INSERT INTO companies_esg_3 (id, sector, ESG_score) VALUES (1, 'healthcare', 72.5), (2, 'pharmaceutical', 80.2), (3, 'healthcare', 76.1);", "sql": "SELECT MIN(ESG_score) FROM companies_esg_3 WHERE sector IN ('healthcare', 'pharmaceutical');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 92, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is every prefix class for the equivalent of NTE101?", "schema": "CREATE TABLE table_30011_2 (prefix_class VARCHAR, equivalent VARCHAR)", "sql": "SELECT prefix_class FROM table_30011_2 WHERE equivalent = 'NTE101';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 67, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which chassis has marlboro brm as the team?", "schema": "CREATE TABLE table_name_89 (chassis VARCHAR, team VARCHAR)", "sql": "SELECT chassis FROM table_name_89 WHERE team = 'marlboro brm';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 62, "num_statements": 1} {"question": "PostgreSQL regression test 'jsonb_jsonpath': Write the SELECT query (example 335).", "schema": null, "sql": "select jsonb_path_query('\"1.23aaa\"', '$.bigint()');", "explanation": "Regression test for Jsonb Jsonpath in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select jsonb_path_query('\"1.23aaa\"', '$.bigint()')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "advanced", "category": "query_select", "is_postgresql_specific": false, "sql_length": 51, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is essendon's away team score?", "schema": "CREATE TABLE table_name_1 (away_team VARCHAR)", "sql": "SELECT away_team AS score FROM table_name_1 WHERE away_team = 'essendon';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 73, "num_statements": 1} {"question": "What is the total number of hours spent on open pedagogy projects by students in each school?", "schema": "CREATE TABLE open_pedagogy_projects (id INT, project_name VARCHAR(50), school_id INT, student_id INT, hours_spent INT); CREATE TABLE students (id INT, name VARCHAR(50), age INT, school_id INT); CREATE TABLE schools (id INT, school_name VARCHAR(50), PRIMARY KEY(id));", "sql": "SELECT s.school_name, SUM(opp.hours_spent) as total_hours_spent FROM open_pedagogy_projects opp JOIN students st ON opp.student_id = st.id JOIN schools s ON st.school_id = s.id GROUP BY s.school_name;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 208, "num_statements": 1} {"question": "How many circular economy initiatives were implemented in the residential sector in 2019?", "schema": "CREATE TABLE CircularEconomy (year INT, sector VARCHAR(20), initiatives INT); INSERT INTO CircularEconomy (year, sector, initiatives) VALUES (2018, 'residential', 300), (2018, 'commercial', 200), (2019, 'residential', NULL), (2019, 'commercial', 300);", "sql": "SELECT initiatives FROM CircularEconomy WHERE year = 2019 AND sector = 'residential';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 85, "num_statements": 1} {"question": "Which regions have the highest VR set adoption rates?", "schema": "CREATE TABLE Region_VR_Adoption (Region VARCHAR(20), VR_Users INT, Total_Users INT); INSERT INTO Region_VR_Adoption (Region, VR_Users, Total_Users) VALUES ('North America', 5000, 10000), ('Europe', 7000, 15000), ('Asia', 8000, 20000), ('South America', 3000, 12000), ('Africa', 1000, 5000);", "sql": "SELECT Region, (VR_Users * 100.0 / Total_Users) AS Adoption_Rate FROM Region_VR_Adoption;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 89, "num_statements": 1} {"question": "What was the average attendance for exhibitions in the 'Art Museum' in 2020?", "schema": "CREATE TABLE Art_Exhibition (exhibition_id INT, museum_name VARCHAR(255), year INT, attendance INT);", "sql": "SELECT AVG(attendance) FROM Art_Exhibition WHERE museum_name = 'Art Museum' AND year = 2020;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 92, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which Bronze has a Total smaller than 1?", "schema": "CREATE TABLE table_name_15 (bronze INTEGER, total INTEGER)", "sql": "SELECT AVG(bronze) FROM table_name_15 WHERE total < 1;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 54, "num_statements": 1} {"question": "PostgreSQL regression test 'arrays': Write the SELECT query (example 165).", "schema": null, "sql": "SELECT array_agg(x) || array_agg(x) FROM (VALUES (ROW(1,2)), (ROW(3,4))) v(x);", "explanation": "Regression test for Arrays in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT array_agg(x) || array_agg(x) FROM (VALUES (ROW(1,2)), (ROW(3,4))) v(x)) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": true, "sql_length": 78, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What county has others at 5?", "schema": "CREATE TABLE table_20573232_1 (county VARCHAR, others_number VARCHAR)", "sql": "SELECT county FROM table_20573232_1 WHERE others_number = 5;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 60, "num_statements": 1} {"question": "PostgreSQL regression test 'varchar': Write the SELECT query (example 12).", "schema": null, "sql": "SELECT c.*\n FROM VARCHAR_TBL c\n WHERE c.f1 = 'a';", "explanation": "Regression test for Varchar in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT c.*\n FROM VARCHAR_TBL c\n WHERE c.f1 = 'a') AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 53, "num_statements": 1} {"question": "Delete records from the 'player_scores' table where the player's score is below 500", "schema": "CREATE TABLE player_scores (player_id INT, score INT); INSERT INTO player_scores (player_id, score) VALUES (1, 600), (2, 300), (3, 700);", "sql": "WITH low_scores AS (DELETE FROM player_scores WHERE score < 500 RETURNING *) SELECT * FROM low_scores;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "dml_delete", "is_postgresql_specific": true, "sql_length": 102, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What's the mean week number for November 12, 1978?", "schema": "CREATE TABLE table_name_9 (week INTEGER, date VARCHAR)", "sql": "SELECT AVG(week) FROM table_name_9 WHERE date = 'november 12, 1978';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 68, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What team was promoted in the Serbian League East in the same season when Kolubara was promoted in the Serbian League Belgrade?", "schema": "CREATE TABLE table_12283621_6 (serbian_league_east VARCHAR, serbian_league_belgrade VARCHAR)", "sql": "SELECT serbian_league_east FROM table_12283621_6 WHERE serbian_league_belgrade = 'Kolubara';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 92, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the broadcast network for saitama prefecture", "schema": "CREATE TABLE table_21076286_2 (broadcast_network VARCHAR, broadcast_scope VARCHAR)", "sql": "SELECT broadcast_network FROM table_21076286_2 WHERE broadcast_scope = 'Saitama Prefecture';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 92, "num_statements": 1} {"question": "PostgreSQL Ddl: show example 80.", "schema": null, "sql": "SELECT 3 OPERATOR(pg_catalog.+) 4;", "explanation": "Example from PostgreSQL documentation on Ddl.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": true, "sql_length": 34, "num_statements": 1} {"question": "What is the sum of donations and number of volunteers for each program in Q3 2021?", "schema": "CREATE TABLE ProgramVolunteers (ProgramID INT, VolunteerCount INT); INSERT INTO ProgramVolunteers (ProgramID, VolunteerCount) VALUES (1, 10), (2, 20);", "sql": "SELECT ProgramName, SUM(DonationAmount) as TotalDonation, SUM(VolunteerCount) as TotalVolunteers FROM Programs JOIN ProgramVolunteers ON Programs.ProgramID = ProgramVolunteers.ProgramID WHERE DonationDate BETWEEN '2021-07-01' AND '2021-09-30' GROUP BY ProgramName;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 264, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What type of car does Jeff Fuller drive?", "schema": "CREATE TABLE table_2182170_1 (car_s_ VARCHAR, driver_s_ VARCHAR)", "sql": "SELECT car_s_ FROM table_2182170_1 WHERE driver_s_ = 'Jeff Fuller';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 67, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the number for simon cellan jones", "schema": "CREATE TABLE table_26914076_2 (no VARCHAR, directed_by VARCHAR)", "sql": "SELECT no FROM table_26914076_2 WHERE directed_by = 'Simon Cellan Jones';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 73, "num_statements": 1} {"question": "What is the most attended cultural event in 'events' table for 2022?", "schema": "CREATE TABLE events (event_id INT, title VARCHAR(50), year INT, location VARCHAR(50), attendance INT); INSERT INTO events (event_id, title, year, location, attendance) VALUES (1, 'Art Exhibition', 2022, 'Paris', 15000); INSERT INTO events (event_id, title, year, location, attendance) VALUES (2, 'Theater Performance', 2022, 'Tokyo', 12000);", "sql": "SELECT title, MAX(attendance) FROM events WHERE year = 2022;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "query_select", "is_postgresql_specific": false, "sql_length": 60, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the title of every song, and how many weeks was each song at #1 for Rihanna in 2012?", "schema": "CREATE TABLE table_19542477_9 (song_s__—_weeks VARCHAR, issue_years VARCHAR, artist_s_ VARCHAR)", "sql": "SELECT song_s__—_weeks FROM table_19542477_9 WHERE issue_years = 2012 AND artist_s_ = 'Rihanna';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 96, "num_statements": 1} {"question": "Delete all records of workers who have been terminated and replace them with new hires who have received ethical manufacturing training.", "schema": "CREATE TABLE Workers (ID INT, Terminated BOOLEAN, Ethical_Training BOOLEAN); INSERT INTO Workers (ID, Terminated, Ethical_Training) VALUES (1, TRUE, FALSE); INSERT INTO Workers (ID, Terminated, Ethical_Training) VALUES (2, FALSE, TRUE); INSERT INTO Workers (ID, Terminated, Ethical_Training) VALUES (3, TRUE, FALSE); INSERT INTO Workers (ID, Terminated, Ethical_Training) VALUES (4, FALSE, TRUE); DELETE FROM Workers WHERE Terminated = TRUE; INSERT INTO Workers (ID, Terminated, Ethical_Training) VALUES (5, FALSE, TRUE); INSERT INTO Workers (ID, Terminated, Ethical_Training) VALUES (6, FALSE, TRUE);", "sql": "DELETE FROM Workers WHERE Terminated = TRUE; INSERT INTO Workers (ID, Terminated, Ethical_Training) VALUES (5, FALSE, TRUE), (6, FALSE, TRUE);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 142, "num_statements": 2} {"question": "Show a query using PostgreSQL contrib extension 'pg_trgm' (example 78).", "schema": null, "sql": "insert into test2 values ('%line 2');", "explanation": "Example query from the 'pg_trgm' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 37, "num_statements": 1} {"question": "Rank countries by the percentage of international visitors who travel sustainably in 2019.", "schema": "CREATE TABLE CountrySustainableTravel (country_id INT, year INT, pct_sustainable_travel FLOAT); INSERT INTO CountrySustainableTravel (country_id, year, pct_sustainable_travel) VALUES (1, 2019, 0.5); INSERT INTO CountrySustainableTravel (country_id, year, pct_sustainable_travel) VALUES (2, 2019, 0.7); INSERT INTO CountrySustainableTravel (country_id, year, pct_sustainable_travel) VALUES (3, 2019, 0.6); INSERT INTO CountrySustainableTravel (country_id, year, pct_sustainable_travel) VALUES (4, 2019, 0.8);", "sql": "SELECT country_id, RANK() OVER (ORDER BY pct_sustainable_travel DESC) as rank FROM CountrySustainableTravel WHERE year = 2019;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_window_function", "is_postgresql_specific": true, "sql_length": 126, "num_statements": 1} {"question": "What was the total amount of funding received by cultural organizations in Brazil and Russia?", "schema": "CREATE TABLE CulturalOrganizations (id INT, organization_name VARCHAR(50), country VARCHAR(50), funding_amount DECIMAL(10, 2)); INSERT INTO CulturalOrganizations (id, organization_name, country, funding_amount) VALUES (1, 'Museum', 'Brazil', 50000.00), (2, 'Theater', 'Russia', 75000.00), (3, 'Art Gallery', 'Brazil', 60000.00), (4, 'Dance Company', 'Russia', 80000.00);", "sql": "SELECT SUM(funding_amount) FROM CulturalOrganizations WHERE country IN ('Brazil', 'Russia');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 92, "num_statements": 1} {"question": "PostgreSQL regression test 'sqljson': Write the SELECT query (example 97).", "schema": null, "sql": "SELECT JSON_OBJECT(1: 1, '1': NULL ABSENT ON NULL WITH UNIQUE RETURNING jsonb);", "explanation": "Regression test for Sqljson in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT JSON_OBJECT(1: 1, '1': NULL ABSENT ON NULL WITH UNIQUE RETURNING jsonb)) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": true, "sql_length": 79, "num_statements": 1} {"question": "Find the number of distinct species in the 'habitat_preservation' program", "schema": "CREATE TABLE habitat_preservation (animal_id INT, species VARCHAR(20)); INSERT INTO habitat_preservation (animal_id, species) VALUES (1, 'tiger'), (2, 'elephant'), (3, 'tiger');", "sql": "SELECT COUNT(DISTINCT species) FROM habitat_preservation;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 57, "num_statements": 1} {"question": "What is the total number of explainable AI models with a satisfaction score greater than 85?", "schema": "CREATE TABLE explainable_ai (model_name TEXT, satisfaction_score INTEGER, date DATE); INSERT INTO explainable_ai (model_name, satisfaction_score, date) VALUES ('Model1', 80, '2020-01-01'), ('Model2', 85, '2019-04-03'), ('Model3', 90, '2021-05-22'), ('Model4', 87, '2021-02-15');", "sql": "SELECT COUNT(*) FROM explainable_ai WHERE satisfaction_score > 85;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How much February has a Game larger than 37, and an Opponent of chicago black hawks?", "schema": "CREATE TABLE table_name_95 (february VARCHAR, game VARCHAR, opponent VARCHAR)", "sql": "SELECT COUNT(february) FROM table_name_95 WHERE game > 37 AND opponent = 'chicago black hawks';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 95, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the decision when Chicago was the visiting team?", "schema": "CREATE TABLE table_name_85 (decision VARCHAR, visitor VARCHAR)", "sql": "SELECT decision FROM table_name_85 WHERE visitor = 'chicago';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 61, "num_statements": 1} {"question": "Identify the space organizations that have launched satellites in the last 3 years.", "schema": "CREATE TABLE organization_satellites (organization TEXT, launch_date DATE); INSERT INTO organization_satellites (organization, launch_date) VALUES ('NASA', '2020-01-01'), ('NASA', '2019-01-01'), ('ESA', '2018-01-01'), ('SpaceX', '2021-01-01'), ('ISRO', '2020-01-01');", "sql": "SELECT DISTINCT organization FROM organization_satellites WHERE launch_date >= DATEADD(year, -3, GETDATE());", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 108, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many yards did Davin Meggett have with more than 9 Rec.?", "schema": "CREATE TABLE table_name_62 (yards INTEGER, player VARCHAR, rec VARCHAR)", "sql": "SELECT SUM(yards) FROM table_name_62 WHERE player = 'davin meggett' AND rec > 9;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 80, "num_statements": 1} {"question": "Count the number of military equipment maintenance requests from each branch of the military", "schema": "CREATE TABLE maintenance_requests (request_id INT, equipment_id INT, request_date DATE, branch VARCHAR(255)); CREATE VIEW equipment_by_branch AS SELECT equipment_id, branch FROM military_equipment JOIN maintenance_requests ON military_equipment.equipment_id = maintenance_requests.equipment_id;", "sql": "SELECT branch, COUNT(*) as num_requests FROM equipment_by_branch GROUP BY branch;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 81, "num_statements": 1} {"question": "What is the average budget per student for accommodations in each building?", "schema": "CREATE TABLE Students (StudentID INT, BuildingID INT); INSERT INTO Students (StudentID, BuildingID) VALUES (1, 1); INSERT INTO Students (StudentID, BuildingID) VALUES (2, 1); INSERT INTO Students (StudentID, BuildingID) VALUES (3, 2);", "sql": "SELECT b.BuildingName, AVG(bb.BudgetAmount / COUNT(s.StudentID)) AS AvgBudgetPerStudent FROM BuildingBudgets bb INNER JOIN Buildings b ON bb.BuildingID = b.BuildingID INNER JOIN Students s ON bb.BuildingID = s.BuildingID GROUP BY b.BuildingName;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 245, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the notes when the displacement is 220cid (3,604cc)?", "schema": "CREATE TABLE table_name_78 (notes VARCHAR, displacement VARCHAR)", "sql": "SELECT notes FROM table_name_78 WHERE displacement = '220cid (3,604cc)';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 72, "num_statements": 1} {"question": "How many players are there in each city, and which games are they playing?", "schema": "CREATE TABLE Players (PlayerID INT, City VARCHAR(50)); INSERT INTO Players (PlayerID, City) VALUES (1, 'Los Angeles'), (2, 'Paris'), (3, 'Tokyo'), (4, 'Sydney'); CREATE TABLE GameSessions (PlayerID INT, GameID INT); INSERT INTO GameSessions (PlayerID, GameID) VALUES (1, 1), (1, 2), (2, 2), (2, 3), (3, 3), (4, 4); CREATE TABLE GameDesign (GameID INT, GameName VARCHAR(50)); INSERT INTO GameDesign (GameID, GameName) VALUES (1, 'Space Explorer'), (2, 'Racing Fever'), (3, 'VR Puzzle'), (4, 'Underwater Adventure');", "sql": "SELECT Players.City, COUNT(Players.PlayerID) AS Players_in_City, GameDesign.GameName FROM Players INNER JOIN GameSessions ON Players.PlayerID = GameSessions.PlayerID INNER JOIN GameDesign ON GameSessions.GameID = GameDesign.GameID GROUP BY Players.City, GameDesign.GameName;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 274, "num_statements": 1} {"question": "Find the total number of hotel listings in Europe and Asia, excluding duplicates.", "schema": "CREATE TABLE hotel_listings (hotel_id INT, location VARCHAR(20)); INSERT INTO hotel_listings (hotel_id, location) VALUES (1, 'Paris'), (2, 'Berlin'), (3, 'Tokyo');", "sql": "SELECT location, COUNT(DISTINCT hotel_id) as total_hotels FROM hotel_listings WHERE location IN ('Europe', 'Asia') GROUP BY location;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 133, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the crowd size of the match featuring Hawthorn as the Away team?", "schema": "CREATE TABLE table_name_84 (crowd VARCHAR, away_team VARCHAR)", "sql": "SELECT crowd FROM table_name_84 WHERE away_team = 'hawthorn';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 61, "num_statements": 1} {"question": "What is the average account balance of low-income borrowers in Latin America who have taken out socially responsible loans?", "schema": "CREATE TABLE Customers (CustomerID int, IncomeLevel varchar(50), Location varchar(50)); INSERT INTO Customers (CustomerID, IncomeLevel, Location) VALUES (1, 'Low Income', 'Latin America'); CREATE TABLE Loans (LoanID int, CustomerID int, Type varchar(50), SociallyResponsible bit); INSERT INTO Loans (LoanID, CustomerID, Type, SociallyResponsible) VALUES (1, 1, 'Personal Loan', 1); CREATE TABLE Accounts (AccountID int, CustomerID int, Balance decimal(10,2)); INSERT INTO Accounts (AccountID, CustomerID, Balance) VALUES (1, 1, 500.00);", "sql": "SELECT AVG(A.Balance) FROM Accounts A INNER JOIN Customers C ON A.CustomerID = C.CustomerID INNER JOIN Loans L ON C.CustomerID = L.CustomerID WHERE C.Location = 'Latin America' AND C.IncomeLevel = 'Low Income' AND L.SociallyResponsible = 1;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 240, "num_statements": 1} {"question": "Show the number of smart contracts deployed per month for the 'Polkadot' network in the current year.", "schema": "CREATE TABLE polkadot_network (deployment_date DATE, smart_contract_count INTEGER, network_name TEXT);", "sql": "SELECT DATE_TRUNC('month', deployment_date) as month, COUNT(*) as smart_contracts_deployed FROM polkadot_network WHERE network_name = 'Polkadot' AND EXTRACT(YEAR FROM deployment_date) = EXTRACT(YEAR FROM CURRENT_DATE) GROUP BY month ORDER BY month;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 248, "num_statements": 1} {"question": "What is the maximum budget for climate mitigation projects in each continent?", "schema": "CREATE TABLE climate_mitigation(project_name TEXT, country TEXT, budget FLOAT); INSERT INTO climate_mitigation(project_name, country, budget) VALUES ('Project P', 'USA', 800000.00), ('Project Q', 'Brazil', 900000.00), ('Project R', 'South Africa', 700000.00);", "sql": "SELECT country, MAX(budget) FROM climate_mitigation GROUP BY (CASE WHEN country = 'USA' THEN 'North America' WHEN country = 'Brazil' THEN 'South America' WHEN country = 'South Africa' THEN 'Africa' END);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 203, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: On what date was the opponent the White Sox and the record 18-13?", "schema": "CREATE TABLE table_name_90 (date VARCHAR, opponent VARCHAR, record VARCHAR)", "sql": "SELECT date FROM table_name_90 WHERE opponent = 'white sox' AND record = '18-13';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 81, "num_statements": 1} {"question": "Which countries have the highest number of defense contracts?", "schema": "CREATE TABLE Contract_Country (id INT, country VARCHAR(50), contract_count INT); INSERT INTO Contract_Country (id, country, contract_count) VALUES (1, 'USA', 50), (2, 'Canada', 30); CREATE TABLE Contract_Country_Mapping (contract_id INT, country_id INT); INSERT INTO Contract_Country_Mapping (contract_id, country_id) VALUES (1, 1), (2, 1), (3, 2);", "sql": "SELECT Contract_Country.country, SUM(Contract_Country_Mapping.contract_id) AS contract_count FROM Contract_Country JOIN Contract_Country_Mapping ON Contract_Country.id = Contract_Country_Mapping.country_id GROUP BY Contract_Country.country ORDER BY contract_count DESC;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 269, "num_statements": 1} {"question": "What is the total number of crimes reported in 2020 and 2021?", "schema": "CREATE TABLE Crime (cid INT, year INT, category VARCHAR(255), location VARCHAR(255));", "sql": "SELECT YEAR(Crime.year), COUNT(*) FROM Crime WHERE Crime.year IN (2020, 2021) GROUP BY YEAR(Crime.year);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 104, "num_statements": 1} {"question": "pgTAP test for Util (assertion 16).", "schema": null, "sql": "SELECT is(\n test_variadic(),\n 'foo\nbar\nbaz',\n 'variadic collect_tap() should simply collect tap'\n);", "explanation": "SQL assertion from pgTAP test suite for Util.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 108, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many segment b's when segment a is filigree glass", "schema": "CREATE TABLE table_15187735_16 (segment_b VARCHAR, segment_a VARCHAR)", "sql": "SELECT COUNT(segment_b) FROM table_15187735_16 WHERE segment_a = 'Filigree Glass';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 82, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'btree_gin' (example 7).", "schema": null, "sql": "SELECT * FROM test_varbit WHERE i='100'::varbit ORDER BY i;", "explanation": "Example query from the 'btree_gin' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 59, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which writer wrote episode 18-09 (652)?", "schema": "CREATE TABLE table_name_29 (writer_s_ VARCHAR, episode VARCHAR)", "sql": "SELECT writer_s_ FROM table_name_29 WHERE episode = '18-09 (652)';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "Update the year of events to 2025 if the year is greater than 2023", "schema": "CREATE TABLE events (event_id INT, event_name VARCHAR(50), year INT); INSERT INTO events (event_id, event_name, year) VALUES (1, 'FIFA World Cup', 2022), (2, 'NBA Finals', 2023), (3, 'Super Bowl', 2022);", "sql": "UPDATE events SET year = 2025 WHERE year > 2023;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 48, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Where is the German Grand Prix?", "schema": "CREATE TABLE table_1140082_2 (location VARCHAR, race VARCHAR)", "sql": "SELECT location FROM table_1140082_2 WHERE race = 'German Grand Prix';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 70, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What's the production code of the episode with a series number 37b?", "schema": "CREATE TABLE table_14035132_3 (production_code VARCHAR, no_in_series VARCHAR)", "sql": "SELECT production_code FROM table_14035132_3 WHERE no_in_series = '37b';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 72, "num_statements": 1} {"question": "PostgreSQL regression test 'rangefuncs': Write the SELECT query (example 373).", "schema": null, "sql": "select * from\n (select jsonb_path_query_array(module->'lectures', '$[*]') as lecture\n from unnest(array['{\"lectures\": [{\"id\": \"1\"}]}'::jsonb])\n as unnested_modules(module)) as ss,\n jsonb_to_recordset(ss.lecture) as j (id text);", "explanation": "Regression test for Rangefuncs in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select * from\n (select jsonb_path_query_array(module->'lectures', '$[*]') as lecture\n from unnest(array['{\"lectures\": [{\"id\": \"1\"}]}'::jsonb])\n as unnested_modules(module)) as ss,\n jsonb_to_recordset(ss.lecture) as j (id text)) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "advanced", "category": "query_select", "is_postgresql_specific": true, "sql_length": 238, "num_statements": 1} {"question": "Insert a new cultural heritage site in Kyoto, Japan named \"Kiyomizu-dera\" with an ID of 3", "schema": "CREATE TABLE cultural_sites (site_id INT, name TEXT, location TEXT); INSERT INTO cultural_sites (site_id, name, location) VALUES (1, 'Meiji Shrine', 'Tokyo'); INSERT INTO cultural_sites (site_id, name, location) VALUES (2, 'Todai-ji Temple', 'Nara');", "sql": "INSERT INTO cultural_sites (site_id, name, location) VALUES (3, 'Kiyomizu-dera', 'Kyoto');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 90, "num_statements": 1} {"question": "What is the total CO2 emissions for the 'suppliers' table?", "schema": "CREATE TABLE suppliers (id INT, name VARCHAR(50), co2_emissions INT); INSERT INTO suppliers (id, name, co2_emissions) VALUES (1, 'Supplier A', 500), (2, 'Supplier B', 800), (3, 'Supplier C', 300);", "sql": "SELECT SUM(co2_emissions) FROM suppliers;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 41, "num_statements": 1} {"question": "What is the total amount of money invested in the technology sector?", "schema": "CREATE TABLE investments (id INT, sector VARCHAR(255), amount FLOAT); INSERT INTO investments (id, sector, amount) VALUES (1, 'Technology', 9000000.0), (2, 'Technology', 10000000.0), (3, 'Healthcare', 8000000.0);", "sql": "SELECT SUM(amount) FROM investments WHERE sector = 'Technology';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 64, "num_statements": 1} {"question": "Write the PL/pgSQL object from PostgreSQL regression test 'updatable_views' (example 183).", "schema": null, "sql": "$$\nlanguage plpgsql;\ncreate trigger base_tab_def_view_instrig instead of insert on base_tab_def_view\n for each row execute function base_tab_def_view_instrig_func();\ntruncate base_tab_def;\ninsert into base_tab_def values (1);\ninsert into base_tab_def values (2), (3);\ninsert into base_tab_def values (4, default, default, default, default);\ninsert into base_tab_def values (5, default, default, default, default),\n (6, default, default, default, default);\ninsert into base_tab_def_view values (11);\ninsert into base_tab_def_view values (12), (13);\ninsert into base_tab_def_view values (14, default, default, default, default);\ninsert into base_tab_def_view values (15, default, default, default, default),\n (16, default, default, default, default);\ninsert into base_tab_def_view values (17), (default);\nselect * from base_tab_def order by a;\n\n-- Using an unconditional DO INSTEAD rule should also cause NULLs to be\n-- inserted where there are no view defaults.\ndrop trigger base_tab_def_view_instrig on base_tab_def_view;\ndrop function base_tab_def_view_instrig_func;\ncreate rule base_tab_def_view_ins_rule as on insert to base_tab_def_view\n do instead insert into base_tab_def values (new.a, new.b, new.c, new.d, new.e);\ntruncate base_tab_def;\ninsert into base_tab_def values (1);\ninsert into base_tab_def values (2), (3);\ninsert into base_tab_def values (4, default, default, default, default);\ninsert into base_tab_def values (5, default, default, default, default),\n (6, default, default, default, default);\ninsert into base_tab_def_view values (11);\ninsert into base_tab_def_view values (12), (13);\ninsert into base_tab_def_view values (14, default, default, default, default);\ninsert into base_tab_def_view values (15, default, default, default, default),\n (16, default, default, default, default);\ninsert into base_tab_def_view values (17), (default);\nselect * from base_tab_def order by a;\n\n-- A DO ALSO rule should cause each row to be inserted twice. The first\n-- insert should behave the same as an auto-updatable view (using table\n-- defaults, unless overridden by view defaults). The second insert should\n-- behave the same as a rule-updatable view (inserting NULLs where there are\n-- no view defaults).\ndrop rule base_tab_def_view_ins_rule on base_tab_def_view;\ncreate rule base_tab_def_view_ins_rule as on insert to base_tab_def_view\n do also insert into base_tab_def values (new.a, new.b, new.c, new.d, new.e);\ntruncate base_tab_def;\ninsert into base_tab_def values (1);\ninsert into base_tab_def values (2), (3);\ninsert into base_tab_def values (4, default, default, default, default);\ninsert into base_tab_def values (5, default, default, default, default),\n (6, default, default, default, default);\ninsert into base_tab_def_view values (11);\ninsert into base_tab_def_view values (12), (13);\ninsert into base_tab_def_view values (14, default, default, default, default);\ninsert into base_tab_def_view values (15, default, default, default, default),\n (16, default, default, default, default);\ninsert into base_tab_def_view values (17), (default);\nselect * from base_tab_def order by a, c NULLS LAST;\n\n-- Test a DO ALSO INSERT ... SELECT rule\ndrop rule base_tab_def_view_ins_rule on base_tab_def_view;\ncreate rule base_tab_def_view_ins_rule as on insert to base_tab_def_view\n do also insert into base_tab_def (a, b, e) select new.a, new.b, 'xxx';\ntruncate base_tab_def;\ninsert into base_tab_def_view values (1, default, default, default, default);\ninsert into base_tab_def_view values (2, default, default, default, default),\n (3, default, default, default, default);\nselect * from base_tab_def order by a, e nulls first;\n\ndrop view base_tab_def_view;\ndrop table base_tab_def;\n\n-- Test defaults with array assignments\ncreate table base_tab (a serial, b int[], c text, d text default 'Table default');\ncreate view base_tab_view as select c, a, b from base_tab;\nalter view base_tab_view alter column c set default 'View default';\ninsert into base_tab_view (b[1], b[2], c, b[5], b[4], a, b[3])\nvalues (1, 2, default, 5, 4, default, 3), (10, 11, 'C value', 14, 13, 100, 12);\nselect * from base_tab order by a;\ndrop view base_tab_view;\ndrop table base_tab;", "explanation": "PL/pgSQL object from PostgreSQL core test for Updatable Views.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "plpgsql_trigger", "is_postgresql_specific": true, "sql_length": 4408, "num_statements": 55} {"question": "What is the difference in assets between customers with savings and checking?", "schema": "CREATE TABLE Accounts (CustomerID INT, AccountType VARCHAR(50), Balance DECIMAL(10,2)); INSERT INTO Accounts (CustomerID, AccountType, Balance) VALUES (1, 'Savings', 10000); INSERT INTO Accounts (CustomerID, AccountType, Balance) VALUES (2, 'Checking', 5000);", "sql": "SELECT SUM(CASE WHEN AccountType = 'Savings' THEN Balance ELSE -Balance END) FROM Accounts;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 91, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What U.S. Rap has life in the concrete jungle as the album?", "schema": "CREATE TABLE table_name_15 (us_rap VARCHAR, album VARCHAR)", "sql": "SELECT us_rap FROM table_name_15 WHERE album = 'life in the concrete jungle';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 77, "num_statements": 1} {"question": "PostgreSQL regression test 'strings': Write the SELECT query (example 421).", "schema": null, "sql": "select to_oct(256*256*256 - 1) AS \"77777777\";", "explanation": "Regression test for Strings in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select to_oct(256*256*256 - 1) AS \"77777777\") AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 45, "num_statements": 1} {"question": "pgTAP test for Ownership (assertion 72).", "schema": null, "sql": "-- It should ignore the view.\nSELECT * FROM check_test(\n sequence_owner_is('public', 'someview', current_user, 'mumble'),\n\tfalse,\n 'sequence_owner_is(sch, view, user, desc)',\n 'mumble',\n ' Sequence public.someview does not exist'\n);", "explanation": "SQL assertion from pgTAP test suite for Ownership.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 247, "num_statements": 1} {"question": "How many news articles were published in each quarter of 2022?", "schema": "CREATE TABLE news_publication_dates_2 (title VARCHAR(100), publication_date DATE); INSERT INTO news_publication_dates_2 (title, publication_date) VALUES ('Article 6', '2022-01-01'), ('Article 7', '2022-02-03'), ('Article 8', '2022-02-15'), ('Article 9', '2022-03-05'), ('Article 10', '2022-04-10'), ('Article 11', '2022-05-12');", "sql": "SELECT EXTRACT(QUARTER FROM publication_date) AS quarter, COUNT(*) AS articles_published FROM news_publication_dates_2 GROUP BY quarter;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 136, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the equipment for bike number 6", "schema": "CREATE TABLE table_16941304_4 (equipment VARCHAR, bike_no VARCHAR)", "sql": "SELECT equipment FROM table_16941304_4 WHERE bike_no = 6;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 57, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'predicate' (example 56).", "schema": null, "sql": "CREATE INDEX pred_tab_pred_idx ON pred_tab (a) WHERE b IS NOT NULL AND c IS NOT NULL;", "explanation": "DDL from PostgreSQL core regression test for Predicate.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "ddl_index", "is_postgresql_specific": false, "sql_length": 85, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: At what venue was the game played where the away team was South Melbourne", "schema": "CREATE TABLE table_name_49 (venue VARCHAR, away_team VARCHAR)", "sql": "SELECT venue FROM table_name_49 WHERE away_team = 'south melbourne';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 68, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is Team, when Location Attendance is \"Rose Garden 20,020\"?", "schema": "CREATE TABLE table_name_69 (team VARCHAR, location_attendance VARCHAR)", "sql": "SELECT team FROM table_name_69 WHERE location_attendance = 'rose garden 20,020';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 80, "num_statements": 1} {"question": "What is the average transaction amount for each digital asset in the 'crypto_transactions' table, partitioned by week?", "schema": "CREATE TABLE crypto_transactions (transaction_id INT, digital_asset VARCHAR(20), transaction_amount DECIMAL(10,2), transaction_time DATETIME);", "sql": "SELECT digital_asset, AVG(transaction_amount) as avg_transaction_amount, DATE_TRUNC('week', transaction_time) as week FROM crypto_transactions GROUP BY digital_asset, week ORDER BY week;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 186, "num_statements": 1} {"question": "How many properties were co-owned in each city, ordered by the number of co-owned properties in the 'co_ownership' table?", "schema": "CREATE TABLE co_ownership (id INT, city VARCHAR(255), property_id INT); INSERT INTO co_ownership (id, city, property_id) VALUES (1, 'Seattle', 101), (2, 'Seattle', 102), (3, 'Portland', 103), (4, 'Portland', 104), (5, 'Portland', 105), (6, 'Boston', 106);", "sql": "SELECT city, COUNT(property_id) OVER (PARTITION BY city ORDER BY COUNT(property_id) DESC) AS num_co_owned_properties FROM co_ownership;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "query_window_function", "is_postgresql_specific": true, "sql_length": 135, "num_statements": 1} {"question": "What is the average rating for cruelty-free products in each brand in the database?", "schema": "CREATE TABLE Brand_Rating (id INT, brand VARCHAR(255), product VARCHAR(255), rating INT, cruelty_free BOOLEAN); INSERT INTO Brand_Rating (id, brand, product, rating, cruelty_free) VALUES (1, 'Lush', 'Soak Stimulant Bath Bomb', 5, true), (2, 'The Body Shop', 'Born Lippy Strawberry Lip Balm', 4, true), (3, 'Estee Lauder', 'Advanced Night Repair Synchronized Recovery Complex II', 5, false), (4, 'Lush', 'Angels on Bare Skin Cleanser', 4, true), (5, 'The Body Shop', 'Tea Tree Skin Clearing Facial Wash', 3, true);", "sql": "SELECT brand, AVG(rating) as avg_rating FROM Brand_Rating WHERE cruelty_free = true GROUP BY brand;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 99, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many breadth entries are there when the vessel is marianarray?", "schema": "CREATE TABLE table_28132970_5 (breadth VARCHAR, vessel VARCHAR)", "sql": "SELECT COUNT(breadth) FROM table_28132970_5 WHERE vessel = 'Marianarray';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 73, "num_statements": 1} {"question": "Calculate the total volume of carbon sequestered in the year 2020 in private and public forests across the United States.", "schema": "CREATE TABLE forests_us (id INT, name VARCHAR(50), state VARCHAR(50), is_private BOOLEAN, is_national_park BOOLEAN); INSERT INTO forests_us (id, name, state, is_private, is_national_park) VALUES (1, 'Adirondack Forest', 'New York', true, false); CREATE TABLE carbon_sequestration_us (id INT, forest_id INT, year INT, sequestration FLOAT); INSERT INTO carbon_sequestration_us (id, forest_id, year, sequestration) VALUES (1, 1, 2020, 2500);", "sql": "SELECT SUM(cs.sequestration) FROM carbon_sequestration_us cs JOIN forests_us f ON cs.forest_id = f.id WHERE f.is_private = true OR f.is_national_park = true AND cs.year = 2020;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 176, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'create_view' (example 253).", "schema": null, "sql": "create view tt15v as select row(i)::nestedcomposite from int8_tbl i;", "explanation": "DDL from PostgreSQL core regression test for Create View.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "ddl_view", "is_postgresql_specific": false, "sql_length": 68, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'sepgsql' (example 11).", "schema": null, "sql": "SELECT * FROM t1p_ones WHERE o > 50 AND p like '%64%';", "explanation": "Example query from the 'sepgsql' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 54, "num_statements": 1} {"question": "Get the names of all menu items that have a price greater than 20 dollars", "schema": "CREATE TABLE menu_items (item_id INT, item_name TEXT, price DECIMAL(5,2)); INSERT INTO menu_items (item_id, item_name, price) VALUES (1, 'Burger', 9.99), (2, 'Lobster', 34.99), (3, 'Salad', 15.50);", "sql": "SELECT item_name FROM menu_items WHERE price > 20.00;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 53, "num_statements": 1} {"question": "Which education resources were distributed in South Sudan in Q4 2021?", "schema": "CREATE TABLE education_resources (id INT, resource TEXT, quantity INT, country TEXT, quarter INT, year INT); INSERT INTO education_resources (id, resource, quantity, country, quarter, year) VALUES (1, 'Textbooks', 500, 'South Sudan', 4, 2021), (2, 'School Supplies', 300, 'South Sudan', 4, 2021), (3, 'Laptops', 200, 'South Sudan', 4, 2021);", "sql": "SELECT DISTINCT resource FROM education_resources WHERE country = 'South Sudan' AND quarter = 4 AND year = 2021;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 112, "num_statements": 1} {"question": "What are the unique astrophysics observations made in the Antarctic region?", "schema": "CREATE TABLE Astrophysics_Observations (id INT, observation VARCHAR(100), researcher VARCHAR(100), observation_region VARCHAR(100), observation_date DATE); INSERT INTO Astrophysics_Observations (id, observation, researcher, observation_region, observation_date) VALUES (1, 'Gamma Ray Burst', 'Dr. J. Doe', 'Arctic', '2021-12-18'), (2, 'Neutron Star Merger', 'Dr. A. Smith', 'Antarctic', '2018-02-03');", "sql": "SELECT DISTINCT observation FROM Astrophysics_Observations WHERE observation_region = 'Antarctic';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 98, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many times was Arizona the team and the round was bigger than 11?", "schema": "CREATE TABLE table_name_86 (overall VARCHAR, school_club_team VARCHAR, round VARCHAR)", "sql": "SELECT COUNT(overall) FROM table_name_86 WHERE school_club_team = 'arizona' AND round > 11;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 91, "num_statements": 1} {"question": "How many art exhibitions were there in total in 2018 and 2019 combined?", "schema": "CREATE TABLE Exhibitions (id INT, year INT);INSERT INTO Exhibitions (id, year) VALUES (1, 2018), (2, 2019), (3, 2018), (4, 2017);", "sql": "SELECT COUNT(*) FROM Exhibitions WHERE year IN (2018, 2019);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 60, "num_statements": 1} {"question": "PostgreSQL regression test 'regex': Write the SELECT query (example 18).", "schema": null, "sql": "select regexp_match('abc', '(B)(c)', 'i');", "explanation": "Regression test for Regex in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select regexp_match('abc', '(B)(c)', 'i')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 42, "num_statements": 1} {"question": "Write the PL/pgSQL object from PostgreSQL regression test 'opr_sanity' (example 96).", "schema": null, "sql": "-- Check that all serialization functions have signature\n-- serialize(internal) returns bytea\n-- Also insist that they be strict; it's wasteful to run them on NULLs.\n\nSELECT a.aggfnoid, p.proname\nFROM pg_aggregate as a, pg_proc as p\nWHERE a.aggserialfn = p.oid AND\n (p.prorettype != 'bytea'::regtype OR p.pronargs != 1 OR\n p.proargtypes[0] != 'internal'::regtype OR\n NOT p.proisstrict);", "explanation": "PL/pgSQL object from PostgreSQL core test for Opr Sanity.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 397, "num_statements": 2} {"question": "Generate PostgreSQL SQL for: What are the number of points for associated with exactly 3 stolen ends?", "schema": "CREATE TABLE table_25176088_2 (pf VARCHAR, stolen_ends VARCHAR)", "sql": "SELECT COUNT(pf) FROM table_25176088_2 WHERE stolen_ends = 3;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 61, "num_statements": 1} {"question": "Show a SQL definition from the pg_partman project (pg_partman--0.4.2--1.0.0, item 9).", "schema": null, "sql": "/*\n * Create the trigger function for the parent table of a time-based partition set\n */\nCREATE OR REPLACE FUNCTION create_time_function(p_parent_table text) RETURNS void\n LANGUAGE plpgsql SECURITY DEFINER\n AS $$\nDECLARE\n\nv_control text;\nv_current_partition_name text;\nv_current_partition_timestamp timestamptz;\nv_datetime_string text;\nv_final_partition_timestamp timestamptz;\nv_job_id bigint;\nv_jobmon_schema text;\nv_old_search_path text;\nv_next_partition_name text;\nv_next_partition_timestamp timestamptz;\nv_part_interval interval;\nv_premake int;\nv_prev_partition_name text;\nv_prev_partition_timestamp timestamptz;\nv_step_id bigint;\nv_trig_func text;\nv_type text;\n\nBEGIN\n\nSELECT nspname INTO v_jobmon_schema FROM pg_namespace n, pg_extension e WHERE e.extname = 'pg_jobmon' AND e.extnamespace = n.oid;\nIF v_jobmon_schema IS NOT NULL THEN\n SELECT current_setting('search_path') INTO v_old_search_path;\n EXECUTE 'SELECT set_config(''search_path'',''@extschema@,'||v_jobmon_schema||''',''false'')';\nEND IF;\n\nIF v_jobmon_schema IS NOT NULL THEN\n v_job_id := add_job('PARTMAN CREATE FUNCTION: '||p_parent_table);\n v_step_id := add_step(v_job_id, 'Creating partition function for table '||p_parent_table);\nEND IF;\n\nSELECT type\n , part_interval::interval\n , control\n , premake\n , datetime_string\nINTO v_type\n , v_part_interval\n , v_control\n , v_premake\n , v_datetime_string\nFROM @extschema@.part_config\nWHERE parent_table = p_parent_table\nAND (type = 'time-static' OR type = 'time-dynamic');\n\nIF NOT FOUND THEN\n RAISE EXCEPTION 'ERROR: no config found for %', p_parent_table;\nEND IF;\n\nIF v_type = 'time-static' THEN\n\n CASE\n WHEN v_part_interval = '15 mins' THEN\n v_current_partition_timestamp := date_trunc('hour', CURRENT_TIMESTAMP) +\n '15min'::interval * floor(date_part('minute', CURRENT_TIMESTAMP) / 15.0);\n WHEN v_part_interval = '30 mins' THEN\n v_current_partition_timestamp := date_trunc('hour', CURRENT_TIMESTAMP) +\n '30min'::interval * floor(date_part('minute', CURRENT_TIMESTAMP) / 30.0);\n WHEN v_part_interval = '1 hour' THEN\n v_current_partition_timestamp := date_trunc('hour', CURRENT_TIMESTAMP);\n WHEN v_part_interval = '1 day' THEN\n v_current_partition_timestamp := date_trunc('day', CURRENT_TIMESTAMP);\n WHEN v_part_interval = '1 week' THEN\n v_current_partition_timestamp := date_trunc('week', CURRENT_TIMESTAMP);\n WHEN v_part_interval = '1 month' THEN\n v_current_partition_timestamp := date_trunc('month', CURRENT_TIMESTAMP);\n WHEN v_part_interval = '3 months' THEN\n v_current_partition_timestamp := date_trunc('quarter', CURRENT_TIMESTAMP);\n WHEN v_part_interval = '1 year' THEN\n v_current_partition_timestamp := date_trunc('year', CURRENT_TIMESTAMP);\n END CASE;\n\n v_current_partition_name := p_parent_table || '_p' || to_char(v_current_partition_timestamp, v_datetime_string);\n v_next_partition_timestamp := v_current_partition_timestamp + v_part_interval::interval;\n\n v_trig_func := 'CREATE OR REPLACE FUNCTION '||p_parent_table||'_part_trig_func() RETURNS trigger LANGUAGE plpgsql AS $t$\n BEGIN\n IF TG_OP = ''INSERT'' THEN\n IF NEW.'||v_control||' >= '||quote_literal(v_current_partition_timestamp)||' AND NEW.'||v_control||' < '||quote_literal(v_next_partition_timestamp)|| ' THEN\n INSERT INTO '||v_current_partition_name||' VALUES (NEW.*); ';\n FOR i IN 1..v_premake LOOP\n v_prev_partition_timestamp := v_current_partition_timestamp - (v_part_interval::interval * i);\n v_next_partition_timestamp := v_current_partition_timestamp + (v_part_interval::interval * i);\n v_final_partition_timestamp := v_next_partition_timestamp + (v_part_interval::interval);\n v_prev_partition_name := p_parent_table || '_p' || to_char(v_prev_partition_timestamp, v_datetime_string);\n v_next_partition_name := p_parent_table || '_p' || to_char(v_next_partition_timestamp, v_datetime_string);\n\n v_trig_func := v_trig_func ||'\n ELSIF NEW.'||v_control||' >= '||quote_literal(v_prev_partition_timestamp)||' AND NEW.'||v_control||' < '||\n quote_literal(v_prev_partition_timestamp + v_part_interval::interval)|| ' THEN\n INSERT INTO '||v_prev_partition_name||' VALUES (NEW.*);\n ELSIF NEW.'||v_control||' >= '||quote_literal(v_next_partition_timestamp)||' AND NEW.'||v_control||' < '||\n quote_literal(v_final_partition_timestamp)|| ' THEN\n INSERT INTO '||v_next_partition_name||' VALUES (NEW.*); ';\n END LOOP;\n v_trig_func := v_trig_func ||'\n ELSE\n RETURN NEW;\n END IF;\n END IF;\n RETURN NULL;\n END $t$;';\n\n EXECUTE v_trig_func;\n\n IF v_jobmon_schema IS NOT NULL THEN\n PERFORM update_step(v_step_id, 'OK', 'Added function for current time interval: '||\n v_current_partition_timestamp||' to '||(v_final_partition_timestamp-'1sec'::interval));\n END IF;\n\nELSIF v_type = 'time-dynamic' THEN\n\n v_trig_func := 'CREATE OR REPLACE FUNCTION '||p_parent_table||'_part_trig_func() RETURNS trigger LANGUAGE plpgsql AS $t$\n DECLARE\n v_count int;\n v_partition_name text;\n v_partition_timestamp timestamptz;\n v_schemaname text;\n v_tablename text;\n BEGIN\n IF TG_OP = ''INSERT'' THEN\n ';\n CASE\n WHEN v_part_interval = '15 mins' THEN\n v_trig_func := v_trig_func||'v_partition_timestamp := date_trunc(''hour'', NEW.'||v_control||') +\n ''15min''::interval * floor(date_part(''minute'', NEW.'||v_control||') / 15.0);';\n WHEN v_part_interval = '30 mins' THEN\n v_trig_func := v_trig_func||'v_partition_timestamp := date_trunc(''hour'', NEW.'||v_control||') +\n ''30min''::interval * floor(date_part(''minute'', NEW.'||v_control||') / 30.0);';\n WHEN v_part_interval = '1 hour' THEN\n v_trig_func := v_trig_func||'v_partition_timestamp := date_trunc(''hour'', NEW.'||v_control||');';\n WHEN v_part_interval = '1 day' THEN\n v_trig_func := v_trig_func||'v_partition_timestamp := date_trunc(''day'', NEW.'||v_control||');';\n WHEN v_part_interval = '1 week' THEN\n v_trig_func := v_trig_func||'v_partition_timestamp := date_trunc(''week'', NEW.'||v_control||');';\n WHEN v_part_interval = '1 month' THEN\n v_trig_func := v_trig_func||'v_partition_timestamp := date_trunc(''month'', NEW.'||v_control||');';\n WHEN v_part_interval = '3 months' THEN\n v_trig_func := v_trig_func||'v_partition_timestamp := date_trunc(''quarter'', NEW.'||v_control||');';\n WHEN v_part_interval = '1 year' THEN\n v_trig_func := v_trig_func||'v_partition_timestamp := date_trunc(''year'', NEW.'||v_control||');';\n END CASE;\n\n v_trig_func := v_trig_func||'\n v_partition_name := '''||p_parent_table||'_p''|| to_char(v_partition_timestamp, '||quote_literal(v_datetime_string)||');\n v_schemaname := split_part(v_partition_name, ''.'', 1);\n v_tablename := split_part(v_partition_name, ''.'', 2);\n SELECT count(*) INTO v_count FROM pg_tables WHERE schemaname = v_schemaname AND tablename = v_tablename;\n IF v_count > 0 THEN\n EXECUTE ''INSERT INTO ''||v_partition_name||'' VALUES($1.*)'' USING NEW;\n ELSE\n RETURN NEW;\n END IF;\n END IF;\n\n RETURN NULL;\n END $t$;';\n\n EXECUTE v_trig_func;\n\n IF v_jobmon_schema IS NOT NULL THEN\n PERFORM update_step(v_step_id, 'OK', 'Added function for dynamic time table: '||p_parent_table);\n END IF;\n\nELSE\n RAISE EXCEPTION 'ERROR: Invalid time partitioning type given: %', v_type;\nEND IF;\n\nIF v_jobmon_schema IS NOT NULL THEN\n PERFORM close_job(v_job_id);\n EXECUTE 'SELECT set_config(''search_path'','''||v_old_search_path||''',''false'')';\nEND IF;\n\nEXCEPTION\n WHEN OTHERS THEN\n IF v_jobmon_schema IS NOT NULL THEN\n EXECUTE 'SELECT set_config(''search_path'',''@extschema@,'||v_jobmon_schema||''',''false'')';\n IF v_job_id IS NULL THEN\n v_job_id := add_job('PARTMAN CREATE FUNCTION: '||p_parent_table);\n v_step_id := add_step(v_job_id, 'Partition function maintenance for table '||p_parent_table||' failed');\n ELSIF v_step_id IS NULL THEN\n v_step_id := add_step(v_job_id, 'EXCEPTION before first step logged');\n END IF;\n PERFORM update_step(v_step_id, 'CRITICAL', 'ERROR: '||coalesce(SQLERRM,'unknown'));\n PERFORM fail_job(v_job_id);\n EXECUTE 'SELECT set_config(''search_path'','''||v_old_search_path||''',''false'')';\n END IF;\n RAISE EXCEPTION '%', SQLERRM;\nEND\n$$;", "explanation": "SQL definition from the open-source pg_partman PostgreSQL project.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "plpgsql_trigger", "is_postgresql_specific": true, "sql_length": 9356, "num_statements": 111} {"question": "What is the percentage of legal aid clinics that are located in urban areas?", "schema": "CREATE TABLE legal_aid_clinics (clinic_id INT, area_type VARCHAR(10));", "sql": "SELECT 100.0 * COUNT(*) / (SELECT COUNT(*) FROM legal_aid_clinics) AS percentage FROM legal_aid_clinics WHERE area_type = 'urban';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 130, "num_statements": 1} {"question": "What is the total number of cybersecurity incidents detected in the Middle East and North Africa (MENA) region in 2021?", "schema": "CREATE TABLE security_incidents (id INT, region VARCHAR(50), incident_date DATE, incident_number INT); INSERT INTO security_incidents (id, region, incident_date, incident_number) VALUES (1, 'MENA', '2021-02-03', 100), (2, 'MENA', '2021-12-20', 200);", "sql": "SELECT SUM(incident_number) FROM security_incidents WHERE region = 'MENA' AND incident_date BETWEEN '2021-01-01' AND '2021-12-31';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 130, "num_statements": 1} {"question": "PostgreSQL regression test 'encoding': Write the SELECT query (example 9).", "schema": null, "sql": "SELECT good, truncated, with_nul FROM regress_encoding;", "explanation": "Regression test for Encoding in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT good, truncated, with_nul FROM regress_encoding) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 55, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What season has 2 as a rank?", "schema": "CREATE TABLE table_name_18 (season VARCHAR, rank VARCHAR)", "sql": "SELECT season FROM table_name_18 WHERE rank = '2';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 50, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the Margin, when Date is 19 July 2008 (Round 19)?", "schema": "CREATE TABLE table_name_27 (margin VARCHAR, date VARCHAR)", "sql": "SELECT margin FROM table_name_27 WHERE date = '19 july 2008 (round 19)';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 72, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the Year for Jazz club of Houston?", "schema": "CREATE TABLE table_name_54 (years_for_jazz VARCHAR, school_club_team VARCHAR)", "sql": "SELECT years_for_jazz FROM table_name_54 WHERE school_club_team = 'houston';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 76, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the TO par for the player who scored 68-69=137?", "schema": "CREATE TABLE table_name_82 (to_par VARCHAR, score VARCHAR)", "sql": "SELECT to_par FROM table_name_82 WHERE score = 68 - 69 = 137;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 61, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: what city is the toamasina airport in?", "schema": "CREATE TABLE table_name_42 (city VARCHAR, airport VARCHAR)", "sql": "SELECT city FROM table_name_42 WHERE airport = 'toamasina airport';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 67, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which team 1 played against team 2 of Tours FC (D2)?", "schema": "CREATE TABLE table_name_21 (team_1 VARCHAR, team_2 VARCHAR)", "sql": "SELECT team_1 FROM table_name_21 WHERE team_2 = 'tours fc (d2)';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 64, "num_statements": 1} {"question": "Write the SQL definition from PostgreSQL contrib extension 'contrib' (item 3).", "schema": null, "sql": "CREATE FUNCTION pgstattuple(IN reloid regclass,\n OUT table_len BIGINT,\t\t-- physical table length in bytes\n OUT tuple_count BIGINT,\t\t-- number of live tuples\n OUT tuple_len BIGINT,\t\t-- total tuples length in bytes\n OUT tuple_percent FLOAT8,\t\t-- live tuples in %\n OUT dead_tuple_count BIGINT,\t-- number of dead tuples\n OUT dead_tuple_len BIGINT,\t\t-- total dead tuples length in bytes\n OUT dead_tuple_percent FLOAT8,\t-- dead tuples in %\n OUT free_space BIGINT,\t\t-- free space in bytes\n OUT free_percent FLOAT8)\t\t-- free space in %\nAS 'MODULE_PATHNAME', 'pgstattuplebyid'\nLANGUAGE C STRICT;", "explanation": "SQL definition from the 'contrib' PostgreSQL contrib extension.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 613, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who had the high assists on November 17?", "schema": "CREATE TABLE table_27744976_6 (high_assists VARCHAR, date VARCHAR)", "sql": "SELECT high_assists FROM table_27744976_6 WHERE date = 'November 17';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 69, "num_statements": 1} {"question": "List the facilities located in California and their corresponding environmental impact scores from the 'facilities' and 'scores' tables.", "schema": "CREATE TABLE facilities(facility_id INT, facility_name TEXT, state TEXT); CREATE TABLE scores(facility_id INT, environmental_score INT);", "sql": "SELECT facilities.facility_name, scores.environmental_score FROM facilities INNER JOIN scores ON facilities.facility_id = scores.facility_id WHERE facilities.state = 'California';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 179, "num_statements": 1} {"question": "Update the names of all authors from the 'Freelance' category to 'Independent Contributor'.", "schema": "CREATE TABLE authors (id INT, name TEXT, category TEXT); INSERT INTO authors (id, name, category) VALUES (1, 'Jane Doe', 'Freelance');", "sql": "UPDATE authors SET category = 'Independent Contributor' WHERE category = 'Freelance';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 85, "num_statements": 1} {"question": "Which country has the highest total production of rare earth elements, Australia or Brazil?", "schema": "CREATE TABLE rare_earth_production (country VARCHAR(20), quantity INT); INSERT INTO rare_earth_production (country, quantity) VALUES ('Australia', 20000), ('Brazil', 15000);", "sql": "SELECT country, MAX(quantity) FROM rare_earth_production WHERE country IN ('Australia', 'Brazil') GROUP BY country;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 115, "num_statements": 1} {"question": "What is the total greenhouse gas emissions reduction due to energy efficiency projects in the EU?", "schema": "CREATE TABLE greenhouse_gas_emissions (id INT PRIMARY KEY, source_type VARCHAR(50), country VARCHAR(50), year INT, amount DECIMAL(10,2));CREATE TABLE energy_efficiency_projects (id INT PRIMARY KEY, project_type VARCHAR(50), country VARCHAR(50), year INT, energy_savings DECIMAL(10,2));CREATE VIEW v_eu_energy_efficiency_projects AS SELECT eep.project_type, eep.country, SUM(eep.energy_savings) AS total_energy_savings FROM energy_efficiency_projects eep WHERE eep.country LIKE 'EU%' GROUP BY eep.project_type, eep.country;CREATE VIEW v_ghg_emissions_reductions AS SELECT ghe.source_type, ghe.country, SUM(ghe.amount) * -1 AS total_reduction FROM greenhouse_gas_emissions ghe JOIN v_eu_energy_efficiency_projects eep ON ghe.country = eep.country WHERE ghe.source_type = 'Energy' GROUP BY ghe.source_type, ghe.country;", "sql": "SELECT total_reduction FROM v_ghg_emissions_reductions WHERE source_type = 'Energy';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 84, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the airdate of the episode with 106000 dave ja vu viewers?", "schema": "CREATE TABLE table_25721_3 (airdate VARCHAR, dave_ja_vu_viewers VARCHAR)", "sql": "SELECT airdate FROM table_25721_3 WHERE dave_ja_vu_viewers = 106000;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 68, "num_statements": 1} {"question": "Update the financialWellbeing table to reflect increased financial wellbeing for user 'Ahmed'.", "schema": "CREATE TABLE FinancialWellbeing (userID VARCHAR(20), wellbeingScore INT); INSERT INTO FinancialWellbeing (userID, wellbeingScore) VALUES ('Ahmed', 6), ('Sara', 8);", "sql": "UPDATE FinancialWellbeing SET wellbeingScore = 8 WHERE userID = 'Ahmed';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 72, "num_statements": 1} {"question": "Update the mental health parity data to reflect the correct number of mental health visits for patient 5.", "schema": "CREATE TABLE MentalHealthParity (PatientID int, MentalHealthVisits int); INSERT INTO MentalHealthParity (PatientID, MentalHealthVisits) VALUES (1, 5), (2, 3), (3, 6), (4, 4), (5, 8), (6, 7);", "sql": "UPDATE MentalHealthParity SET MentalHealthVisits = 7 WHERE PatientID = 5;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 73, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the number of Frequency MHz in woodstock, georgia?", "schema": "CREATE TABLE table_name_91 (frequency_mhz INTEGER, city_of_license VARCHAR)", "sql": "SELECT SUM(frequency_mhz) FROM table_name_91 WHERE city_of_license = 'woodstock, georgia';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 90, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the Score of the Burnley Home game?", "schema": "CREATE TABLE table_name_57 (score VARCHAR, home_team VARCHAR)", "sql": "SELECT score FROM table_name_57 WHERE home_team = 'burnley';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 60, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which lap number had a grid number bigger than 9 and where the driver was Mark Webber?", "schema": "CREATE TABLE table_name_77 (laps VARCHAR, grid VARCHAR, driver VARCHAR)", "sql": "SELECT laps FROM table_name_77 WHERE grid > 9 AND driver = 'mark webber';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 73, "num_statements": 1} {"question": "Update the population of the Arctic Fox by 150.", "schema": "CREATE TABLE species (id INT PRIMARY KEY, name VARCHAR(50), population INT);", "sql": "WITH updated_population AS (UPDATE species SET population = population + 150 WHERE name = 'Arctic Fox') SELECT * FROM species WHERE name = 'Arctic Fox';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 152, "num_statements": 1} {"question": "What is the maximum support duration for each location?", "schema": "CREATE TABLE If Not Exists refugee_support (supporter_id INT, supporter_name TEXT, location TEXT, support_duration INT); INSERT INTO refugee_support (supporter_id, supporter_name, location, support_duration) VALUES (4, 'Alex Johnson', 'Afghanistan', 75), (5, 'Sophia Lee', 'Pakistan', 50);", "sql": "SELECT location, MAX(support_duration) as max_support_duration FROM refugee_support GROUP BY location;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 102, "num_statements": 1} {"question": "What is the sum of all ticket sales in Asia?", "schema": "CREATE TABLE asia_concerts (concert_id INT, sales DECIMAL(10, 2)); INSERT INTO asia_concerts (concert_id, sales) VALUES (1, 5000), (2, 6000), (3, 7000), (4, 8000);", "sql": "SELECT SUM(sales) AS total_sales FROM asia_concerts WHERE country = 'Asia';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 75, "num_statements": 1} {"question": "Find the number of male and female students in the 'Student' table", "schema": "CREATE TABLE Student (StudentID INT, Gender VARCHAR(10)); INSERT INTO Student (StudentID, Gender) VALUES (1, 'Male'), (2, 'Female'), (3, 'Male');", "sql": "SELECT Gender, COUNT(*) FROM Student GROUP BY Gender;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 53, "num_statements": 1} {"question": "PL/pgSQL test: Plperl Trigger (example 10).", "schema": null, "sql": "DROP TRIGGER show_trigger_data_trig on trigger_test;", "explanation": "PL/pgSQL example from PostgreSQL source test for Plperl Trigger.", "validation_query": null, "source": "plpgsql_source", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 52, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Where did the game(s) against Iowa take place?", "schema": "CREATE TABLE table_name_94 (site VARCHAR, opponent VARCHAR)", "sql": "SELECT site FROM table_name_94 WHERE opponent = 'iowa';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 55, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the Date of the Rodez, France Tournament?", "schema": "CREATE TABLE table_name_63 (date VARCHAR, tournament VARCHAR)", "sql": "SELECT date FROM table_name_63 WHERE tournament = 'rodez, france';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "What is the count of packages in 'received' status at each warehouse in 'Asia'?", "schema": "CREATE TABLE Warehouse (id INT, name VARCHAR(20), city VARCHAR(20), country VARCHAR(20)); INSERT INTO Warehouse (id, name, city, country) VALUES (1, 'Seoul Warehouse', 'Seoul', 'South Korea'), (2, 'Mumbai Warehouse', 'Mumbai', 'India'), (3, 'Tokyo Warehouse', 'Tokyo', 'Japan'); CREATE TABLE Packages (id INT, warehouse_id INT, status VARCHAR(20)); INSERT INTO Packages (id, warehouse_id, status) VALUES (1, 1, 'received'), (2, 1, 'processing'), (3, 2, 'received'), (4, 2, 'received'), (5, 3, 'processing');", "sql": "SELECT warehouse_id, COUNT(*) FROM Packages WHERE status = 'received' GROUP BY warehouse_id HAVING country = 'Asia';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 116, "num_statements": 1} {"question": "PL/pgSQL test: Plpgsql Transaction (example 27).", "schema": null, "sql": "$$;\n\nCALL transaction_test8();\n\n\n-- commit inside cursor loop\nCREATE TABLE test2 (x int);\nINSERT INTO test2 VALUES (0), (1), (2), (3), (4);\n\nTRUNCATE test1;\n\nDO LANGUAGE plpgsql $$\nDECLARE\n r RECORD;", "explanation": "PL/pgSQL example from PostgreSQL source test for Plpgsql Transaction.", "validation_query": null, "source": "plpgsql_source", "difficulty": "basic", "category": "plpgsql", "is_postgresql_specific": true, "sql_length": 202, "num_statements": 6} {"question": "Generate PostgreSQL SQL for: Which Percentage has a Draw of 6?", "schema": "CREATE TABLE table_name_9 (percentage VARCHAR, draw VARCHAR)", "sql": "SELECT percentage FROM table_name_9 WHERE draw = 6;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 51, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who are the players that have attended Stanford?", "schema": "CREATE TABLE table_16494599_10 (player VARCHAR, school_club_team VARCHAR)", "sql": "SELECT player FROM table_16494599_10 WHERE school_club_team = 'Stanford';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 73, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which LEMA/SUBLEMA has a Ch of Senators of 2?", "schema": "CREATE TABLE table_name_4 (lema_sublema VARCHAR, ch_of_senators VARCHAR)", "sql": "SELECT lema_sublema FROM table_name_4 WHERE ch_of_senators = '2';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 65, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the number of series for 19", "schema": "CREATE TABLE table_23958944_6 (no_by_series VARCHAR, no_by_season VARCHAR)", "sql": "SELECT no_by_series FROM table_23958944_6 WHERE no_by_season = 19;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "Who are the founders of the startups that have received funding of over 2000000?", "schema": "CREATE TABLE company (name VARCHAR(255), founder_name VARCHAR(100)); INSERT INTO company (name, founder_name) VALUES ('CompanyA', 'John Smith'), ('CompanyB', 'Jane Doe'), ('CompanyC', 'Michael Brown'), ('CompanyD', 'Sarah Johnson'); CREATE TABLE funding (company_name VARCHAR(255), amount INT); INSERT INTO funding (company_name, amount) VALUES ('CompanyA', 1000000), ('CompanyB', 2500000), ('CompanyC', 1500000), ('CompanyD', 3000000);", "sql": "SELECT company.founder_name FROM company INNER JOIN funding ON company.name = funding.company_name WHERE funding.amount > 2000000;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 130, "num_statements": 1} {"question": "What is the average rating of cultural heritage sites in Madrid?", "schema": "CREATE TABLE heritage_sites (site_id INT, site_name TEXT, city TEXT, rating INT); INSERT INTO heritage_sites (site_id, site_name, city, rating) VALUES (1, 'Prado Museum', 'Madrid', 5), (2, 'Royal Palace', 'Madrid', 4), (3, 'Retiro Park', 'Madrid', 4);", "sql": "SELECT AVG(rating) FROM heritage_sites WHERE city = 'Madrid';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 61, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many picks for round 12?", "schema": "CREATE TABLE table_name_5 (pick__number VARCHAR, round VARCHAR)", "sql": "SELECT COUNT(pick__number) FROM table_name_5 WHERE round = 12;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 62, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who was the opponent in a week less than 8 on October 9, 1960?", "schema": "CREATE TABLE table_name_24 (opponent VARCHAR, week VARCHAR, date VARCHAR)", "sql": "SELECT opponent FROM table_name_24 WHERE week < 8 AND date = 'october 9, 1960';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 79, "num_statements": 1} {"question": "What is the total billing amount by attorney in the Southwest region?", "schema": "CREATE TABLE Attorneys (AttorneyID int, Name varchar(50), Region varchar(10)); INSERT INTO Attorneys VALUES (1, 'Alex Garcia', 'Southwest'), (2, 'Hee Jun Lee', 'Northeast'); CREATE TABLE Billing (BillingID int, AttorneyID int, Amount decimal(10,2)); INSERT INTO Billing VALUES (1, 1, 800.00), (2, 1, 1200.00), (3, 2, 500.00), (4, 2, 700.00);", "sql": "SELECT A.Name, SUM(B.Amount) as TotalBilling FROM Attorneys A JOIN Billing B ON A.AttorneyID = B.AttorneyID WHERE A.Region = 'Southwest' GROUP BY A.Name;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 153, "num_statements": 1} {"question": "Update the 'space_exploration' table to mark the 'Apollo 11' mission as 'successful'", "schema": "CREATE TABLE space_exploration (id INT PRIMARY KEY, mission_name VARCHAR(50), mission_status VARCHAR(20));", "sql": "UPDATE space_exploration SET mission_status = 'successful' WHERE mission_name = 'Apollo 11';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 92, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who got the loss on the game that ended in a 52-36 record?", "schema": "CREATE TABLE table_name_22 (loss VARCHAR, record VARCHAR)", "sql": "SELECT loss FROM table_name_22 WHERE record = '52-36';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 54, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the 2000 value if the 1998 value is 1.5?", "schema": "CREATE TABLE table_name_67 (Id VARCHAR)", "sql": "SELECT 2000 FROM table_name_67 WHERE 1998 = '1.5';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 50, "num_statements": 1} {"question": "What is the maximum funding received by startups in the clean energy sector that were founded after 2010?", "schema": "CREATE TABLE company (id INT, name TEXT, industry TEXT, founding_year INT); INSERT INTO company (id, name, industry, founding_year) VALUES (1, 'CleanTechInnovations', 'Clean Energy', 2011), (2, 'PowerEasy', 'Clean Energy', 2016); CREATE TABLE funding (id INT, company_id INT, amount INT); INSERT INTO funding (id, company_id, amount) VALUES (1, 1, 3000000), (2, 2, 1000000);", "sql": "SELECT MAX(funding.amount) FROM funding INNER JOIN company ON funding.company_id = company.id WHERE company.industry = 'Clean Energy' AND company.founding_year > 2010;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 167, "num_statements": 1} {"question": "How many events are in the 'events' table for each type?", "schema": "CREATE TABLE events (event_id INT, name VARCHAR(50), type VARCHAR(50), attendance INT); INSERT INTO events (event_id, name, type, attendance) VALUES (1, 'Art Exhibit', 'Painting', 1500); INSERT INTO events (event_id, name, type, attendance) VALUES (2, 'Theater Performance', 'Play', 850); INSERT INTO events (event_id, name, type, attendance) VALUES (3, 'Art Exhibit', 'Sculpture', 1200);", "sql": "SELECT type, COUNT(*) as event_count FROM events GROUP BY type;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 63, "num_statements": 1} {"question": "What is the maximum energy storage capacity (in MWh) in South Korea?", "schema": "CREATE TABLE energy_storage (country VARCHAR(20), capacity FLOAT); INSERT INTO energy_storage (country, capacity) VALUES ('South Korea', 500.0), ('South Korea', 600.0), ('South Korea', 700.0);", "sql": "SELECT MAX(capacity) FROM energy_storage WHERE country = 'South Korea';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 71, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which player on the Bulls has a 6 May 1978 birthday?", "schema": "CREATE TABLE table_name_58 (player VARCHAR, club_province VARCHAR, date_of_birth__age_ VARCHAR)", "sql": "SELECT player FROM table_name_58 WHERE club_province = 'bulls' AND date_of_birth__age_ = '6 may 1978';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 102, "num_statements": 1} {"question": "Update the record for provider with ID 12345 to reflect their new title, 'Senior Community Health Worker'", "schema": "CREATE TABLE providers (id INT PRIMARY KEY, name VARCHAR(100), city VARCHAR(50), specialty VARCHAR(50));", "sql": "UPDATE providers SET specialty = 'Senior Community Health Worker' WHERE id = 12345;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 83, "num_statements": 1} {"question": "Select the details of the fields that have a size greater than 5000 square meters", "schema": "CREATE TABLE PrecisionAgriculture.FieldDetails (FieldID INT, FieldSize FLOAT, Location VARCHAR(255));", "sql": "SELECT FieldID, FieldSize, Location FROM PrecisionAgriculture.FieldDetails WHERE FieldSize > 5000;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 98, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the name of the Home Stadium in which the Division is in the south, and Conference is national, as well as being in the city named charlotte, North Carolina?", "schema": "CREATE TABLE table_name_64 (home_stadium VARCHAR, city VARCHAR, division VARCHAR, conference VARCHAR)", "sql": "SELECT home_stadium FROM table_name_64 WHERE division = 'south' AND conference = 'national' AND city = 'charlotte, north carolina';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 131, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the result of the election in the Texas 3 district?", "schema": "CREATE TABLE table_1342149_43 (result VARCHAR, district VARCHAR)", "sql": "SELECT result FROM table_1342149_43 WHERE district = 'Texas 3';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 63, "num_statements": 1} {"question": "Change the court id from 3 to 4 for judge with id 4", "schema": "CREATE TABLE judges (id INT, first_name VARCHAR(20), last_name VARCHAR(20), court_id INT); INSERT INTO judges (id, first_name, last_name, court_id) VALUES (4, 'Fatima', 'Adebayo', 3); CREATE TABLE courts (id INT, name VARCHAR(50), location VARCHAR(50)); INSERT INTO courts (id, name, location) VALUES (3, 'High Court of New Zealand', 'Wellington'); INSERT INTO courts (id, name, location) VALUES (4, 'Federal Court of Nigeria', 'Abuja');", "sql": "UPDATE judges SET court_id = 4 WHERE id = 4;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 44, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the album Closer: The Best of Sarah Mclachlan certified as?", "schema": "CREATE TABLE table_name_88 (certification VARCHAR, album VARCHAR)", "sql": "SELECT certification FROM table_name_88 WHERE album = 'closer: the best of sarah mclachlan';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 92, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the date amer sulaiman won?", "schema": "CREATE TABLE table_name_50 (date VARCHAR, winner VARCHAR)", "sql": "SELECT date FROM table_name_50 WHERE winner = 'amer sulaiman';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 62, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How big was the largest crowd recorded at the Arden Street Oval venue?", "schema": "CREATE TABLE table_name_73 (crowd INTEGER, venue VARCHAR)", "sql": "SELECT MAX(crowd) FROM table_name_73 WHERE venue = 'arden street oval';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 71, "num_statements": 1} {"question": "Write the SQL definition from PostgreSQL contrib extension 'contrib' (item 2).", "schema": null, "sql": "CREATE FUNCTION g_intbig_options(internal)\nRETURNS void\nAS 'MODULE_PATHNAME', 'g_intbig_options'\nLANGUAGE C IMMUTABLE PARALLEL SAFE;", "explanation": "SQL definition from the 'contrib' PostgreSQL contrib extension.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 132, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'foreign_key' (example 302).", "schema": null, "sql": "update pktable set base1=base1*4 where base1<3;", "explanation": "DML from PostgreSQL core regression test for Foreign Key.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 47, "num_statements": 1} {"question": "How many students are enrolled in the 'AssistiveTechnology' and 'SignLanguageInterpreter' programs in the 'StudentPrograms' table?", "schema": "CREATE TABLE StudentPrograms (student_id INT, program_name VARCHAR(255)); INSERT INTO StudentPrograms (student_id, program_name) VALUES (1, 'AssistiveTechnology'), (2, 'SignLanguageInterpreter'), (3, 'ExtendedTestingTime'), (4, 'AssistiveTechnology'), (5, 'ExtendedTestingTime');", "sql": "SELECT COUNT(*) FROM StudentPrograms WHERE program_name IN ('AssistiveTechnology', 'SignLanguageInterpreter');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 110, "num_statements": 1} {"question": "What is the average financial capability score for men in South Africa and Brazil?", "schema": "CREATE TABLE afc_scores (name TEXT, gender TEXT, country TEXT, score NUMERIC); INSERT INTO afc_scores (name, gender, country, score) VALUES ('John Doe', 'Male', 'South Africa', 70), ('John Smith', 'Male', 'Brazil', 75), ('Jane Doe', 'Female', 'South Africa', 75);", "sql": "SELECT AVG(score) FROM afc_scores WHERE gender = 'Male' AND country IN ('South Africa', 'Brazil');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 98, "num_statements": 1} {"question": "List all wildlife species and their respective habitats.", "schema": "CREATE TABLE wildlife_species (id INT, species VARCHAR(255)); CREATE TABLE wildlife_habitat (id INT, habitat VARCHAR(255)); INSERT INTO wildlife_species (id, species) VALUES (1, 'Tiger'), (2, 'Elephant'), (3, 'Monkey'); INSERT INTO wildlife_habitat (id, habitat) VALUES (1, 'Jungle'), (2, 'Savannah'), (3, 'Forest');", "sql": "SELECT wildlife_species.species, wildlife_habitat.habitat FROM wildlife_species INNER JOIN wildlife_habitat ON wildlife_species.id = wildlife_habitat.id;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 153, "num_statements": 1} {"question": "What is the total revenue from Electronic dance music concerts in May?", "schema": "CREATE TABLE Concerts (id INT, genre VARCHAR(20), price DECIMAL(5,2)); INSERT INTO Concerts (id, genre, price) VALUES (1, 'Electronic dance music', 100.00), (2, 'Rock', 75.00), (3, 'Electronic dance music', 120.00); CREATE TABLE Dates (id INT, concert_id INT, date DATE); INSERT INTO Dates (id, concert_id) VALUES (1, 1), (2, 2), (3, 3);", "sql": "SELECT SUM(price) FROM Concerts JOIN Dates ON Concerts.id = Dates.concert_id WHERE Concerts.genre = 'Electronic dance music' AND Dates.date BETWEEN '2022-05-01' AND '2022-05-31';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 178, "num_statements": 1} {"question": "List all routes with fare collections in July.", "schema": "CREATE TABLE route (route_id INT, route_name VARCHAR(50)); INSERT INTO route (route_id, route_name) VALUES (1, 'Red Line'), (2, 'Green Line'), (3, 'Blue Line'), (4, 'Yellow Line'); CREATE TABLE fare (fare_id INT, route_id INT, fare_amount DECIMAL(5,2), collection_date DATE); INSERT INTO fare (fare_id, route_id, fare_amount, collection_date) VALUES (1, 1, 3.50, '2022-06-01'), (2, 1, 3.25, '2022-06-03'), (3, 2, 3.50, '2022-06-05'), (4, 2, 3.25, '2022-06-07'), (5, 3, 3.50, '2022-06-09'), (6, 3, 3.25, '2022-06-11'), (7, 4, 4.00, '2022-07-01'), (8, 4, 4.25, '2022-07-02'), (9, 4, 4.50, '2022-07-03'), (10, 4, 4.25, '2022-07-04');", "sql": "SELECT route_name FROM route JOIN fare ON route.route_id = fare.route_id WHERE MONTH(collection_date) = 7;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 106, "num_statements": 1} {"question": "List all artworks and their respective curators.", "schema": "CREATE TABLE Artworks (artwork_name TEXT, curator TEXT);", "sql": "SELECT artwork_name, curator FROM Artworks;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 43, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: what is the location for the date (to) 8 october 1922?", "schema": "CREATE TABLE table_12562214_1 (location VARCHAR, date__to_ VARCHAR)", "sql": "SELECT location FROM table_12562214_1 WHERE date__to_ = '8 October 1922';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 73, "num_statements": 1} {"question": "How many IoT sensors were installed in rural and urban areas in each country in the past quarter?", "schema": "CREATE TABLE country (id INTEGER, name TEXT);CREATE TABLE region (id INTEGER, country_id INTEGER, name TEXT, type TEXT);CREATE TABLE iot_sensor (id INTEGER, region_id INTEGER, installed_date DATE);", "sql": "SELECT co.name as country, r.type as area_type, COUNT(s.id) as num_sensors FROM country co INNER JOIN region r ON co.id = r.country_id INNER JOIN iot_sensor s ON r.id = s.region_id WHERE s.installed_date >= DATEADD(quarter, -1, CURRENT_DATE) GROUP BY co.name, r.type;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 267, "num_statements": 1} {"question": "Number of underwater volcanoes near the Philippines", "schema": "CREATE TABLE Underwater_Volcanoes (id INT, volcano_name VARCHAR(50), location VARCHAR(50), depth FLOAT); INSERT INTO Underwater_Volcanoes (id, volcano_name, location, depth) VALUES (1, 'Apo', 'Philippines', -2200);", "sql": "SELECT location, COUNT(*) FROM Underwater_Volcanoes GROUP BY location HAVING location = 'Philippines';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 102, "num_statements": 1} {"question": "What is the total cost of accommodations for students with visual impairments in H1 2021?", "schema": "CREATE TABLE accommodations (id INT, student_id INT, accommodation_type VARCHAR(50), cost FLOAT, accommodation_date DATE); INSERT INTO accommodations (id, student_id, accommodation_type, cost, accommodation_date) VALUES (1, 2, 'Sign Language Interpreter', 50.00, '2021-01-01'), (2, 3, 'Assistive Listening Devices', 300.00, '2021-04-01');", "sql": "SELECT SUM(cost) FROM accommodations WHERE accommodation_date BETWEEN '2021-01-01' AND '2021-06-30' AND EXISTS (SELECT * FROM students WHERE students.id = accommodations.student_id AND students.disability_type = 'Visual Impairment');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 233, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the Tag Team with a Time of 03:34?", "schema": "CREATE TABLE table_name_10 (tag_team VARCHAR, time VARCHAR)", "sql": "SELECT tag_team FROM table_name_10 WHERE time = '03:34';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 56, "num_statements": 1} {"question": "Insert a new record into the \"manufacturing_data\" table", "schema": "CREATE TABLE manufacturing_data (id INT PRIMARY KEY, chemical_name VARCHAR(255), quantity_produced INT, date_manufactured DATE);", "sql": "INSERT INTO manufacturing_data (id, chemical_name, quantity_produced, date_manufactured) VALUES (1, 'Ammonia', 100, '2022-01-01');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 130, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: On what date was Keiichi Ubukata the mediator and Mitsuko Mori the red team host?", "schema": "CREATE TABLE table_1315616_1 (date VARCHAR, mediator VARCHAR, red_team_host VARCHAR)", "sql": "SELECT date FROM table_1315616_1 WHERE mediator = 'Keiichi Ubukata' AND red_team_host = 'Mitsuko Mori';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 103, "num_statements": 1} {"question": "Write the SQL definition from PostgreSQL contrib extension 'contrib' (item 35).", "schema": null, "sql": "CREATE OPERATOR <-> (\n\tLEFTARG = interval,\n\tRIGHTARG = interval,\n\tPROCEDURE = interval_dist,\n\tCOMMUTATOR = '<->'\n);", "explanation": "SQL definition from the 'contrib' PostgreSQL contrib extension.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "ddl_advanced", "is_postgresql_specific": true, "sql_length": 115, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the result in 2013 in the tournament whose 2010 result was q2?", "schema": "CREATE TABLE table_name_8 (Id VARCHAR)", "sql": "SELECT 2013 FROM table_name_8 WHERE 2010 = 'q2';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 48, "num_statements": 1} {"question": "Which artists have released the most songs in the Jazz genre on Apple Music?", "schema": "CREATE TABLE AppleMusicSongs (ArtistID INT, ArtistName VARCHAR(100), Genre VARCHAR(50), SongID INT); INSERT INTO AppleMusicSongs (ArtistID, ArtistName, Genre, SongID) VALUES (1, 'Miles Davis', 'Jazz', 1), (2, 'John Coltrane', 'Jazz', 2), (3, 'Miles Davis', 'Jazz', 3);", "sql": "SELECT ArtistName, COUNT(*) as SongCount FROM AppleMusicSongs WHERE Genre = 'Jazz' GROUP BY ArtistName ORDER BY SongCount DESC;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 127, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'create_misc' (example 12).", "schema": null, "sql": "INSERT INTO b_star (class, b) VALUES ('b', 'bumble'::text);", "explanation": "DML from PostgreSQL core regression test for Create Misc.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 59, "num_statements": 1} {"question": "What is the average amount of research funding received by each department in the past two years?", "schema": "CREATE TABLE Departments (DepartmentID INT, Name VARCHAR(50)); INSERT INTO Departments VALUES (1, 'Computer Science'); CREATE TABLE ResearchGrants (GrantID INT, DepartmentID INT, Amount DECIMAL(10,2)); INSERT INTO ResearchGrants VALUES (1, 1, 5000); INSERT INTO ResearchGrants VALUES (2, 1, 7000);", "sql": "SELECT Departments.Name, AVG(ResearchGrants.Amount) FROM Departments INNER JOIN ResearchGrants ON Departments.DepartmentID = ResearchGrants.DepartmentID WHERE ResearchGrants.GrantID >= DATEADD(year, -2, GETDATE()) GROUP BY Departments.Name;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 240, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the most legs for steinlager 2", "schema": "CREATE TABLE table_256862_1 (legs INTEGER, winning_yacht VARCHAR)", "sql": "SELECT MAX(legs) FROM table_256862_1 WHERE winning_yacht = 'Steinlager 2';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 74, "num_statements": 1} {"question": "What is the maximum water consumption per day in Egypt?", "schema": "CREATE TABLE daily_water_consumption (country VARCHAR(20), max_consumption FLOAT); INSERT INTO daily_water_consumption (country, max_consumption) VALUES ('Egypt', 1200000);", "sql": "SELECT max_consumption FROM daily_water_consumption WHERE country = 'Egypt';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 76, "num_statements": 1} {"question": "What is the total amount donated by each donor in the 'donors' table, sorted by the total donation amount in descending order?", "schema": "CREATE TABLE donors (donor_id INT, donor_name TEXT, donation_amount FLOAT); INSERT INTO donors (donor_id, donor_name, donation_amount) VALUES (1, 'John Doe', 250.00), (2, 'Jane Smith', 200.00), (3, 'Alice Johnson', 150.00);", "sql": "SELECT donor_name, SUM(donation_amount) as total_donation FROM donors GROUP BY donor_name ORDER BY total_donation DESC;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 119, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the latest first elected?", "schema": "CREATE TABLE table_2668405_17 (first_elected INTEGER)", "sql": "SELECT MAX(first_elected) FROM table_2668405_17;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 48, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which rider has time of +19.751?", "schema": "CREATE TABLE table_name_59 (rider VARCHAR, time VARCHAR)", "sql": "SELECT rider FROM table_name_59 WHERE time = '+19.751';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 55, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the minimum number of members Europe had at the time Africa had 7375139?", "schema": "CREATE TABLE table_1914090_2 (europe INTEGER, africa VARCHAR)", "sql": "SELECT MIN(europe) FROM table_1914090_2 WHERE africa = 7375139;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 63, "num_statements": 1} {"question": "How many food safety violations were there in each restaurant in February 2022?", "schema": "CREATE TABLE restaurants (id INT, name TEXT, location TEXT); INSERT INTO restaurants (id, name, location) VALUES (1, 'Restaurant A', 'City A'), (2, 'Restaurant B', 'City B'); CREATE TABLE inspections (restaurant_id INT, date DATE, violations INT); INSERT INTO inspections (restaurant_id, date, violations) VALUES (1, '2022-02-01', 2), (1, '2022-02-15', 1), (2, '2022-02-03', 3), (2, '2022-02-20', 0);", "sql": "SELECT r.id, SUM(i.violations) as total_violations FROM inspections i JOIN restaurants r ON i.restaurant_id = r.id WHERE i.date BETWEEN '2022-02-01' AND '2022-02-28' GROUP BY r.id;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 180, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What high assists have a game greater than 46?", "schema": "CREATE TABLE table_name_61 (high_assists VARCHAR, game INTEGER)", "sql": "SELECT high_assists FROM table_name_61 WHERE game > 46;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 55, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the nationality of the player picked to go to Washington Capitals?", "schema": "CREATE TABLE table_2897457_2 (nationality VARCHAR, nhl_team VARCHAR)", "sql": "SELECT nationality FROM table_2897457_2 WHERE nhl_team = 'Washington Capitals';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 79, "num_statements": 1} {"question": "What is the average sentence length for each type of crime in Los Angeles County in 2019?", "schema": "CREATE TABLE sentences (crime_type VARCHAR(255), sentence_length INT); INSERT INTO sentences (crime_type, sentence_length) VALUES ('Murder', 25);", "sql": "SELECT crime_type, AVG(sentence_length) OVER (PARTITION BY crime_type) as avg_sentence FROM sentences WHERE YEAR(sentence_date) = 2019;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "query_window_function", "is_postgresql_specific": true, "sql_length": 135, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the Third in the 1993-94 season where Philippe Caux was a lead?", "schema": "CREATE TABLE table_name_39 (third VARCHAR, lead VARCHAR, season VARCHAR)", "sql": "SELECT third FROM table_name_39 WHERE lead = 'philippe caux' AND season = '1993-94';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 84, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which Venue has a Notes of 2:28:31?", "schema": "CREATE TABLE table_name_83 (venue VARCHAR, notes VARCHAR)", "sql": "SELECT venue FROM table_name_83 WHERE notes = '2:28:31';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 56, "num_statements": 1} {"question": "Update the quantity of a sustainable material for a brand.", "schema": "CREATE TABLE brands (id INT, name VARCHAR(50)); CREATE TABLE materials_used (id INT, brand_id INT, material VARCHAR(50), quantity INT); INSERT INTO brands (id, name) VALUES (1, 'Brand A'), (2, 'Brand B'); INSERT INTO materials_used (id, brand_id, material, quantity) VALUES (1, 1, 'Organic Cotton', 100), (2, 1, 'Recycled Polyester', 150), (3, 2, 'Organic Cotton', 200);", "sql": "UPDATE materials_used SET quantity = 175 WHERE id = 1;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 54, "num_statements": 1} {"question": "PostgreSQL regression test 'jsonb': Write the SELECT query (example 55).", "schema": null, "sql": "select '{\"a\": \"c\", \"b\": null}'::jsonb ->> 'b';", "explanation": "Regression test for Jsonb in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select '{\"a\": \"c\", \"b\": null}'::jsonb ->> 'b') AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": true, "sql_length": 46, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which Sanskrit has an English of mindfulness of breathing?", "schema": "CREATE TABLE table_name_26 (sanskrit VARCHAR, english VARCHAR)", "sql": "SELECT sanskrit FROM table_name_26 WHERE english = 'mindfulness of breathing';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 78, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the home team score with Away team Richmond?", "schema": "CREATE TABLE table_name_20 (home_team VARCHAR, away_team VARCHAR)", "sql": "SELECT home_team AS score FROM table_name_20 WHERE away_team = 'richmond';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 74, "num_statements": 1} {"question": "What is the maximum revenue generated by a single OTA for a hotel in Japan with a 4-star rating?", "schema": "CREATE TABLE hotels (hotel_id INT, hotel_name TEXT, country TEXT, stars INT, revenue FLOAT); INSERT INTO hotels (hotel_id, hotel_name, country, stars, revenue) VALUES (1, 'Hotel V', 'Japan', 4, 12000), (2, 'Hotel W', 'Japan', 5, 18000), (3, 'Hotel X', 'Japan', 4, 15000); CREATE TABLE otas (ota_id INT, ota_name TEXT, hotel_id INT, otas_revenue FLOAT); INSERT INTO otas (ota_id, ota_name, hotel_id, otas_revenue) VALUES (1, 'OTA1', 1, 8000), (2, 'OTA2', 2, 10000), (3, 'OTA3', 3, 13000), (4, 'OTA4', 1, 10000);", "sql": "SELECT MAX(otas_revenue) FROM otas JOIN hotels ON otas.hotel_id = hotels.hotel_id WHERE hotels.country = 'Japan' AND hotels.stars = 4;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 134, "num_statements": 1} {"question": "What is the maximum revenue for restaurants in Chicago?", "schema": "CREATE TABLE restaurant_revenue(location VARCHAR(255), revenue INT); INSERT INTO restaurant_revenue(location, revenue) VALUES ('Location1', 5000), ('Location2', 7000), ('Location3', 3000), ('Restaurant4', 6000), ('Restaurant5', 10000), ('Restaurant6', 8000);", "sql": "SELECT MAX(revenue) FROM restaurant_revenue WHERE location LIKE '%Chicago%';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 76, "num_statements": 1} {"question": "What is the maximum number of comments on posts by users from Australia, for posts containing the hashtag #food, in the last month?", "schema": "CREATE TABLE users (id INT, country VARCHAR(255)); CREATE TABLE posts (id INT, user_id INT, comments INT, hashtags TEXT, post_date DATE);", "sql": "SELECT MAX(comments) FROM posts INNER JOIN users ON posts.user_id = users.id WHERE users.country = 'Australia' AND hashtags LIKE '%#food%' AND post_date >= DATE(NOW()) - INTERVAL 1 MONTH;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 187, "num_statements": 1} {"question": "How many water treatment facilities have been updated in India in 2021?", "schema": "CREATE TABLE water_treatment_facilities (location VARCHAR(50), last_update DATE); INSERT INTO water_treatment_facilities (location, last_update) VALUES ('Mumbai', '2021-01-01'), ('Delhi', '2021-02-03'), ('Bangalore', '2021-04-05');", "sql": "SELECT COUNT(*) FROM water_treatment_facilities WHERE last_update >= '2021-01-01' AND last_update <= '2021-12-31' AND location = 'India';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 137, "num_statements": 1} {"question": "List all clients who have completed any financial program and their account balances.", "schema": "CREATE TABLE financial_programs (client_id INT, program_name VARCHAR(30), program_status VARCHAR(20)); INSERT INTO financial_programs (client_id, program_name, program_status) VALUES (301, 'Islamic Financial Capability', 'Completed'), (302, 'Financial Wellbeing', 'Enrolled'), (303, 'Islamic Financial Capability', 'Completed'), (304, 'Financial Capability', 'Dropped Out'); CREATE TABLE account_balances (client_id INT, account_balance DECIMAL(10,2)); INSERT INTO account_balances (client_id, account_balance) VALUES (301, 1000.00), (302, 2000.00), (303, 3000.00), (304, 4000.00);", "sql": "SELECT * FROM financial_programs INNER JOIN account_balances ON financial_programs.client_id = account_balances.client_id WHERE program_status = 'Completed';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 157, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: who is the the incumbent with candidates being ben cravens (d) unopposed", "schema": "CREATE TABLE table_1342331_5 (incumbent VARCHAR, candidates VARCHAR)", "sql": "SELECT incumbent FROM table_1342331_5 WHERE candidates = 'Ben Cravens (D) Unopposed';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 85, "num_statements": 1} {"question": "What is the total installed renewable energy capacity (in GW) for each continent in 2022?", "schema": "CREATE TABLE renewable_energy_capacity (continent VARCHAR(50), year INT, renewable_energy_capacity FLOAT); INSERT INTO renewable_energy_capacity (continent, year, renewable_energy_capacity) VALUES ('Africa', 2022, 200.5), ('Asia', 2022, 800.7), ('Europe', 2022, 600.3), ('North America', 2022, 500.2), ('South America', 2022, 300.1), ('Australia', 2022, 100.9);", "sql": "SELECT r.continent, SUM(r.renewable_energy_capacity) FROM renewable_energy_capacity r WHERE r.year = 2022 GROUP BY r.continent;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 127, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many teams were at 10 points?", "schema": "CREATE TABLE table_26473176_1 (team VARCHAR, points VARCHAR)", "sql": "SELECT COUNT(team) FROM table_26473176_1 WHERE points = 10;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 59, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'btree_gist' (example 5).", "schema": null, "sql": "SELECT count(*) FROM datetmp WHERE a = '2001-02-13';", "explanation": "Example query from the 'btree_gist' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 53, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: tell how many wins there was when the score was 490", "schema": "CREATE TABLE table_14070062_4 (won VARCHAR, points_for VARCHAR)", "sql": "SELECT COUNT(won) FROM table_14070062_4 WHERE points_for = '490';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 65, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who was the rider with a grid of 36?", "schema": "CREATE TABLE table_name_39 (rider VARCHAR, grid VARCHAR)", "sql": "SELECT rider FROM table_name_39 WHERE grid = 36;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 48, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the total number of step 6 for 11 gs grade", "schema": "CREATE TABLE table_2319437_1 (step_6 VARCHAR, gs_grade VARCHAR)", "sql": "SELECT COUNT(step_6) FROM table_2319437_1 WHERE gs_grade = 11;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 62, "num_statements": 1} {"question": "What is the maximum number of goals scored by a player in a single game?", "schema": "CREATE TABLE game_stats (id INT, player TEXT, goals INT); INSERT INTO game_stats (id, player, goals) VALUES (1, 'Pele', 7), (2, 'Diego', 6), (3, 'Ronaldo', 5);", "sql": "SELECT MAX(goals) FROM game_stats;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 34, "num_statements": 1} {"question": "Drop the table for cultural competency data", "schema": "CREATE TABLE cultural_competency (id INT PRIMARY KEY, state VARCHAR(2), year INT, training_hours FLOAT);", "sql": "DROP TABLE IF EXISTS cultural_competency;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "other", "is_postgresql_specific": false, "sql_length": 41, "num_statements": 1} {"question": "Update the record of the offender with ID 3 in the restorative_justice_programs table, changing the state to 'Oregon'.", "schema": "CREATE TABLE restorative_justice_programs (id INT, offender_name TEXT, age INT, state TEXT); INSERT INTO restorative_justice_programs (id, offender_name, age, state) VALUES (1, 'John Doe', 34, 'California'); INSERT INTO restorative_justice_programs (id, offender_name, age, state) VALUES (2, 'Jane Smith', 27, 'California'); INSERT INTO restorative_justice_programs (id, offender_name, age, state) VALUES (3, 'Mike Brown', 30, 'California');", "sql": "UPDATE restorative_justice_programs SET state = 'Oregon' WHERE id = 3;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 70, "num_statements": 1} {"question": "What is the total number of agricultural innovation metrics in each country, sorted by metric count in descending order?", "schema": "CREATE TABLE countries (country_id INT, country_name VARCHAR(255)); CREATE TABLE metrics (metric_id INT, metric_name VARCHAR(255), country_id INT);", "sql": "SELECT c.country_name, COUNT(m.metric_id) as metric_count FROM countries c JOIN metrics m ON c.country_id = m.country_id GROUP BY c.country_name ORDER BY metric_count DESC;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 172, "num_statements": 1} {"question": "What is the average timeline for completing commercial construction projects in Chicago, categorized by project size?", "schema": "CREATE TABLE Commercial_Projects (ProjectID INT, City VARCHAR(50), Size VARCHAR(50), Timeline INT);", "sql": "SELECT Size, AVG(Timeline) FROM Commercial_Projects WHERE City = 'Chicago' AND ProjectType = 'Commercial' GROUP BY Size;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 120, "num_statements": 1} {"question": "What was the total waste produced by the 'West Coast' plant in 2020?", "schema": "CREATE TABLE waste (plant varchar(10), year int, waste_amount int); INSERT INTO waste (plant, year, waste_amount) VALUES ('North Plant', 2020, 150), ('North Plant', 2019, 140), ('West Plant', 2020, 200), ('West Plant', 2019, 180);", "sql": "SELECT SUM(waste_amount) FROM waste WHERE plant = 'West Plant' AND year = 2020;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 79, "num_statements": 1} {"question": "Insert a new record into the MarineLife table for a species named 'Blue Whale' with an id of 1.", "schema": "CREATE TABLE marine_life (id INT, species_name VARCHAR(255)); INSERT INTO marine_life (id, species_name) VALUES (1, 'Dolphin'), (2, 'Shark'), (3, 'Tuna');", "sql": "INSERT INTO marine_life (id, species_name) VALUES (1, 'Blue Whale');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 68, "num_statements": 1} {"question": "Identify the number of public transit users in New York City by subway line.", "schema": "CREATE TABLE subway_ridership (user_id INT, trip_date DATE, trip_subway_line VARCHAR(20)); INSERT INTO subway_ridership (user_id, trip_date, trip_subway_line) VALUES (1, '2022-01-01', '4'), (2, '2022-01-01', '6');", "sql": "SELECT trip_subway_line, COUNT(DISTINCT user_id) AS unique_users FROM subway_ridership GROUP BY trip_subway_line;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 113, "num_statements": 1} {"question": "How many vessels visited each port in the US in the last 3 months?", "schema": "CREATE TABLE ports (port_id INT, port_name TEXT, country TEXT);CREATE TABLE visits (visit_id INT, vessel_id INT, port_id INT, visit_date DATE);", "sql": "SELECT ports.port_name, COUNT(visits.vessel_id) as num_visits FROM ports JOIN visits ON ports.port_id = visits.port_id WHERE ports.country = 'US' AND visits.visit_date BETWEEN DATEADD(month, -3, CURRENT_DATE) AND CURRENT_DATE GROUP BY ports.port_name;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 251, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which opponents are on the date September 5?", "schema": "CREATE TABLE table_23612439_2 (opponent VARCHAR, date VARCHAR)", "sql": "SELECT opponent FROM table_23612439_2 WHERE date = 'September 5';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 65, "num_statements": 1} {"question": "What is the minimum salary for employees who have completed training programs in the Finance department?", "schema": "CREATE TABLE employees (employee_id INT, department VARCHAR(255), salary INT); INSERT INTO employees (employee_id, department, salary) VALUES (1, 'Finance', 50000), (2, 'HR', 60000), (3, 'Finance', 55000), (4, 'Finance', 70000); CREATE TABLE training_programs (program_id INT, department VARCHAR(255)); INSERT INTO training_programs (program_id, department) VALUES (1, 'IT'), (2, 'HR'), (3, 'Finance'); CREATE TABLE completed_training (employee_id INT, program_id INT); INSERT INTO completed_training (employee_id, program_id) VALUES (1, 3), (2, 2), (3, 3);", "sql": "SELECT MIN(salary) FROM employees e JOIN completed_training ct ON e.employee_id = ct.employee_id JOIN training_programs tp ON ct.program_id = tp.program_id WHERE e.department = 'Finance';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 187, "num_statements": 1} {"question": "How many cases were handled by female attorneys in the criminal law specialty?", "schema": "CREATE TABLE attorneys (id INT, name VARCHAR(50), gender VARCHAR(50), years_of_experience INT, specialty VARCHAR(50)); INSERT INTO attorneys (id, name, gender, years_of_experience, specialty) VALUES (1, 'John Doe', 'Male', 12, 'Criminal Law'); INSERT INTO attorneys (id, name, gender, years_of_experience, specialty) VALUES (2, 'Jane Smith', 'Female', 5, 'Family Law'); INSERT INTO attorneys (id, name, gender, years_of_experience, specialty) VALUES (3, 'Maria Garcia', 'Female', 8, 'Criminal Law');", "sql": "SELECT COUNT(*) FROM attorneys WHERE gender = 'Female' AND specialty = 'Criminal Law';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 86, "num_statements": 1} {"question": "What is the maximum daily revenue for each line in the 'mumbai' schema?", "schema": "CREATE TABLE mumbai.lines (id INT, line_name VARCHAR); CREATE TABLE mumbai.revenue (id INT, line_id INT, daily_revenue DECIMAL);", "sql": "SELECT mumbai.lines.line_name, MAX(mumbai.revenue.daily_revenue) FROM mumbai.lines INNER JOIN mumbai.revenue ON mumbai.lines.id = mumbai.revenue.line_id GROUP BY mumbai.lines.line_name;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 185, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Can you tell me the total number of Matched that has the Strike Rate smallet than 152.3, and the Balls of 395?", "schema": "CREATE TABLE table_name_88 (matches VARCHAR, strike_rate VARCHAR, balls VARCHAR)", "sql": "SELECT COUNT(matches) FROM table_name_88 WHERE strike_rate < 152.3 AND balls = 395;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 83, "num_statements": 1} {"question": "What is the maximum area of land (in hectares) used for agriculture in each region in the agro_regions dataset?", "schema": "CREATE TABLE agro_regions (id INT, region VARCHAR(255), land_area INT);", "sql": "SELECT region, MAX(land_area) FROM agro_regions GROUP BY region;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 64, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the total number of number disc for jimmy kaufman", "schema": "CREATE TABLE table_15430606_1 (no_disc VARCHAR, directed_by VARCHAR)", "sql": "SELECT COUNT(no_disc) FROM table_15430606_1 WHERE directed_by = 'Jimmy Kaufman';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 80, "num_statements": 1} {"question": "Which salesperson has sold the least garments in the last 30 days, ordered by the number of garments sold?", "schema": "CREATE TABLE salesperson (salesperson_id INT, name VARCHAR(50)); CREATE TABLE sales (sales_id INT, salesperson_id INT, sale_date DATE);", "sql": "SELECT salesperson.name, COUNT(sales.sales_id) AS quantity_sold FROM salesperson INNER JOIN sales ON salesperson.salesperson_id = sales.salesperson_id WHERE sale_date >= CURRENT_DATE - INTERVAL '30 days' GROUP BY salesperson.name ORDER BY quantity_sold ASC;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 257, "num_statements": 1} {"question": "What is the average energy consumption of buildings in the 'urban' schema, grouped by city?", "schema": "CREATE TABLE urban.buildings (city VARCHAR(255), energy_consumption INT); INSERT INTO urban.buildings (city, energy_consumption) VALUES ('CityA', 1200), ('CityA', 1500), ('CityB', 1700), ('CityB', 1300);", "sql": "SELECT city, AVG(energy_consumption) FROM urban.buildings GROUP BY city;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 72, "num_statements": 1} {"question": "How many drought-affected regions are there in the African continent?", "schema": "CREATE TABLE droughts (id INT, region VARCHAR(50), continent VARCHAR(20)); INSERT INTO droughts (id, region, continent) VALUES (1, 'Eastern Cape', 'Africa'), (2, 'Western Cape', 'Africa'), (3, 'Northern Cape', 'Africa'), (4, 'Buenos_Aires', 'South_America');", "sql": "SELECT COUNT(*) FROM droughts WHERE continent = 'Africa';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 57, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What denomination is mt lawley?", "schema": "CREATE TABLE table_name_5 (denomination VARCHAR, location VARCHAR)", "sql": "SELECT denomination FROM table_name_5 WHERE location = 'mt lawley';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 67, "num_statements": 1} {"question": "How many employees are there in each department in the 'hr' schema, grouped by department and sorted by the number of employees in descending order?", "schema": "CREATE SCHEMA hr; CREATE TABLE departments (id INT, name VARCHAR); INSERT INTO departments VALUES (1, 'Marketing'); CREATE TABLE employees (id INT, name VARCHAR, department_id INT); INSERT INTO employees VALUES (1, 'John Doe', 1);", "sql": "SELECT departments.name, COUNT(*) AS num_employees FROM hr.departments JOIN hr.employees ON departments.id = employees.department_id GROUP BY departments.name ORDER BY num_employees DESC;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 187, "num_statements": 1} {"question": "pgTAP test for Pgtap--1.1.0--1.2.0 (assertion 74).", "schema": null, "sql": "-- is_window( schema, function, description )\nCREATE OR REPLACE FUNCTION is_window ( NAME, NAME, TEXT )\nRETURNS TEXT AS $$\n SELECT _func_compare($1, $2, _type_func('w', $1, $2), $3 );\n$$ LANGUAGE SQL;", "explanation": "SQL assertion from pgTAP test suite for Pgtap--1.1.0--1.2.0.", "validation_query": null, "source": "pgtap_tests", "difficulty": "advanced", "category": "plpgsql_function", "is_postgresql_specific": true, "sql_length": 203, "num_statements": 2} {"question": "Insert a new donation of 1000 made by John Doe on 2022-02-14.", "schema": "CREATE TABLE Donors (DonorID int, Name varchar(50), TotalDonation money); CREATE TABLE Donations (DonationID int, DonorID int, Amount money, DonationDate date); INSERT INTO Donors (DonorID, Name, TotalDonation) VALUES (1, 'John Doe', 5000), (2, 'Jane Smith', 7000); INSERT INTO Donations (DonationID, DonorID, Amount, DonationDate) VALUES (1, 1, 500, '2021-01-01'), (2, 1, 2000, '2021-06-15'), (3, 2, 6000, '2021-03-25');", "sql": "INSERT INTO Donations (DonationID, DonorID, Amount, DonationDate) VALUES (4, 1, 1000, '2022-02-14');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 100, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: In 2009, what Appearances had a Winning Percentage of less than 0?", "schema": "CREATE TABLE table_name_30 (appearances INTEGER, season_s_ VARCHAR, winning_percentage VARCHAR)", "sql": "SELECT AVG(appearances) FROM table_name_30 WHERE season_s_ = '2009' AND winning_percentage < 0;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 95, "num_statements": 1} {"question": "PostgreSQL regression test 'date': Write the SELECT query (example 158).", "schema": null, "sql": "SELECT date '4714-11-23 BC'; -- out of range\nSELECT date '5874897-12-31';", "explanation": "Regression test for Date in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT date '4714-11-23 BC'; -- out of range\nSELECT date '5874897-12-31') AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 74, "num_statements": 2} {"question": "Generate PostgreSQL SQL for: What venue did mahela jayawardene and thilan samaraweera play at?", "schema": "CREATE TABLE table_name_42 (venue VARCHAR, batting_partners VARCHAR)", "sql": "SELECT venue FROM table_name_42 WHERE batting_partners = 'mahela jayawardene and thilan samaraweera';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 101, "num_statements": 1} {"question": "What is the average budget for peacekeeping operations in Africa since 2015?", "schema": "CREATE TABLE PeacekeepingOperations (id INT PRIMARY KEY, operation VARCHAR(100), location VARCHAR(50), year INT, budget INT); INSERT INTO PeacekeepingOperations (id, operation, location, year, budget) VALUES (1, 'MINUSCA', 'Central African Republic', 2016, 864731532);", "sql": "SELECT AVG(budget) FROM PeacekeepingOperations WHERE location LIKE '%Africa%' AND year >= 2015;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 95, "num_statements": 1} {"question": "PostgreSQL regression test 'timestamptz': Write the SELECT query (example 282).", "schema": null, "sql": "SELECT '2014-10-26 00:00:00 MSK'::timestamptz;", "explanation": "Regression test for Timestamptz in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT '2014-10-26 00:00:00 MSK'::timestamptz) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 46, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the venue where St Kilda was the opposing away team", "schema": "CREATE TABLE table_name_76 (venue VARCHAR, away_team VARCHAR)", "sql": "SELECT venue FROM table_name_76 WHERE away_team = 'st kilda';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 61, "num_statements": 1} {"question": "Update the quantity of 'Tank' sales records in 'Asia' to 22 for the year '2025'", "schema": "CREATE TABLE military_sales (id INT PRIMARY KEY, region VARCHAR(20), year INT, equipment_name VARCHAR(30), quantity INT, value FLOAT); INSERT INTO military_sales (id, region, year, equipment_name, quantity, value) VALUES (1, 'Asia', 2025, 'Fighter Jet', 10, 6000000), (2, 'Asia', 2025, 'Tank', 20, 13000000), (3, 'Asia', 2025, 'Helicopter', 15, 9000000);", "sql": "UPDATE military_sales SET quantity = 22 WHERE region = 'Asia' AND equipment_name = 'Tank' AND year = 2025;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 106, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What role is spoken in hindi in 2011?", "schema": "CREATE TABLE table_name_26 (role VARCHAR, language VARCHAR, year VARCHAR)", "sql": "SELECT role FROM table_name_26 WHERE language = 'hindi' AND year = 2011;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 72, "num_statements": 1} {"question": "What is the lowest price of vegan makeup products in the UK?", "schema": "CREATE TABLE Makeup_Products (ProductID int, ProductName varchar(100), Country varchar(50), IsVegan bit, RetailPrice decimal(5,2)); INSERT INTO Makeup_Products (ProductID, ProductName, Country, IsVegan, RetailPrice) VALUES (1, 'Vegan Mascara', 'UK', 1, 14.99); INSERT INTO Makeup_Products (ProductID, ProductName, Country, IsVegan, RetailPrice) VALUES (2, 'Natural Eyeshadow', 'UK', 0, 10.99); INSERT INTO Makeup_Products (ProductID, ProductName, Country, IsVegan, RetailPrice) VALUES (3, 'Cruelty-Free Lipstick', 'UK', 1, 8.99);", "sql": "SELECT MIN(RetailPrice) FROM Makeup_Products WHERE Country = 'UK' AND IsVegan = 1;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 82, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: WHAT SERIES WAS RELEASED BY ITV STUDIOS AND A RELEASE DATE OF AUGUST 15, 2012?", "schema": "CREATE TABLE table_name_47 (series VARCHAR, released_by VARCHAR, release_date VARCHAR)", "sql": "SELECT series FROM table_name_47 WHERE released_by = 'itv studios' AND release_date = 'august 15, 2012';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 104, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'numeric' (example 348).", "schema": null, "sql": "INSERT INTO num_exp_add VALUES (8,4,'7874342.4119');", "explanation": "DML from PostgreSQL core regression test for Numeric.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 52, "num_statements": 1} {"question": "What is the total number of volunteers in India who have completed more than 10 hours of service?", "schema": "CREATE TABLE volunteers (volunteer_id int, hours_served int, country varchar(50)); INSERT INTO volunteers (volunteer_id, hours_served, country) VALUES (1, 12, 'India'), (2, 5, 'India'), (3, 20, 'India');", "sql": "SELECT COUNT(volunteer_id) FROM volunteers WHERE country = 'India' GROUP BY volunteer_id HAVING hours_served > 10;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 114, "num_statements": 1} {"question": "What is the average account balance for customers in each city?", "schema": "CREATE TABLE accounts (account_id INT, customer_id INT, account_balance DECIMAL(10,2)); CREATE TABLE customers (customer_id INT, customer_name VARCHAR(255), city VARCHAR(255)); INSERT INTO accounts (account_id, customer_id, account_balance) VALUES (1, 1, 1000.00), (2, 1, 2000.00), (3, 2, 500.00), (4, 3, 1500.00); INSERT INTO customers (customer_id, customer_name, city) VALUES (1, 'John Doe', 'New York'), (2, 'Jane Smith', 'Los Angeles'), (3, 'Bob Johnson', 'Chicago');", "sql": "SELECT c.city, AVG(a.account_balance) as avg_account_balance FROM accounts a INNER JOIN customers c ON a.customer_id = c.customer_id GROUP BY c.city;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 149, "num_statements": 1} {"question": "What are the names and categories of the decentralized applications that have been deployed on the Binance Smart Chain and have had the most number of transactions?", "schema": "CREATE TABLE IF NOT EXISTS decentralized_applications (dapp_id INT PRIMARY KEY, name VARCHAR(100), tx_id INT, category VARCHAR(50), blockchain VARCHAR(50), FOREIGN KEY (tx_id) REFERENCES blockchain_transactions(tx_id)); CREATE TABLE IF NOT EXISTS blockchain_transactions (tx_id INT PRIMARY KEY, blockchain VARCHAR(50)); INSERT INTO blockchain_transactions (tx_id, blockchain) VALUES (1, 'Binance Smart Chain');", "sql": "SELECT dapp_name, category, COUNT(dapp_id) FROM decentralized_applications da JOIN blockchain_transactions bt ON da.tx_id = bt.tx_id WHERE bt.blockchain = 'Binance Smart Chain' GROUP BY dapp_name, category ORDER BY COUNT(dapp_id) DESC LIMIT 10;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 244, "num_statements": 1} {"question": "PL/pgSQL test: Plpython Composite (example 30).", "schema": null, "sql": "SELECT * FROM multiout_record_as('obj', null, 2, false);", "explanation": "PL/pgSQL example from PostgreSQL source test for Plpython Composite.", "validation_query": null, "source": "plpgsql_source", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 56, "num_statements": 1} {"question": "What is the total quantity of gold mined by each mining company?", "schema": "CREATE TABLE Mining_Company (id INT, name VARCHAR(50), location VARCHAR(50)); INSERT INTO Mining_Company (id, name, location) VALUES (1, 'CompanyA', 'USA'), (2, 'CompanyB', 'Canada'); CREATE TABLE Mining_Operation (id INT, company_id INT, mine_name VARCHAR(50), resource VARCHAR(10), quantity INT); INSERT INTO Mining_Operation (id, company_id, mine_name, resource, quantity) VALUES (1, 1, 'Mine1', 'Gold', 1000), (2, 1, 'Mine2', 'Gold', 1500), (3, 2, 'Mine3', 'Gold', 800), (4, 2, 'Mine4', 'Silver', 1200);", "sql": "SELECT m.name, SUM(quantity) as total_gold_quantity FROM Mining_Operation o JOIN Mining_Company m ON o.company_id = m.id WHERE o.resource = 'Gold' GROUP BY m.name;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 163, "num_statements": 1} {"question": "What is the total duration of space missions for each spacecraft model?", "schema": "CREATE TABLE space_missions (id INT, spacecraft_model VARCHAR(255), duration FLOAT);", "sql": "SELECT spacecraft_model, SUM(duration) as total_duration FROM space_missions GROUP BY spacecraft_model;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 103, "num_statements": 1} {"question": "Add new community health worker record for Georgia", "schema": "CREATE TABLE community_health_workers (chw_id INT, state VARCHAR(2), name VARCHAR(50), certification_date DATE);", "sql": "INSERT INTO community_health_workers (chw_id, state, name, certification_date) VALUES (987, 'GA', 'Nia White', '2022-06-10');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 125, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What position was Jafus White who was picked after round 1?", "schema": "CREATE TABLE table_name_87 (position VARCHAR, round VARCHAR, player VARCHAR)", "sql": "SELECT position FROM table_name_87 WHERE round > 1 AND player = 'jafus white';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 78, "num_statements": 1} {"question": "What is the maximum budget for any public works project?", "schema": "CREATE TABLE PublicWorksB(id INT, project VARCHAR(30), budget DECIMAL(10,2)); INSERT INTO PublicWorksB(id, project, budget) VALUES (1, 'Highway Construction', 800000.00), (2, 'Airport Expansion', 3000000.00);", "sql": "SELECT MAX(budget) FROM PublicWorksB;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 37, "num_statements": 1} {"question": "List policy numbers with the highest claim amount in the last quarter.", "schema": "CREATE TABLE Policy (PolicyNumber INT, PolicyholderName VARCHAR(50)); CREATE TABLE Claim (ClaimID INT, PolicyNumber INT, ClaimDate DATE, ClaimAmount DECIMAL(10,2)); INSERT INTO Policy VALUES (1, 'John Doe'), (2, 'Jane Smith'); INSERT INTO Claim VALUES (1, 1, '2021-01-01', 5000), (2, 1, '2021-02-01', 3000), (3, 2, '2021-04-01', 7000), (4, 2, '2021-05-01', 8000), (5, 2, '2021-06-01', 9000);", "sql": "SELECT PolicyNumber, MAX(ClaimAmount) as MaxClaimAmount FROM Claim WHERE ClaimDate >= DATEADD(QUARTER, -1, GETDATE()) GROUP BY PolicyNumber ORDER BY MaxClaimAmount DESC;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 169, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'postgres_fdw' (example 5).", "schema": null, "sql": "SELECT count(*) FROM ft1 a CROSS JOIN ft1 b CROSS JOIN ft1 c CROSS JOIN ft1 d;", "explanation": "Example query from the 'postgres_fdw' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 78, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the district who had their first elected in 1966?", "schema": "CREATE TABLE table_1341843_44 (district VARCHAR, first_elected VARCHAR)", "sql": "SELECT district FROM table_1341843_44 WHERE first_elected = 1966;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 65, "num_statements": 1} {"question": "Insert a new record for the 'waste_reduction' department with an average salary of $40,000 and 30 employees.", "schema": "CREATE TABLE company_departments (dept_name TEXT, avg_salary NUMERIC, employee_count INTEGER);", "sql": "INSERT INTO company_departments (dept_name, avg_salary, employee_count) VALUES ('waste_reduction', 40000.00, 30);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 113, "num_statements": 1} {"question": "Which sites have both wooden and stone artifacts?", "schema": "CREATE TABLE excavation_sites (site_id INT, site_name TEXT); CREATE TABLE artifacts (artifact_id INT, site_id INT, artifact_type TEXT); INSERT INTO excavation_sites (site_id, site_name) VALUES (1, 'Site A'), (2, 'Site B'), (3, 'Site C'); INSERT INTO artifacts (artifact_id, site_id, artifact_type) VALUES (1, 1, 'wooden'), (2, 1, 'stone'), (3, 2, 'metal'), (4, 3, 'wooden'), (5, 3, 'pottery'), (6, 4, 'stone');", "sql": "SELECT e.site_name FROM excavation_sites e JOIN artifacts a ON e.site_id = a.site_id WHERE a.artifact_type IN ('wooden', 'stone') GROUP BY e.site_id HAVING COUNT(DISTINCT a.artifact_type) = 2;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 192, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'with' (example 43).", "schema": null, "sql": "INSERT INTO tree\nVALUES (1, NULL), (2, 1), (3,1), (4,2), (5,2), (6,2), (7,3), (8,3),\n (9,4), (10,4), (11,7), (12,7), (13,7), (14, 9), (15,11), (16,11);", "explanation": "DML from PostgreSQL core regression test for With.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 157, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: what is the turbo when the frequency is 2.2 ghz?", "schema": "CREATE TABLE table_name_63 (turbo VARCHAR, frequency VARCHAR)", "sql": "SELECT turbo FROM table_name_63 WHERE frequency = '2.2 ghz';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 60, "num_statements": 1} {"question": "List all suppliers that provide products for a specific restaurant, excluding any suppliers that have had food safety violations in the past year.", "schema": "CREATE TABLE suppliers (id INT, name TEXT, restaurant TEXT, violation_date DATE);", "sql": "SELECT name FROM suppliers WHERE restaurant = 'Restaurant A' AND id NOT IN (SELECT supplier_id FROM violations WHERE violation_date >= DATEADD(year, -1, GETDATE()));", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 165, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the quantity of class B V?", "schema": "CREATE TABLE table_name_55 (quantity VARCHAR, class VARCHAR)", "sql": "SELECT quantity FROM table_name_55 WHERE class = 'b v';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 55, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the height in feet for the player from valparaiso", "schema": "CREATE TABLE table_name_80 (height_in_ft VARCHAR, school_club_team_country VARCHAR)", "sql": "SELECT height_in_ft FROM table_name_80 WHERE school_club_team_country = 'valparaiso';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 85, "num_statements": 1} {"question": "Insert a new record of a 'Kiwi' animal in the rescue center from the 'New Zealand Islands' region.", "schema": "CREATE TABLE rescue_center_animals (animal_id INT, animal_name VARCHAR(50), region VARCHAR(50));", "sql": "INSERT INTO rescue_center_animals (animal_id, animal_name, region) VALUES (3, 'Kiwi', 'New Zealand Islands');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 109, "num_statements": 1} {"question": "Show all suppliers from 'Jakarta'", "schema": "SELECT * FROM suppliers WHERE location = 'Jakarta';", "sql": "SELECT * FROM suppliers WHERE location = 'Jakarta';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 51, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which Season has a Postseason of promoted runner-up?", "schema": "CREATE TABLE table_name_33 (season VARCHAR, postseason VARCHAR)", "sql": "SELECT season FROM table_name_33 WHERE postseason = 'promoted runner-up';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 73, "num_statements": 1} {"question": "PostgreSQL regression test 'interval': Write the SELECT query (example 92).", "schema": null, "sql": "SELECT '3 days 5 milliseconds'::interval;", "explanation": "Regression test for Interval in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT '3 days 5 milliseconds'::interval) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 41, "num_statements": 1} {"question": "How many autonomous buses are in operation in Tokyo, Osaka, and Kyoto?", "schema": "CREATE TABLE JPAutonomousBuses (id INT, city VARCHAR(20), in_operation INT);", "sql": "SELECT SUM(in_operation) FROM JPAutonomousBuses WHERE city IN ('Tokyo', 'Osaka', 'Kyoto');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 90, "num_statements": 1} {"question": "pgTAP test for Pgtap--Unpackaged--0.91.0 (assertion 589).", "schema": null, "sql": "ALTER EXTENSION pgtap ADD FUNCTION _temptable ( TEXT, TEXT );", "explanation": "SQL assertion from pgTAP test suite for Pgtap--Unpackaged--0.91.0.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 61, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the Record on Game 50?", "schema": "CREATE TABLE table_name_68 (record VARCHAR, game VARCHAR)", "sql": "SELECT record FROM table_name_68 WHERE game = 50;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 49, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Tell me the highest opponents for record of 7-3 and vikings points more than 7", "schema": "CREATE TABLE table_name_97 (opponents INTEGER, record VARCHAR, vikings_points VARCHAR)", "sql": "SELECT MAX(opponents) FROM table_name_97 WHERE record = '7-3' AND vikings_points > 7;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 85, "num_statements": 1} {"question": "What was the total cost of satellites deployed in 2022?", "schema": "CREATE TABLE Satellites (satellite_id INT, deployment_year INT, cost FLOAT); INSERT INTO Satellites (satellite_id, deployment_year, cost) VALUES (1, 2022, 20000000.0), (2, 2021, 15000000.0);", "sql": "SELECT SUM(cost) FROM Satellites WHERE deployment_year = 2022;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 62, "num_statements": 1} {"question": "pgTAP test for Performs Ok (assertion 8).", "schema": null, "sql": "SELECT * FROM check_test(\n performs_ok( 'SELECT TRUE', 0, 'whatever' ),\n false,\n 'simple select fail',\n 'whatever',\n ' runtime: [[:digit:]]+([.][[:digit:]]+)? ms\n exceeds: 0 ms',\n true\n);", "explanation": "SQL assertion from pgTAP test suite for Performs Ok.", "validation_query": null, "source": "pgtap_tests", "difficulty": "advanced", "category": "plpgsql", "is_postgresql_specific": false, "sql_length": 215, "num_statements": 1} {"question": "What is the average consumer awareness score for the ethical fashion brand 'GreenFashions' in the 'Asia' region, and how does it compare to the global average?", "schema": "CREATE TABLE consumer_awareness (id INT PRIMARY KEY, brand VARCHAR(255), region VARCHAR(255), score INT); INSERT INTO consumer_awareness (id, brand, region, score) VALUES (1, 'GreenFashions', 'Asia', 80), (2, 'GreenFashions', 'Europe', 85), (3, 'EcoFriendlyFashions', 'Asia', 75), (4, 'EcoFriendlyFashions', 'USA', 90); CREATE TABLE regions (id INT PRIMARY KEY, region VARCHAR(255)); INSERT INTO regions (id, region) VALUES (1, 'Asia'), (2, 'Europe'), (3, 'USA');", "sql": "SELECT AVG(ca.score) as avg_score FROM consumer_awareness ca JOIN regions r ON ca.region = r.region WHERE ca.brand = 'GreenFashions' AND r.region = 'Asia'; SELECT AVG(score) as global_avg_score FROM consumer_awareness WHERE brand = 'GreenFashions';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 248, "num_statements": 2} {"question": "Delete the 'Director of Engineering' job title from the JobTitle table", "schema": "CREATE TABLE JobTitle (JobTitleID INT PRIMARY KEY, JobTitleName VARCHAR(50));", "sql": "DELETE FROM JobTitle WHERE JobTitleName = 'Director of Engineering';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_delete", "is_postgresql_specific": false, "sql_length": 68, "num_statements": 1} {"question": "Update the game_mode column in the game_sessions table to 'Capture the Flag' for all sessions on the maps table with the name 'Stadium'", "schema": "CREATE TABLE game_sessions (session_id INT, player_id INT, game_mode VARCHAR(20), map_id INT); CREATE TABLE maps (map_id INT, map_name VARCHAR(20));", "sql": "UPDATE game_sessions SET game_mode = 'Capture the Flag' WHERE map_id IN (SELECT map_id FROM maps WHERE map_name = 'Stadium');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 125, "num_statements": 1} {"question": "Delete the bus route 12 from the public transportation system of San Francisco", "schema": "CREATE TABLE bus_routes (id INT PRIMARY KEY, route_number INT, route_name VARCHAR(255), city VARCHAR(255), num_stops INT);", "sql": "DELETE FROM bus_routes WHERE route_number = 12 AND city = 'San Francisco';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_delete", "is_postgresql_specific": false, "sql_length": 74, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What's the lowest Losses recorded wiht a Wins of 1, Team of Dallas Stars, and a Win % that's smaller than 0.25?", "schema": "CREATE TABLE table_name_60 (losses INTEGER, win__percentage VARCHAR, wins VARCHAR, team VARCHAR)", "sql": "SELECT MIN(losses) FROM table_name_60 WHERE wins = 1 AND team = 'dallas stars' AND win__percentage < 0.25;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 106, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Tell me the driver for ford cosworth dfv 3.0 v8 and chassis of ts19 ts20", "schema": "CREATE TABLE table_name_22 (driver VARCHAR, engine VARCHAR, chassis VARCHAR)", "sql": "SELECT driver FROM table_name_22 WHERE engine = 'ford cosworth dfv 3.0 v8' AND chassis = 'ts19 ts20';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 101, "num_statements": 1} {"question": "What is the total water consumption by city in 2020, considering domestic, commercial, and agricultural consumption?", "schema": "CREATE TABLE water_usage (city VARCHAR(255), year INT, domestic_consumption INT, commercial_consumption INT, agricultural_consumption INT); INSERT INTO water_usage (city, year, domestic_consumption, commercial_consumption, agricultural_consumption) VALUES ('CityA', 2020, 350, 250, 550), ('CityB', 2020, 450, 350, 650), ('CityC', 2020, 500, 400, 700), ('CityD', 2020, 400, 300, 600);", "sql": "SELECT city, (domestic_consumption + commercial_consumption + agricultural_consumption) as total_consumption FROM water_usage WHERE year = 2020 GROUP BY city;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 158, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Where is the miss global teen?", "schema": "CREATE TABLE table_1825751_14 (hometown VARCHAR, pageant VARCHAR)", "sql": "SELECT hometown FROM table_1825751_14 WHERE pageant = 'Miss Global Teen';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 73, "num_statements": 1} {"question": "What is the number of military innovation patents filed by country in 2021?", "schema": "CREATE TABLE military_patents (patent_name VARCHAR(255), country VARCHAR(255), year INT); INSERT INTO military_patents (patent_name, country, year) VALUES ('Patent 1', 'USA', 2021), ('Patent 2', 'China', 2021), ('Patent 3', 'Russia', 2021);", "sql": "SELECT country, COUNT(patent_name) FROM military_patents WHERE year = 2021 GROUP BY country;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 92, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the Coach with a Big Ten that is 3rd (278)?", "schema": "CREATE TABLE table_name_92 (coach VARCHAR, big_ten VARCHAR)", "sql": "SELECT coach FROM table_name_92 WHERE big_ten = '3rd (278)';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 60, "num_statements": 1} {"question": "What is the average workout duration for users in the age group 25-34?", "schema": "CREATE TABLE UserData (UserID INT, Age INT); CREATE TABLE WorkoutData (UserID INT, WorkoutDuration INT); INSERT INTO UserData (UserID, Age) VALUES (1, 27), (2, 32), (3, 24), (4, 35), (5, 29); INSERT INTO WorkoutData (UserID, WorkoutDuration) VALUES (1, 45), (1, 60), (2, 30), (3, 75), (3, 45), (4, 30), (5, 60);", "sql": "SELECT AVG(WorkoutDuration) FROM WorkoutData INNER JOIN UserData ON WorkoutData.UserID = UserData.UserID WHERE Age BETWEEN 25 AND 34;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 133, "num_statements": 1} {"question": "pgTAP test for Partitions (assertion 17).", "schema": null, "sql": "SELECT * FROM check_test(\n is_partition_of( 'part1', 'parted', 'whatevs' ),\n true,\n 'is_partition_of( ctab, ptab, desc )',\n 'whatevs',\n ''\n);", "explanation": "SQL assertion from pgTAP test suite for Partitions.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 156, "num_statements": 1} {"question": "What is the maximum sentence length for offenders who participated in restorative justice programs in California?", "schema": "CREATE TABLE restorative_justice_sentences (sentence_id INT, program_id INT, sentence_length INT, state VARCHAR(2)); INSERT INTO restorative_justice_sentences (sentence_id, program_id, sentence_length, state) VALUES (1, 1001, 18, 'CA'), (2, 1002, 24, 'CA');", "sql": "SELECT MAX(sentence_length) FROM restorative_justice_sentences WHERE state = 'CA';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 82, "num_statements": 1} {"question": "How many players have participated in esports events, and what is the average age of these players?", "schema": "CREATE TABLE Players (id INT, name VARCHAR(50), age INT, participated_in_esports_event BOOLEAN); INSERT INTO Players (id, name, age, participated_in_esports_event) VALUES (1, 'Player1', 25, TRUE), (2, 'Player2', 30, FALSE), (3, 'Player3', 35, TRUE);", "sql": "SELECT COUNT(*) AS num_players, AVG(age) AS avg_age FROM Players WHERE participated_in_esports_event = TRUE;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 108, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: 3488 is the enrollment amount of what college?", "schema": "CREATE TABLE table_1183842_1 (institution VARCHAR, enrollment VARCHAR)", "sql": "SELECT institution FROM table_1183842_1 WHERE enrollment = 3488;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 64, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What college did Byron Williams attend?", "schema": "CREATE TABLE table_14655757_1 (college VARCHAR, player VARCHAR)", "sql": "SELECT college FROM table_14655757_1 WHERE player = 'Byron Williams';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 69, "num_statements": 1} {"question": "List the number of male members in each region who have not participated in any classes in the last week and their latest membership date.", "schema": "CREATE TABLE Members (MemberID INT, Gender VARCHAR(10), Region VARCHAR(20), MembershipDate DATE); INSERT INTO Members (MemberID, Gender, Region, MembershipDate) VALUES (10, 'Male', 'Midwest', '2021-01-01'); CREATE TABLE Classes (ClassID INT, ClassType VARCHAR(20), Duration INT, MemberID INT); INSERT INTO Classes (ClassID, ClassType, Duration, MemberID) VALUES (100, 'Yoga', 60, 10);", "sql": "SELECT Members.Region, COUNT(Members.MemberID) AS InactiveMembers, MAX(Members.MembershipDate) AS LatestMembershipDate FROM Members LEFT JOIN Classes ON Members.MemberID = Classes.MemberID WHERE Members.Gender = 'Male' AND Classes.MemberID IS NULL AND Members.MembershipDate <= DATE_SUB(CURRENT_DATE, INTERVAL 1 WEEK) GROUP BY Members.Region;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 342, "num_statements": 1} {"question": "PostgreSQL regression test 'horology': Write the SELECT query (example 125).", "schema": null, "sql": "SELECT (timestamp without time zone '10:30 today' = (timestamp without time zone 'yesterday' + interval '1 day 10 hr 30 min')) as \"True\";", "explanation": "Regression test for Horology in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT (timestamp without time zone '10:30 today' = (timestamp without time zone 'yesterday' + interval '1 day 10 hr 30 min')) as \"True\") AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 137, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many sets were played in 2007 at the tournament that had 4R in 2011 and 2R in 2009?", "schema": "CREATE TABLE table_name_89 (Id VARCHAR)", "sql": "SELECT 2007 FROM table_name_89 WHERE 2011 = '4r' AND 2009 = '2r';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 65, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the womens singles for raina tzvetkova petya nedelcheva", "schema": "CREATE TABLE table_14903491_1 (womens_singles VARCHAR, womens_doubles VARCHAR)", "sql": "SELECT womens_singles FROM table_14903491_1 WHERE womens_doubles = 'Raina Tzvetkova Petya Nedelcheva';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 102, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'jsonb' (example 860).", "schema": null, "sql": "update test_jsonb_subscript set test_json['a'][2]['b'][2]['c'][2] = '1';", "explanation": "DML from PostgreSQL core regression test for Jsonb.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 72, "num_statements": 1} {"question": "What is the total exploration cost in the 'ArabianSea' for wells drilled after 2010?", "schema": "CREATE TABLE WellExplorationCosts (well_id INT, drill_year INT, exploration_cost REAL); INSERT INTO WellExplorationCosts (well_id, drill_year, exploration_cost) VALUES (1, 2008, 2000000), (2, 2012, 3000000), (3, 2015, 1500000);", "sql": "SELECT SUM(exploration_cost) FROM WellExplorationCosts WHERE region = 'ArabianSea' AND drill_year > 2010;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 105, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'numeric' (example 135).", "schema": null, "sql": "INSERT INTO num_exp_div VALUES (3,0,'NaN');", "explanation": "DML from PostgreSQL core regression test for Numeric.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 43, "num_statements": 1} {"question": "What is the total TEU handling for each port in the cargo_handling table, ordered by handling date in descending order?", "schema": "CREATE TABLE cargo_handling (port_id INT, port_name VARCHAR(50), teu_count INT, handling_date DATE); INSERT INTO cargo_handling (port_id, port_name, teu_count, handling_date) VALUES (1, 'Port_A', 2000, '2022-01-03'), (2, 'Port_B', 3000, '2022-01-02'), (3, 'Port_C', 1000, '2022-01-01');", "sql": "SELECT port_name, SUM(teu_count) OVER (PARTITION BY port_name ORDER BY handling_date DESC) as total_teu FROM cargo_handling;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "query_window_function", "is_postgresql_specific": true, "sql_length": 124, "num_statements": 1} {"question": "What is the policy number, policyholder name, and issue date for policies issued in California?", "schema": "CREATE TABLE policies (policy_number INT, policyholder_name TEXT, issue_date DATE, state TEXT); INSERT INTO policies (policy_number, policyholder_name, issue_date, state) VALUES (12345, 'John Doe', '2021-06-01', 'California');", "sql": "SELECT policy_number, policyholder_name, issue_date FROM policies WHERE state = 'California';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 93, "num_statements": 1} {"question": "PostgreSQL regression test 'timestamptz': Write the SELECT query (example 129).", "schema": null, "sql": "SELECT '294277-01-01 00:00:00+00'::timestamptz; -- out of range\nSELECT '294277-12-31 16:00:00-08'::timestamptz; -- out of range\n\n-- Demonstrate functions and operators\nSELECT d1 FROM TIMESTAMPTZ_TBL\n WHERE d1 > timestamp with time zone '1997-01-02';", "explanation": "Regression test for Timestamptz in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT '294277-01-01 00:00:00+00'::timestamptz; -- out of range\nSELECT '294277-12-31 16:00:00-08'::timestamptz; -- out of range\n\n-- Demonstrate functions and operators\nSELECT d1 FROM TIMESTAMPTZ_TBL\n WHERE d1 > timestamp with time zone '1997-01-02') AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 253, "num_statements": 3} {"question": "Generate PostgreSQL SQL for: What is the average gold total for nations ranked 6 with 1 total medal and 1 bronze medal?", "schema": "CREATE TABLE table_name_90 (gold INTEGER, bronze VARCHAR, total VARCHAR, rank VARCHAR)", "sql": "SELECT AVG(gold) FROM table_name_90 WHERE total = 1 AND rank = '6' AND bronze > 1;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 82, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the total Lane with a Mark of 47.02, and a Heat higher than 5?", "schema": "CREATE TABLE table_name_64 (lane INTEGER, mark VARCHAR, heat VARCHAR)", "sql": "SELECT SUM(lane) FROM table_name_64 WHERE mark = '47.02' AND heat > 5;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 70, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'publication' (example 90).", "schema": null, "sql": "CREATE TABLE testpub_parted1 (LIKE testpub_parted);", "explanation": "DDL from PostgreSQL core regression test for Publication.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_table", "is_postgresql_specific": false, "sql_length": 51, "num_statements": 1} {"question": "How many volunteers and total donation amount for each program in India?", "schema": "CREATE TABLE Donations (id INT, donation_amount DECIMAL(10,2), donation_date DATE, program VARCHAR(50), country VARCHAR(50)); CREATE TABLE Volunteers (id INT, volunteer_name VARCHAR(50), program VARCHAR(50), country VARCHAR(50)); INSERT INTO Donations (id, donation_amount, donation_date, program, country) VALUES (1, 50.00, '2021-02-01', 'Women Empowerment', 'India'); INSERT INTO Donations (id, donation_amount, donation_date, program, country) VALUES (2, 150.00, '2021-02-02', 'Education', 'India'); INSERT INTO Volunteers (id, volunteer_name, program, country) VALUES (1, 'Deepika Singh', 'Women Empowerment', 'India'); INSERT INTO Volunteers (id, volunteer_name, program, country) VALUES (2, 'Ravi Kumar', 'Education', 'India');", "sql": "SELECT p.program, COUNT(DISTINCT v.volunteer_name) as num_volunteers, SUM(d.donation_amount) as total_donations FROM Donations d INNER JOIN Volunteers v ON d.program = v.program INNER JOIN Programs p ON d.program = p.program WHERE d.country = 'India' GROUP BY p.program;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 270, "num_statements": 1} {"question": "Delete the record for 'ManufacturerE' from the circular_economy_initiatives table, as they are no longer participating.", "schema": "CREATE TABLE circular_economy_initiatives (initiative_id INT, manufacturer_name TEXT, initiative_description TEXT); INSERT INTO circular_economy_initiatives (initiative_id, manufacturer_name, initiative_description) VALUES (1, 'ManufacturerA', 'Recycling Program'), (2, 'ManufacturerB', 'Remanufacturing Program'), (3, 'ManufacturerC', 'Waste Reduction Program'), (4, 'ManufacturerE', 'Upcycling Program');", "sql": "DELETE FROM circular_economy_initiatives WHERE manufacturer_name = 'ManufacturerE';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_delete", "is_postgresql_specific": false, "sql_length": 83, "num_statements": 1} {"question": "PostgreSQL regression test 'groupingsets': Write the SELECT query (example 52).", "schema": null, "sql": "select x, y\n from (select t1.unique1 as x, t1.unique2 as y from tenk1 t1, tenk1 t2) as t\n where x = 1\n group by grouping sets (x, y)\n order by 1, 2;", "explanation": "Regression test for Groupingsets in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select x, y\n from (select t1.unique1 as x, t1.unique2 as y from tenk1 t1, tenk1 t2) as t\n where x = 1\n group by grouping sets (x, y)\n order by 1, 2) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "advanced", "category": "query_aggregation", "is_postgresql_specific": true, "sql_length": 152, "num_statements": 1} {"question": "What is the average speed of public buses in CityA?", "schema": "CREATE TABLE CityA_BusRoutes (route_id INT, avg_speed FLOAT, vehicle_type VARCHAR(20)); INSERT INTO CityA_BusRoutes (route_id, avg_speed, vehicle_type) VALUES (1, 45.6, 'Bus'), (2, 38.2, 'Bus'), (3, 48.7, 'Bus');", "sql": "SELECT AVG(avg_speed) FROM CityA_BusRoutes WHERE vehicle_type = 'Bus';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 70, "num_statements": 1} {"question": "PostgreSQL Textsearch: show example 74.", "schema": null, "sql": "SELECT ts_lexize('english_stem', 'stars'); ts_lexize ----------- {star} SELECT ts_lexize('english_stem', 'a'); ts_lexize ----------- {};", "explanation": "Example from PostgreSQL documentation on Textsearch.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 136, "num_statements": 3} {"question": "Generate PostgreSQL SQL for: What is the release date for Destiny of the Daleks?", "schema": "CREATE TABLE table_1681535_1 (release_date VARCHAR, title VARCHAR)", "sql": "SELECT release_date FROM table_1681535_1 WHERE title = 'Destiny of the Daleks';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 79, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the 2010 for 2006 of 2r", "schema": "CREATE TABLE table_name_9 (Id VARCHAR)", "sql": "SELECT 2010 FROM table_name_9 WHERE 2006 = '2r';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 48, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: On what date is the winning team Abt Sportsline and pole position Mattias Ekström in the Circuit Ricardo Tormo , Valencia?", "schema": "CREATE TABLE table_26267607_2 (date VARCHAR, circuit VARCHAR, winning_team VARCHAR, pole_position VARCHAR)", "sql": "SELECT date FROM table_26267607_2 WHERE winning_team = 'Abt Sportsline' AND pole_position = 'Mattias Ekström' AND circuit = 'circuit Ricardo Tormo , Valencia';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 159, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What character is played by dani harmer for under 23 episodes?", "schema": "CREATE TABLE table_name_51 (character VARCHAR, episodes VARCHAR, actor VARCHAR)", "sql": "SELECT character FROM table_name_51 WHERE episodes < 23 AND actor = 'dani harmer';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 82, "num_statements": 1} {"question": "Insert new record into 'geological_survey' table with 'survey_date' as '2022-04-22' and 'survey_type' as 'Ground Penetrating Radar'", "schema": "CREATE TABLE geological_survey (survey_date DATE, survey_type VARCHAR(255), PRIMARY KEY (survey_date, survey_type));", "sql": "INSERT INTO geological_survey (survey_date, survey_type) VALUES ('2022-04-22', 'Ground Penetrating Radar');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 107, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the main use for the structure listed in walker city, iowa?", "schema": "CREATE TABLE table_name_25 (main_use VARCHAR, town VARCHAR)", "sql": "SELECT main_use FROM table_name_25 WHERE town = 'walker city, iowa';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 68, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the set 5 if the Date is jun 7, and a Set 4 is 21-25?", "schema": "CREATE TABLE table_name_65 (set_5 VARCHAR, date VARCHAR, set_4 VARCHAR)", "sql": "SELECT set_5 FROM table_name_65 WHERE date = 'jun 7' AND set_4 = '21-25';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 73, "num_statements": 1} {"question": "List all the unique organizations involved in disaster_response efforts and their respective total spending.", "schema": "CREATE TABLE disaster_response (org_id INT, org_name VARCHAR(50), country VARCHAR(50), spending DECIMAL(10,2)); INSERT INTO disaster_response (org_id, org_name, country, spending) VALUES (1, 'UNICEF', 'USA', 500000.00), (2, 'Red Cross', 'Canada', 350000.00);", "sql": "SELECT DISTINCT org_name, SUM(spending) FROM disaster_response GROUP BY org_name;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 81, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: what is the fee for ankaragücü previous club", "schema": "CREATE TABLE table_27998152_1 (fee VARCHAR, previous_club VARCHAR)", "sql": "SELECT fee FROM table_27998152_1 WHERE previous_club = 'Ankaragücü';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 68, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What do the notes say for 1989 May 7?", "schema": "CREATE TABLE table_name_32 (notes VARCHAR, date VARCHAR)", "sql": "SELECT notes FROM table_name_32 WHERE date = '1989 may 7';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 58, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the bowling score of season 1907?", "schema": "CREATE TABLE table_name_23 (bowling VARCHAR, season VARCHAR)", "sql": "SELECT bowling FROM table_name_23 WHERE season = '1907';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 56, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the Yogi Bear that aired on 1959.12.21?", "schema": "CREATE TABLE table_19860361_3 (yogi_bear VARCHAR, air_date VARCHAR)", "sql": "SELECT yogi_bear FROM table_19860361_3 WHERE air_date = '1959.12.21';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 69, "num_statements": 1} {"question": "Write the PL/pgSQL object from PostgreSQL regression test 'plpgsql' (example 776).", "schema": null, "sql": "--\n-- Check argument name is used instead of $n in error message\n--\nCREATE FUNCTION fx(x WSlot) RETURNS void AS $$\nBEGIN\n GET DIAGNOSTICS x = ROW_COUNT;\n RETURN;\nEND; $$ LANGUAGE plpgsql;", "explanation": "PL/pgSQL object from PostgreSQL core test for Plpgsql.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "plpgsql_function", "is_postgresql_specific": true, "sql_length": 189, "num_statements": 4} {"question": "Generate PostgreSQL SQL for: Which Born-Died has a Term start of 4 november 1943?", "schema": "CREATE TABLE table_name_52 (born_died VARCHAR, term_start VARCHAR)", "sql": "SELECT born_died FROM table_name_52 WHERE term_start = '4 november 1943';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 73, "num_statements": 1} {"question": "What is the average rating of films produced in Spain and Italy?", "schema": "CREATE TABLE Films (film_id INT, title VARCHAR(255), release_date DATE, rating FLOAT, production_country VARCHAR(50)); INSERT INTO Films (film_id, title, release_date, rating, production_country) VALUES (1, 'Movie1', '2000-01-01', 7.5, 'Spain'), (2, 'Movie2', '2005-01-01', 8.2, 'Italy'), (3, 'Movie3', '2010-01-01', 6.8, 'France');", "sql": "SELECT AVG(rating) FROM Films WHERE production_country IN ('Spain', 'Italy');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 77, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What are the payment date of the payment with amount paid higher than 300 or with payment type is 'Check'", "schema": "CREATE TABLE payments (payment_date VARCHAR, amount_paid VARCHAR, payment_type_code VARCHAR)", "sql": "SELECT payment_date FROM payments WHERE amount_paid > 300 OR payment_type_code = 'Check';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 89, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the score vs. Protoss when the score vs. Terran is 10 wins?", "schema": "CREATE TABLE table_name_97 (vs_protoss VARCHAR, vs_terran VARCHAR)", "sql": "SELECT vs_protoss FROM table_name_97 WHERE vs_terran = '10 wins';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 65, "num_statements": 1} {"question": "What is the total revenue for the 'Organic Greens' restaurant?", "schema": "CREATE TABLE Restaurants (restaurant_name TEXT, revenue FLOAT); INSERT INTO Restaurants (restaurant_name, revenue) VALUES ('Urban Plate', 45000.0), ('Organic Greens', 38000.0), ('Fiesta Mex', 52000.0);", "sql": "SELECT revenue FROM Restaurants WHERE restaurant_name = 'Organic Greens';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 73, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'btree_gin' (example 4).", "schema": null, "sql": "INSERT INTO test_bytea VALUES ('a'),('ab'),('abc'),('abb'),('axy'),('xyz');", "explanation": "Example query from the 'btree_gin' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 75, "num_statements": 1} {"question": "PostgreSQL regression test 'join': Write the SELECT query (example 105).", "schema": null, "sql": "select * from (x left join y on (x1 = y1)) left join x xx(xx1,xx2)\non (x1 = xx1) where (xx2 is not null);", "explanation": "Regression test for Join in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select * from (x left join y on (x1 = y1)) left join x xx(xx1,xx2)\non (x1 = xx1) where (xx2 is not null)) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 105, "num_statements": 1} {"question": "What is the name and number of schools in each city?", "schema": "CREATE TABLE cities (id INT, name VARCHAR(255)); CREATE TABLE schools (id INT, city_id INT, name VARCHAR(255), number INT);", "sql": "SELECT c.name, s.number FROM cities c JOIN schools s ON c.id = s.city_id;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 73, "num_statements": 1} {"question": "Find the average funding amount for companies founded by individuals from Latin America", "schema": "CREATE TABLE company_founding(id INT PRIMARY KEY, company_name VARCHAR(100), founder_country VARCHAR(50), funding_amount INT); INSERT INTO company_founding VALUES (1, 'Acme Inc', 'Brazil', 1000000); INSERT INTO company_founding VALUES (2, 'Beta Corp', 'Argentina', 2000000); INSERT INTO company_founding VALUES (3, 'Charlie LLC', 'Canada', 500000); INSERT INTO company_founding VALUES (4, 'Delta Inc', 'Mexico', 750000);", "sql": "SELECT AVG(funding_amount) FROM company_founding WHERE founder_country IN ('Brazil', 'Argentina', 'Mexico');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 108, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Gary Barnett who has been in 0 FA Cups plays what position?", "schema": "CREATE TABLE table_name_7 (position VARCHAR, fa_cup_apps VARCHAR, name VARCHAR)", "sql": "SELECT position FROM table_name_7 WHERE fa_cup_apps = '0' AND name = 'gary barnett';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 84, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'join' (example 64).", "schema": null, "sql": "INSERT INTO t1 VALUES ( 'bb', 11 );", "explanation": "DML from PostgreSQL core regression test for Join.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 35, "num_statements": 1} {"question": "Update the type of circular economy with ID 2 to 'Redesign'", "schema": "CREATE SCHEMA manufacturing;CREATE TABLE circular_economy (id INT PRIMARY KEY, name TEXT, type TEXT);INSERT INTO circular_economy (id, name, type) VALUES (1, 'Circular Economy A', 'Reuse'); INSERT INTO circular_economy (id, name, type) VALUES (2, 'Circular Economy B', 'Reduce'); INSERT INTO circular_economy (id, name, type) VALUES (3, 'Circular Economy C', 'Repurpose');", "sql": "UPDATE circular_economy SET type = 'Redesign' WHERE id = 2;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 59, "num_statements": 1} {"question": "What is the minimum number of public hospitals in the state of \"Texas\"?", "schema": "CREATE TABLE hospitals (hospital_id INT, hospital_name TEXT, state TEXT, type TEXT); INSERT INTO hospitals (hospital_id, hospital_name, state, type) VALUES (1, 'Texas Medical Center', 'Texas', 'Public'), (2, 'Methodist Hospital', 'Texas', 'Private'), (3, 'Memorial Hermann Hospital', 'Texas', 'Public');", "sql": "SELECT MIN(hospital_id) FROM hospitals WHERE state = 'Texas' AND type = 'Public';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 81, "num_statements": 1} {"question": "Show a SQL definition from the pglogical project (pglogical--2.2.1, item 43).", "schema": null, "sql": "CREATE FUNCTION pglogical.pglogical_gen_slot_name(name, name, name)\nRETURNS name\nIMMUTABLE STRICT LANGUAGE c AS 'MODULE_PATHNAME';", "explanation": "SQL definition from the open-source pglogical PostgreSQL project.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 130, "num_statements": 1} {"question": "What are the names and research interests of female professors in the Computer Science department?", "schema": "CREATE TABLE professors (id INT, name TEXT, gender TEXT, research_interest TEXT); INSERT INTO professors (id, name, gender, research_interest) VALUES (1, 'Alice', 'Female', 'Machine Learning'); INSERT INTO professors (id, name, gender, research_interest) VALUES (2, 'Bob', 'Male', 'Data Science');", "sql": "SELECT name, research_interest FROM professors WHERE gender = 'Female' AND department = 'Computer Science';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 107, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: When North Melbourne played as the away team, what was the crowd numbers?", "schema": "CREATE TABLE table_name_56 (crowd VARCHAR, away_team VARCHAR)", "sql": "SELECT crowd FROM table_name_56 WHERE away_team = 'north melbourne';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 68, "num_statements": 1} {"question": "What is the total amount of water treated in wastewater treatment plants in the New York region in the last month?", "schema": "CREATE TABLE wastewater_treatment (region VARCHAR(20), plant_id INT, treated_water FLOAT, timestamp TIMESTAMP); INSERT INTO wastewater_treatment (region, plant_id, treated_water, timestamp) VALUES ('New York', 1, 500000, '2022-01-01 10:00:00'), ('New York', 2, 600000, '2022-02-01 10:00:00');", "sql": "SELECT SUM(treated_water) FROM wastewater_treatment WHERE region = 'New York' AND timestamp BETWEEN DATE_SUB(CURRENT_TIMESTAMP, INTERVAL 1 MONTH) AND CURRENT_TIMESTAMP;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 168, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the location where aaron grundy was the loser?", "schema": "CREATE TABLE table_name_27 (location VARCHAR, loser VARCHAR)", "sql": "SELECT location FROM table_name_27 WHERE loser = 'aaron grundy';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 64, "num_statements": 1} {"question": "Which manufacturers in Sweden have invested more than 400,000 in Industry 4.0?", "schema": "CREATE TABLE manufacturers (manufacturer_id INT, name VARCHAR(255), location VARCHAR(255), industry_4_0_investment FLOAT); INSERT INTO manufacturers (manufacturer_id, name, location, industry_4_0_investment) VALUES (1, 'Smart Machines', 'Germany', 350000), (2, 'Eco Engines', 'Sweden', 420000), (3, 'Precision Robotics', 'Japan', 500000), (4, 'Green Innovations', 'Sweden', 375000), (5, 'FutureTech', 'USA', 410000);", "sql": "SELECT m.name, m.industry_4_0_investment FROM manufacturers m WHERE m.location = 'Sweden' AND m.industry_4_0_investment > 400000;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 129, "num_statements": 1} {"question": "Show the total fuel efficiency for vessels with IDs \"VT-123\" and \"VT-456\" from the \"vessel_performance\" table.", "schema": "CREATE TABLE vessel_performance (id INT PRIMARY KEY, vessel_id INT, max_speed FLOAT, avg_speed FLOAT, fuel_efficiency FLOAT);", "sql": "SELECT SUM(fuel_efficiency) FROM vessel_performance WHERE vessel_id IN (123, 456);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "plpgsql", "is_postgresql_specific": false, "sql_length": 82, "num_statements": 1} {"question": "Insert new soil moisture data for farm_id 987", "schema": "CREATE TABLE soil_moisture (id INT, farm_id INT, moisture_level FLOAT, measurement_date DATE);", "sql": "INSERT INTO soil_moisture (id, farm_id, moisture_level, measurement_date) VALUES (5, 987, 45.6, '2022-06-01');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 110, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the lowest laps that has a manufacturer of yamaha, and a time/retired of +1:08.312, and a grid less than 20?", "schema": "CREATE TABLE table_name_4 (laps INTEGER, grid VARCHAR, manufacturer VARCHAR, time_retired VARCHAR)", "sql": "SELECT MIN(laps) FROM table_name_4 WHERE manufacturer = 'yamaha' AND time_retired = '+1:08.312' AND grid < 20;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 110, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What engine has an output of ps (kw; hp) @4700 rpm?", "schema": "CREATE TABLE table_name_85 (engine VARCHAR, output VARCHAR)", "sql": "SELECT engine FROM table_name_85 WHERE output = 'ps (kw; hp) @4700 rpm';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 72, "num_statements": 2} {"question": "PostgreSQL regression test 'jsonb': Write the SELECT query (example 149).", "schema": null, "sql": "SELECT jsonb_typeof('{\"c\":3,\"p\":\"o\"}') AS object;", "explanation": "Regression test for Jsonb in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT jsonb_typeof('{\"c\":3,\"p\":\"o\"}') AS object) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 49, "num_statements": 1} {"question": "What is the total CO2 emission for each factory in the Western region by country?", "schema": "CREATE TABLE factories (factory_id INT, factory_name VARCHAR(50), country VARCHAR(50), co2_emission INT); CREATE TABLE country_regions (country VARCHAR(50), region VARCHAR(50));", "sql": "SELECT country, SUM(co2_emission) FROM factories JOIN country_regions ON factories.country = country_regions.country WHERE region = 'Western' GROUP BY country;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 159, "num_statements": 1} {"question": "How many urban agriculture initiatives are there in 'Toronto'?", "schema": "CREATE TABLE urban_agriculture_initiatives (id INT, name TEXT, location TEXT); INSERT INTO urban_agriculture_initiatives (id, name, location) VALUES (1, 'Initiative A', 'Toronto'), (2, 'Initiative B', 'Montreal'), (3, 'Initiative C', 'Toronto');", "sql": "SELECT COUNT(*) FROM urban_agriculture_initiatives WHERE location = 'Toronto';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 78, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the production code for the episode written by J. H. Wyman & Jeff Pinkner?", "schema": "CREATE TABLE table_24649082_1 (production_code VARCHAR, written_by VARCHAR)", "sql": "SELECT production_code FROM table_24649082_1 WHERE written_by = 'J. H. Wyman & Jeff Pinkner';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 93, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the surface in 1981?", "schema": "CREATE TABLE table_name_56 (surface VARCHAR, date VARCHAR)", "sql": "SELECT surface FROM table_name_56 WHERE date = 1981;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 52, "num_statements": 1} {"question": "Delete all records pertaining to organizations not focused on poverty alleviation.", "schema": "CREATE TABLE organizations (id INT, name VARCHAR(255), focus VARCHAR(255)); INSERT INTO organizations (id, name, focus) VALUES (1, 'Against Poverty', 'Poverty Alleviation'), (2, 'Arts and Culture Fund', 'Arts and Culture'), (3, 'Climate Foundation', 'Climate Change');", "sql": "DELETE FROM donations WHERE donations.organization_id NOT IN (SELECT id FROM organizations WHERE organizations.focus = 'Poverty Alleviation'); DELETE FROM organizations WHERE organizations.focus != 'Poverty Alleviation';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "dml_delete", "is_postgresql_specific": false, "sql_length": 220, "num_statements": 2} {"question": "Generate PostgreSQL SQL for: What week did the contestant finish in the bottom 2 with a Celine Dion song?", "schema": "CREATE TABLE table_name_41 (week VARCHAR, status VARCHAR, artist VARCHAR)", "sql": "SELECT week FROM table_name_41 WHERE status = 'bottom 2' AND artist = 'celine dion';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 84, "num_statements": 1} {"question": "Which online travel agencies have more than 50 bookings for resorts in the Caribbean?", "schema": "CREATE TABLE online_travel_agency (id INT PRIMARY KEY, ota_name VARCHAR(50), hotel_name VARCHAR(50), region VARCHAR(50), bookings INT); INSERT INTO online_travel_agency (id, ota_name, hotel_name, region, bookings) VALUES (1, 'Travelocity', 'Caribbean Resort', 'Caribbean', 60), (2, 'Expedia', 'Mountain Lodge', 'Mountains', 45);", "sql": "SELECT ota_name, SUM(bookings) FROM online_travel_agency WHERE region = 'Caribbean' GROUP BY ota_name HAVING SUM(bookings) > 50;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 128, "num_statements": 1} {"question": "List the unique court locations where legal aid was provided in Alberta and Manitoba in the last 5 years.", "schema": "CREATE TABLE legal_aid_alberta (court_location VARCHAR(50), date DATE); INSERT INTO legal_aid_alberta VALUES ('Edmonton', '2022-02-01'), ('Calgary', '2021-06-15'), ('Red Deer', '2020-09-03'); CREATE TABLE legal_aid_manitoba (court_location VARCHAR(50), date DATE); INSERT INTO legal_aid_manitoba VALUES ('Winnipeg', '2022-03-10'), ('Brandon', '2021-12-20'), ('Thompson', '2020-07-25');", "sql": "SELECT DISTINCT court_location FROM legal_aid_alberta WHERE date >= DATE_SUB(CURRENT_DATE, INTERVAL 5 YEAR) UNION ALL SELECT DISTINCT court_location FROM legal_aid_manitoba WHERE date >= DATE_SUB(CURRENT_DATE, INTERVAL 5 YEAR);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 227, "num_statements": 1} {"question": "What is the difference in average customer spending between customers in Mexico and Argentina?", "schema": "CREATE TABLE CustomerSpendingMX (CustomerID INT, Country TEXT, AvgSpending DECIMAL(5,2)); INSERT INTO CustomerSpendingMX (CustomerID, Country, AvgSpending) VALUES (1, 'Mexico', 120.50), (2, 'Mexico', 110.50), (3, 'Mexico', 130.50), (4, 'Mexico', 90.50); CREATE TABLE CustomerSpendingAR (CustomerID INT, Country TEXT, AvgSpending DECIMAL(5,2)); INSERT INTO CustomerSpendingAR (CustomerID, Country, AvgSpending) VALUES (1, 'Argentina', 105.00), (2, 'Argentina', 115.00), (3, 'Argentina', 125.00), (4, 'Argentina', 135.00);", "sql": "SELECT AVG(CSMX.AvgSpending) - AVG(CSA.AvgSpending) FROM CustomerSpendingMX CSMX, CustomerSpendingAR CSA WHERE CSMX.Country = 'Mexico' AND CSA.Country = 'Argentina';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 165, "num_statements": 1} {"question": "Display the number of cruelty-free and non-cruelty-free skincare products.", "schema": "CREATE TABLE Products (id INT, name VARCHAR(50), type VARCHAR(20), cruelty_free BOOLEAN); INSERT INTO Products (id, name, type, cruelty_free) VALUES (1, 'Cleanser', 'Skincare', true), (2, 'Toner', 'Skincare', true), (3, 'Moisturizer', 'Skincare', false);", "sql": "SELECT CASE WHEN cruelty_free = true THEN 'Cruelty-free' ELSE 'Non-cruelty-free' END as product_type, COUNT(*) as count FROM Products WHERE type = 'Skincare' GROUP BY product_type;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 180, "num_statements": 1} {"question": "What is the total revenue generated by virtual tours for each region?", "schema": "CREATE TABLE tour (id INT, name TEXT, region TEXT, price INT); INSERT INTO tour (id, name, region, price) VALUES (1, 'Virtual Acropolis', 'Europe', 10); INSERT INTO tour (id, name, region, price) VALUES (2, 'Virtual Machu Picchu', 'South America', 15);", "sql": "SELECT region, SUM(price) as total_revenue FROM tour WHERE name LIKE '%virtual%' GROUP BY region;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 97, "num_statements": 1} {"question": "What is the maximum CO2 emission reduction achieved by green building projects in Toronto, Canada, since 2015?", "schema": "CREATE TABLE co2_reductions (id INT, city VARCHAR(20), country VARCHAR(20), year INT, co2_reduction FLOAT); INSERT INTO co2_reductions (id, city, country, year, co2_reduction) VALUES (1, 'Toronto', 'Canada', 2015, 1200), (2, 'Toronto', 'Canada', 2016, 1500), (3, 'Toronto', 'Canada', 2017, 1800), (4, 'Toronto', 'Canada', 2018, 2000);", "sql": "SELECT MAX(co2_reduction) FROM co2_reductions WHERE city = 'Toronto' AND country = 'Canada' AND year >= 2015;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 109, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which venue has a note of AM?", "schema": "CREATE TABLE table_name_32 (venue VARCHAR, notes VARCHAR)", "sql": "SELECT venue FROM table_name_32 WHERE notes = 'am';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 51, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is South Africa's to par?", "schema": "CREATE TABLE table_name_35 (to_par VARCHAR, country VARCHAR)", "sql": "SELECT to_par FROM table_name_35 WHERE country = 'south africa';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 64, "num_statements": 1} {"question": "How many shared bicycles were available in Berlin on July 4, 2021?", "schema": "CREATE TABLE shared_bicycles( bicycle_id INT, availability_status VARCHAR(50), availability_date DATE, city VARCHAR(50));", "sql": "SELECT COUNT(*) FROM shared_bicycles WHERE availability_status = 'available' AND availability_date = '2021-07-04' AND city = 'Berlin';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 134, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which average's against score has 2 as a difference and a lost of 5?", "schema": "CREATE TABLE table_name_34 (against INTEGER, difference VARCHAR, lost VARCHAR)", "sql": "SELECT AVG(against) FROM table_name_34 WHERE difference = '2' AND lost = 5;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 75, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What year did the Caphouse Colliery railway close?", "schema": "CREATE TABLE table_name_79 (closed VARCHAR, name VARCHAR)", "sql": "SELECT closed FROM table_name_79 WHERE name = 'caphouse colliery';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "PostgreSQL regression test 'collate.icu.utf8': Write the SELECT query (example 17).", "schema": null, "sql": "SELECT * FROM collate_test3 WHERE b >= 'BBC';", "explanation": "Regression test for Collate.Icu.Utf8 in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT * FROM collate_test3 WHERE b >= 'BBC') AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 45, "num_statements": 1} {"question": "What is the maximum number of likes for posts containing the hashtag \"#veganfood\" per month for the last two years?", "schema": "CREATE TABLE posts (id INT, user_id INT, content TEXT, likes INT, timestamp DATETIME); INSERT INTO posts (id, user_id, content, likes, timestamp) VALUES (1, 1, 'Vegan food recipe', 350, '2020-01-01 10:00:00'), (2, 2, 'Delicious vegan meal', 180, '2020-01-05 15:30:00');", "sql": "SELECT EXTRACT(MONTH FROM timestamp) AS month, MAX(likes) FROM posts JOIN hashtags ON posts.id = hashtags.post_id WHERE hashtag = '#veganfood' AND timestamp BETWEEN DATE_SUB(NOW(), INTERVAL 2 YEAR) AND NOW() GROUP BY month;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 223, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is then engine when the notes state srt8?", "schema": "CREATE TABLE table_name_91 (engine VARCHAR, notes VARCHAR)", "sql": "SELECT engine FROM table_name_91 WHERE notes = 'srt8';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 54, "num_statements": 1} {"question": "What is the total number of cases handled by attorneys in the \"public_defense\" department, partitioned by the attorney's gender?", "schema": "CREATE TABLE attorneys (attorney_id INT, name VARCHAR(50), department VARCHAR(50), gender VARCHAR(10)); INSERT INTO attorneys (attorney_id, name, department, gender) VALUES (1, 'John Doe', 'public_defense', 'male'); INSERT INTO attorneys (attorney_id, name, department, gender) VALUES (2, 'Jane Smith', 'public_defense', 'female'); CREATE TABLE cases (case_id INT, attorney_id INT); INSERT INTO cases (case_id, attorney_id) VALUES (1, 1); INSERT INTO cases (case_id, attorney_id) VALUES (2, 1); INSERT INTO cases (case_id, attorney_id) VALUES (3, 2);", "sql": "SELECT department, gender, COUNT(*) OVER (PARTITION BY department, gender) as total_cases FROM attorneys INNER JOIN cases ON attorneys.attorney_id = cases.attorney_id WHERE department = 'public_defense';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "query_window_function", "is_postgresql_specific": true, "sql_length": 203, "num_statements": 1} {"question": "What is the most common art medium for artists from Africa?", "schema": "CREATE TABLE artists (id INT, name TEXT, country TEXT, medium TEXT); INSERT INTO artists (id, name, country, medium) VALUES (1, 'John Doe', 'Nigeria', 'Sculpture'), (2, 'Jane Smith', 'Kenya', 'Painting'), (3, 'Mohamed Ahmed', 'Egypt', 'Sculpture'), (4, 'Aisha Mohamed', 'Senegal', 'Painting'), (5, 'Pedro Gonzales', 'South Africa', 'Drawing');", "sql": "SELECT country, medium, COUNT(*) AS frequency FROM artists WHERE country LIKE '%Africa%' GROUP BY country, medium ORDER BY frequency DESC LIMIT 1;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 146, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'groupingsets' (example 179).", "schema": null, "sql": "create table gs_hash_1 as\nselect g100, g10, sum(g::numeric), count(*), max(g::text)\nfrom gs_data_1 group by cube (g1000, g100,g10);", "explanation": "DDL from PostgreSQL core regression test for Groupingsets.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "ddl_table", "is_postgresql_specific": true, "sql_length": 131, "num_statements": 1} {"question": "What is the total number of episodes and average runtime for TV shows in the reality genre?", "schema": "CREATE TABLE tv_shows_data (id INT, title VARCHAR(255), genre VARCHAR(255), episodes INT, runtime INT); INSERT INTO tv_shows_data (id, title, genre, episodes, runtime) VALUES (1, 'Show1', 'Reality', 15, 30), (2, 'Show2', 'Reality', 20, 45), (3, 'Show3', 'Documentary', 10, 60), (4, 'Show4', 'Reality', 12, 60), (5, 'Show5', 'Drama', 20, 60);", "sql": "SELECT genre, AVG(runtime) AS avg_runtime, SUM(episodes) AS total_episodes FROM tv_shows_data WHERE genre = 'Reality' GROUP BY genre;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 133, "num_statements": 1} {"question": "What is the total cost of ingredients for the 'Veggie Delight' sandwich?", "schema": "CREATE TABLE Ingredients (id INT, item_name VARCHAR(50), cost DECIMAL(5,2)); INSERT INTO Ingredients VALUES (1, 'Lettuce', 0.25), (2, 'Tomato', 0.30), (3, 'Cucumber', 0.20), (4, 'Avocado', 0.50), (5, 'Bread', 0.75);", "sql": "SELECT SUM(cost) FROM Ingredients WHERE item_name = 'Veggie Delight';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 69, "num_statements": 1} {"question": "How many evidence-based policy making projects were completed in India since 2010?", "schema": "CREATE TABLE EvidenceBasedPolicy (id INT, project_name VARCHAR(50), country VARCHAR(50), start_date DATE, end_date DATE);", "sql": "SELECT COUNT(*) FROM EvidenceBasedPolicy WHERE country = 'India' AND start_date <= '2010-01-01' AND end_date >= '2010-12-31';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 125, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: On what date was the away team Sheffield Wednesday?", "schema": "CREATE TABLE table_name_51 (date VARCHAR, away_team VARCHAR)", "sql": "SELECT date FROM table_name_51 WHERE away_team = 'sheffield wednesday';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 71, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the stage for 08:46", "schema": "CREATE TABLE table_name_85 (stage VARCHAR, time__eest_ VARCHAR)", "sql": "SELECT stage FROM table_name_85 WHERE time__eest_ = '08:46';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 60, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the record for score of l 93–104 (ot)", "schema": "CREATE TABLE table_17288825_8 (record VARCHAR, score VARCHAR)", "sql": "SELECT record FROM table_17288825_8 WHERE score = 'L 93–104 (OT)';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "PostgreSQL regression test 'stats': Write the SELECT query (example 359).", "schema": null, "sql": "SELECT sum(reads) AS io_sum_local_before_reads\n FROM pg_stat_io WHERE context = 'normal' AND object = 'temp relation' \\gset\n-- Read in evicted buffers, generating reads.\nSELECT COUNT(*) FROM test_io_local;", "explanation": "Regression test for Stats in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT sum(reads) AS io_sum_local_before_reads\n FROM pg_stat_io WHERE context = 'normal' AND object = 'temp relation' \\gset\n-- Read in evicted buffers, generating reads.\nSELECT COUNT(*) FROM test_io_local) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 206, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the sumof points for year less than 1994 and chassis of lola lc89b", "schema": "CREATE TABLE table_name_47 (points INTEGER, year VARCHAR, chassis VARCHAR)", "sql": "SELECT SUM(points) FROM table_name_47 WHERE year < 1994 AND chassis = 'lola lc89b';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 83, "num_statements": 1} {"question": "How many users registered per month in the 'RPG' genre for the last year?", "schema": "CREATE TABLE Registrations (RegistrationID INT, UserID INT, RegistrationDate DATETIME, Game VARCHAR(50)); INSERT INTO Registrations (RegistrationID, UserID, RegistrationDate, Game) VALUES (1, 1, '2021-12-10', 'RPG'), (2, 2, '2022-01-05', 'RPG'), (3, 3, '2022-02-25', 'FPS');", "sql": "SELECT MONTH(RegistrationDate), YEAR(RegistrationDate), COUNT(*) as UsersRegistered FROM Registrations WHERE Game = 'RPG' AND RegistrationDate >= DATE_SUB(CURRENT_DATE, INTERVAL 1 YEAR) GROUP BY YEAR(RegistrationDate), MONTH(RegistrationDate);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 243, "num_statements": 1} {"question": "What is the average water usage in Mumbai and Delhi?", "schema": "CREATE TABLE WaterUsage (Location VARCHAR(100), Usage FLOAT, Date DATE); INSERT INTO WaterUsage (Location, Usage, Date) VALUES ('Mumbai', 150, '2022-01-01'), ('Delhi', 200, '2022-01-01');", "sql": "SELECT Location, AVG(Usage) FROM WaterUsage GROUP BY Location;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 62, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the result on October 20, 2002?", "schema": "CREATE TABLE table_name_87 (result VARCHAR, date VARCHAR)", "sql": "SELECT result FROM table_name_87 WHERE date = 'october 20, 2002';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 65, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which ship was launched from Devonport, Devon?", "schema": "CREATE TABLE table_name_30 (ship VARCHAR, location VARCHAR)", "sql": "SELECT ship FROM table_name_30 WHERE location = 'devonport, devon';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 67, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the muzzle device with a 1:7 barrel twist and a stock 4th generation?", "schema": "CREATE TABLE table_name_47 (muzzle_device VARCHAR, barrel_twist VARCHAR, stock VARCHAR)", "sql": "SELECT muzzle_device FROM table_name_47 WHERE barrel_twist = '1:7' AND stock = '4th generation';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 96, "num_statements": 1} {"question": "PostgreSQL regression test 'strings': Write the SELECT query (example 15).", "schema": null, "sql": "SELECT U&'wrong: \\db99\\0061';", "explanation": "Regression test for Strings in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT U&'wrong: \\db99\\0061') AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 29, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What Silver has a Golf of Li AO?", "schema": "CREATE TABLE table_name_20 (silver VARCHAR, gold VARCHAR)", "sql": "SELECT silver FROM table_name_20 WHERE gold = 'li ao';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 54, "num_statements": 1} {"question": "How many graduate students in the Engineering department come from each country?", "schema": "CREATE TABLE student_demographics (id INT, student_id INT, country VARCHAR(50), department VARCHAR(50)); INSERT INTO student_demographics (id, student_id, country, department) VALUES (1, 1, 'USA', 'Engineering'), (2, 2, 'Canada', 'Engineering'), (3, 3, 'Mexico', 'Engineering');", "sql": "SELECT country, COUNT(DISTINCT student_id) FROM student_demographics WHERE department = 'Engineering' GROUP BY country;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 119, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: When cardboard boxes is segment c what is segment a?", "schema": "CREATE TABLE table_15187735_5 (segment_a VARCHAR, segment_c VARCHAR)", "sql": "SELECT segment_a FROM table_15187735_5 WHERE segment_c = 'Cardboard Boxes';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 75, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the smallest Wins with a Position of 20th, and Poles smaller than 0?", "schema": "CREATE TABLE table_name_19 (wins INTEGER, position VARCHAR, poles VARCHAR)", "sql": "SELECT MIN(wins) FROM table_name_19 WHERE position = '20th' AND poles < 0;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 74, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: When was the game with the loss of Moore (0-1)?", "schema": "CREATE TABLE table_name_82 (date VARCHAR, loss VARCHAR)", "sql": "SELECT date FROM table_name_82 WHERE loss = 'moore (0-1)';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 58, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many episodes were seen by 0.67 million US viewers on their original air dates?", "schema": "CREATE TABLE table_13336122_7 (original_air_date VARCHAR, us_viewers__million_ VARCHAR)", "sql": "SELECT COUNT(original_air_date) FROM table_13336122_7 WHERE us_viewers__million_ = '0.67';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 90, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which fault log included the most number of faulty parts? List the fault log id, description and record time.", "schema": "CREATE TABLE Fault_Log (fault_log_entry_id VARCHAR, fault_description VARCHAR, fault_log_entry_datetime VARCHAR); CREATE TABLE Fault_Log_Parts (fault_log_entry_id VARCHAR)", "sql": "SELECT T1.fault_log_entry_id, T1.fault_description, T1.fault_log_entry_datetime FROM Fault_Log AS T1 JOIN Fault_Log_Parts AS T2 ON T1.fault_log_entry_id = T2.fault_log_entry_id GROUP BY T1.fault_log_entry_id ORDER BY COUNT(*) DESC LIMIT 1;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 239, "num_statements": 1} {"question": "PostgreSQL Indices: show example 12.", "schema": null, "sql": "CREATE INDEX people_names ON people ((first_name || ' ' || last_name));", "explanation": "Example from PostgreSQL documentation on Indices.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "ddl_index", "is_postgresql_specific": false, "sql_length": 71, "num_statements": 1} {"question": "Show permit data for projects with permit numbers greater than 500?", "schema": "CREATE TABLE permit_data (id INT, project VARCHAR(50), permit_number INT, start_date DATE); INSERT INTO permit_data (id, project, permit_number, start_date) VALUES (1, 'Office Building', 450, '2019-12-20'), (2, 'Residential Apartments', 551, '2021-03-01'), (3, 'School', 333, '2020-05-15');", "sql": "SELECT * FROM permit_data WHERE permit_number > 500;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 52, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What assists has the Team SMU and the total points of 85?", "schema": "CREATE TABLE table_name_14 (assists VARCHAR, team VARCHAR, total_points VARCHAR)", "sql": "SELECT assists FROM table_name_14 WHERE team = 'smu' AND total_points = '85';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 77, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the total number of races when there was more than 1 pole, and the fastest number of laps was less than 4?", "schema": "CREATE TABLE table_name_8 (races INTEGER, poles VARCHAR, fastest_laps VARCHAR)", "sql": "SELECT SUM(races) FROM table_name_8 WHERE poles > 1 AND fastest_laps < 4;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 73, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the rank on channel for pilot", "schema": "CREATE TABLE table_24222929_4 (rank_on_channel VARCHAR, title VARCHAR)", "sql": "SELECT rank_on_channel FROM table_24222929_4 WHERE title = 'Pilot';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 67, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many original titles did Marriage Italian-Style have?", "schema": "CREATE TABLE table_10321805_1 (original_title VARCHAR, film_title_used_in_nomination VARCHAR)", "sql": "SELECT COUNT(original_title) FROM table_10321805_1 WHERE film_title_used_in_nomination = 'Marriage Italian-Style';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 114, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: what is the dadar xi b where pifa colaba is sporting options?", "schema": "CREATE TABLE table_28759261_5 (dadar_xi_‘b’ VARCHAR, pifa_colaba_fc_u_17 VARCHAR)", "sql": "SELECT dadar_xi_‘b’ FROM table_28759261_5 WHERE pifa_colaba_fc_u_17 = 'Sporting Options';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 89, "num_statements": 1} {"question": "Compare sales quantities between 'EcoStitch' and 'FairFashion' across all materials.", "schema": "CREATE TABLE SupplierSales (SaleID INT, SupplierName TEXT, Material TEXT, Quantity INT); INSERT INTO SupplierSales (SaleID, SupplierName, Material, Quantity) VALUES (4, 'EcoStitch', 'Silk', 10), (5, 'EcoStitch', 'Cotton', 20), (6, 'FairFashion', 'Silk', 15), (7, 'FairFashion', 'Cotton', 25);", "sql": "SELECT s1.SupplierName, SUM(s1.Quantity) - SUM(s2.Quantity) FROM SupplierSales s1 INNER JOIN SupplierSales s2 ON s1.Material = s2.Material AND (s1.SupplierName = 'EcoStitch' AND s2.SupplierName = 'FairFashion') GROUP BY s1.SupplierName;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 236, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'xml' (example 2).", "schema": null, "sql": "INSERT INTO xmltest VALUES (1, 'one');", "explanation": "DML from PostgreSQL core regression test for Xml.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 53, "num_statements": 1} {"question": "What is the total amount of climate finance provided to countries in the Asia-Pacific region for climate adaptation projects in 2020?", "schema": "CREATE TABLE climate_finance (year INT, region VARCHAR(50), funding_type VARCHAR(50), amount INT);INSERT INTO climate_finance (year, region, funding_type, amount) VALUES (2020, 'Asia-Pacific', 'climate adaptation', 15000000);", "sql": "SELECT SUM(amount) FROM climate_finance WHERE year = 2020 AND region = 'Asia-Pacific' AND funding_type = 'climate adaptation';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 126, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which song has Drunkard Groom listed as additional information?", "schema": "CREATE TABLE table_2528382_1 (song VARCHAR, additional_info VARCHAR)", "sql": "SELECT song FROM table_2528382_1 WHERE additional_info = 'Drunkard Groom';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 74, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the percent for in manitoba?", "schema": "CREATE TABLE table_120778_1 (percent_for VARCHAR, jurisdiction VARCHAR)", "sql": "SELECT percent_for FROM table_120778_1 WHERE jurisdiction = 'Manitoba';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 71, "num_statements": 1} {"question": "What is the maximum revenue generated by virtual tours in Spain?", "schema": "CREATE TABLE virtual_tours_spain (tour_id INT, tour_name TEXT, country TEXT, revenue FLOAT); INSERT INTO virtual_tours_spain (tour_id, tour_name, country, revenue) VALUES (1, 'Sagrada Familia Virtual Tour', 'Spain', 25000), (2, 'Alhambra Virtual Tour', 'Spain', 30000);", "sql": "SELECT MAX(revenue) FROM virtual_tours_spain WHERE country = 'Spain';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 69, "num_statements": 1} {"question": "What is the average GPA of graduate students in the 'Physics' department?", "schema": "CREATE TABLE grad_students (id INT, name TEXT, department TEXT, gpa DECIMAL(3,2)); INSERT INTO grad_students (id, name, department, gpa) VALUES (1, 'John Doe', 'Physics', 3.8); INSERT INTO grad_students (id, name, department, gpa) VALUES (2, 'Jane Smith', 'Physics', 3.6);", "sql": "SELECT AVG(gpa) FROM grad_students WHERE department = 'Physics';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 64, "num_statements": 1} {"question": "What is the total number of properties in urban areas with inclusive housing policies and their average price?", "schema": "CREATE TABLE property (id INT, price INT, area VARCHAR(255), has_inclusive_policy BOOLEAN); INSERT INTO property (id, price, area, has_inclusive_policy) VALUES (1, 200000, 'urban', true), (2, 300000, 'rural', false);", "sql": "SELECT SUM(price), AVG(price) FROM property WHERE area = 'urban' AND has_inclusive_policy = true;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 97, "num_statements": 1} {"question": "Who are the farmers that raised animals in 'region1' and what was the total quantity of animals?", "schema": "CREATE TABLE farmer (farmer_id INT, farmer_name TEXT, region TEXT); INSERT INTO farmer (farmer_id, farmer_name, region) VALUES (1, 'FarmerA', 'region1'), (2, 'FarmerB', 'region2'), (3, 'FarmerC', 'region2'); CREATE TABLE animal_rearing (rearing_id INT, farmer_id INT, animal_type TEXT, quantity INT); INSERT INTO animal_rearing (rearing_id, farmer_id, animal_type, quantity) VALUES (1, 1, 'Cattle', 10), (2, 1, 'Chickens', 50), (3, 2, 'Pigs', 20), (4, 3, 'Goats', 30);", "sql": "SELECT f.farmer_name, SUM(ar.quantity) as total_animals FROM farmer f INNER JOIN animal_rearing ar ON f.farmer_id = ar.farmer_id WHERE f.region = 'region1' GROUP BY f.farmer_name;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 179, "num_statements": 1} {"question": "What are the total expenses for restorative justice programs in the state of New York?", "schema": "CREATE TABLE restorative_justice_programs (program_id INT, program_name TEXT, state TEXT, expenses DECIMAL(10,2)); INSERT INTO restorative_justice_programs (program_id, program_name, state, expenses) VALUES (1, 'Victim-Offender Mediation', 'New York', 50000), (2, 'Restorative Circles', 'New York', 35000), (3, 'Peacemaking Circles', 'New York', 75000);", "sql": "SELECT SUM(expenses) FROM restorative_justice_programs WHERE state = 'New York';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 80, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the average when the strike rate is 75.78?", "schema": "CREATE TABLE table_26041144_10 (average VARCHAR, strike_rate VARCHAR)", "sql": "SELECT average FROM table_26041144_10 WHERE strike_rate = '75.78';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "Which AI safety measures have a higher than average transparency rating?", "schema": "CREATE TABLE SafetyMeasures (id INT, measure VARCHAR(255), transparency_rating DECIMAL(5,2)); INSERT INTO SafetyMeasures (id, measure, transparency_rating) VALUES (1, 'AI Risk Assessment', 8.00), (2, 'AI Incident Response', 9.00), (3, 'AI Monitoring', 8.50);", "sql": "SELECT measure FROM SafetyMeasures WHERE transparency_rating > (SELECT AVG(transparency_rating) FROM SafetyMeasures);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 117, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What date was the episode with production code 176265 aired?", "schema": "CREATE TABLE table_14889988_1 (original_air_date VARCHAR, production_code VARCHAR)", "sql": "SELECT original_air_date FROM table_14889988_1 WHERE production_code = 176265;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 78, "num_statements": 1} {"question": "What are the average home run distances for athletes in the MLB?", "schema": "CREATE TABLE mlb_home_runs (player_id INT, player_name VARCHAR(50), home_run_distance FLOAT); INSERT INTO mlb_home_runs (player_id, player_name, home_run_distance) VALUES (1, 'Aaron Judge', 449.6), (2, 'Giancarlo Stanton', 434.7), (3, 'Nelson Cruz', 423.5);", "sql": "SELECT AVG(home_run_distance) FROM mlb_home_runs;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 49, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is every growth rate in 1991-2001 when sex ratio in 2001 is 937?", "schema": "CREATE TABLE table_19589113_5 (growth_rate_1991_01 VARCHAR, sex_ratio_‡_2001 VARCHAR)", "sql": "SELECT growth_rate_1991_01 FROM table_19589113_5 WHERE sex_ratio_‡_2001 = 937;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 78, "num_statements": 1} {"question": "What was the average marine species count per year by region?", "schema": "CREATE TABLE marine_species_by_region (region VARCHAR(255), year INT, species_count INT); INSERT INTO marine_species_by_region (region, year, species_count) VALUES ('Atlantic', 2010, 1000), ('Atlantic', 2011, 1050), ('Atlantic', 2012, 1100), ('Pacific', 2010, 1500), ('Pacific', 2011, 1600), ('Pacific', 2012, 1700);", "sql": "SELECT region, AVG(species_count) FROM marine_species_by_region GROUP BY region;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 80, "num_statements": 1} {"question": "How many tourists visited Asian countries from North America in 2019?", "schema": "CREATE TABLE tourists (id INT, continent VARCHAR(50), country VARCHAR(50), visitors INT, year INT); INSERT INTO tourists (id, continent, country, visitors, year) VALUES (1, 'Asia', 'Japan', 2500, 2019), (2, 'Asia', 'China', 3000, 2019), (3, 'North America', 'USA', 1500, 2019), (4, 'North America', 'Canada', 1000, 2019);", "sql": "SELECT SUM(visitors) FROM tourists WHERE continent = 'Asia' AND year = 2019 AND country IN (SELECT country FROM tourists WHERE continent = 'North America');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 156, "num_statements": 1} {"question": "What is the percentage of unvaccinated children in rural areas?", "schema": "CREATE TABLE vaccinations (id INT, rural BOOLEAN, vaccinated BOOLEAN); INSERT INTO vaccinations (id, rural, vaccinated) VALUES (1, true, true), (2, false, false), (3, true, false);", "sql": "SELECT COUNT(*) * 100.0 / (SELECT COUNT(*) FROM vaccinations WHERE rural = true) FROM vaccinations WHERE rural = true AND vaccinated = false;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 141, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the number of finishes associated with an elimination of Adriana?", "schema": "CREATE TABLE table_24122653_2 (finish VARCHAR, eliminated VARCHAR)", "sql": "SELECT COUNT(finish) FROM table_24122653_2 WHERE eliminated = 'Adriana';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 72, "num_statements": 1} {"question": "List the suppliers and their total sales, from the order_fact and sales_fact tables, ordered by total sales in descending order.", "schema": "CREATE TABLE sustainability_fact (sustainability_id INT, menu_item_id INT, sustainability_rating DECIMAL, sustainability_certified BOOLEAN);", "sql": "SELECT s.supplier_name, SUM(sf.sale_quantity * sf.sale_price) as total_sales FROM sales_fact sf JOIN order_fact o ON sf.sale_id = o.sale_id JOIN supplier_dim s ON o.supplier_id = s.supplier_id GROUP BY s.supplier_name ORDER BY total_sales DESC;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 244, "num_statements": 1} {"question": "List all the unique countries that received military innovation support from the Army in 2017 and 2018.", "schema": "CREATE TABLE military_innovation (id INT, service VARCHAR(10), year INT, country VARCHAR(50)); INSERT INTO military_innovation (id, service, year, country) VALUES (1, 'Army', 2017, 'Canada'); INSERT INTO military_innovation (id, service, year, country) VALUES (2, 'Army', 2018, 'Mexico');", "sql": "SELECT DISTINCT country FROM military_innovation WHERE service = 'Army' AND year IN (2017, 2018);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 97, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the earliest year that had a start of Saint-Gaudens and a stage smaller than 15?", "schema": "CREATE TABLE table_name_56 (year INTEGER, start VARCHAR, stage VARCHAR)", "sql": "SELECT MIN(year) FROM table_name_56 WHERE start = 'saint-gaudens' AND stage < 15;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 81, "num_statements": 1} {"question": "Find the total production of Promethium in 2019 and 2020 from the Mineral_Production_6 table?", "schema": "CREATE TABLE Mineral_Production_6 (year INT, promethium_production FLOAT);", "sql": "SELECT SUM(promethium_production) FROM Mineral_Production_6 WHERE year IN (2019, 2020);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 87, "num_statements": 1} {"question": "What was the total construction cost for infrastructure projects in New York from 2017 to 2019, broken down by project type?", "schema": "CREATE TABLE InfrastructureCostsNY (State TEXT, Year INTEGER, ProjectType TEXT, ConstructionCost REAL); INSERT INTO InfrastructureCostsNY (State, Year, ProjectType, ConstructionCost) VALUES ('New York', 2017, 'Bridge', 1750000.0), ('New York', 2017, 'Highway', 2350000.0), ('New York', 2017, 'Tunnel', 3250000.0), ('New York', 2018, 'Bridge', 1850000.0), ('New York', 2018, 'Highway', 2450000.0), ('New York', 2018, 'Tunnel', 3350000.0), ('New York', 2019, 'Bridge', 1700000.0), ('New York', 2019, 'Highway', 2300000.0), ('New York', 2019, 'Tunnel', 3200000.0);", "sql": "SELECT Year, ProjectType, SUM(ConstructionCost) as TotalCost FROM InfrastructureCostsNY WHERE State = 'New York' GROUP BY Year, ProjectType;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 140, "num_statements": 1} {"question": "Update the \"physician_practices\" table to reflect the correct \"total_doctors\" for the practice with id 2", "schema": "CREATE TABLE physician_practices (id INT PRIMARY KEY, name TEXT, state TEXT, total_doctors INT); INSERT INTO physician_practices (id, name, state, total_doctors) VALUES (1, 'Practice 1', 'TX', 10), (2, 'Practice 2', 'NY', 15), (3, 'Practice 3', 'FL', 20);", "sql": "UPDATE physician_practices SET total_doctors = 20 WHERE id = 2;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 63, "num_statements": 1} {"question": "Calculate the average Yttrium production per month in 2018.", "schema": "CREATE TABLE Yttrium_Production (Year INT, Month INT, Quantity INT); INSERT INTO Yttrium_Production (Year, Month, Quantity) VALUES (2017, 1, 125), (2017, 2, 140), (2017, 3, 155), (2017, 4, 170), (2018, 1, 185), (2018, 2, 210), (2018, 3, 235), (2018, 4, 260), (2018, 5, 285), (2018, 6, 310), (2018, 7, 335), (2018, 8, 360), (2018, 9, 385), (2018, 10, 410), (2018, 11, 435), (2018, 12, 460);", "sql": "SELECT AVG(Quantity) FROM Yttrium_Production WHERE Year = 2018;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 63, "num_statements": 1} {"question": "What was the total budget allocated for the 'Healthcare' department in the year 2021?", "schema": "CREATE TABLE Budget(year INT, department VARCHAR(20), amount INT); INSERT INTO Budget VALUES (2021, 'Healthcare', 7000000), (2021, 'Education', 5000000), (2022, 'Healthcare', 7800000), (2022, 'Education', 5500000);", "sql": "SELECT SUM(amount) FROM Budget WHERE department = 'Healthcare' AND year = 2021;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 79, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is Score, when Attendance is Gund Arena 20,562, and when Date is January 27?", "schema": "CREATE TABLE table_name_33 (score VARCHAR, attendance VARCHAR, date VARCHAR)", "sql": "SELECT score FROM table_name_33 WHERE attendance = 'gund arena 20,562' AND date = 'january 27';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 95, "num_statements": 1} {"question": "What is the average account balance for customers in the 'young adults' age group?", "schema": "CREATE TABLE customers (id INT, name VARCHAR(255), age INT, account_balance DECIMAL(10, 2)); INSERT INTO customers (id, name, age, account_balance) VALUES (1, 'John Doe', 23, 5000.00), (2, 'Jane Smith', 27, 7000.00);", "sql": "SELECT AVG(account_balance) FROM customers WHERE age BETWEEN 18 AND 35;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 71, "num_statements": 1} {"question": "What is the average revenue per sustainable meal?", "schema": "CREATE TABLE MenuEngineering(meal_type VARCHAR(10), sustainable BOOLEAN, revenue INT); INSERT INTO MenuEngineering(meal_type, sustainable, revenue) VALUES ('Burger', FALSE, 1000), ('Salad', TRUE, 800);", "sql": "SELECT AVG(revenue) FROM MenuEngineering WHERE sustainable = TRUE;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "Update the production capacity of manufacturer 'Fashion Inc.' in the US to 5000.", "schema": "CREATE TABLE manufacturers (id INT, name VARCHAR(255), country VARCHAR(255), production_capacity INT);", "sql": "UPDATE manufacturers SET production_capacity = 5000 WHERE name = 'Fashion Inc.' AND country = 'United States';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 110, "num_statements": 1} {"question": "What is the latest satellite launch year by SpaceX?", "schema": "CREATE TABLE space_company (name TEXT, satellites_deployed INTEGER); INSERT INTO space_company (name, satellites_deployed) VALUES ('SpaceX', 2000); CREATE TABLE spacex_satellites (id INTEGER, name TEXT, launch_year INTEGER); INSERT INTO spacex_satellites (id, name, launch_year) VALUES (1, 'Starlink 1', 2019), (2, 'Starlink 2', 2020), (3, 'Starlink 3', 2021);", "sql": "SELECT MAX(launch_year) FROM spacex_satellites;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 47, "num_statements": 1} {"question": "List all mobile subscribers in South America who have used more than 10 GB of data in the last week and have an active broadband subscription?", "schema": "CREATE TABLE mobile_subscribers (id INT, region VARCHAR(20), data_usage INT, usage_date DATE, broadband BOOLEAN);", "sql": "SELECT m.id, m.region, m.data_usage, m.usage_date FROM mobile_subscribers m INNER JOIN (SELECT subscriber_id FROM mobile_subscribers WHERE data_usage > 10000 AND usage_date > DATE_SUB(CURRENT_DATE, INTERVAL 1 WEEK)) d ON m.id = d.subscriber_id WHERE m.region = 'South America' AND m.broadband = TRUE;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 300, "num_statements": 1} {"question": "What is the total distance traveled by autonomous vehicles in CityC?", "schema": "CREATE TABLE CityC_VehicleMovement (vehicle_id INT, vehicle_type VARCHAR(20), is_autonomous BOOLEAN, distance FLOAT); INSERT INTO CityC_VehicleMovement (vehicle_id, vehicle_type, is_autonomous, distance) VALUES (1, 'Car', true, 56.2), (2, 'Bike', false, 12.4), (3, 'Car', false, 34.6), (4, 'Bus', true, 78.9);", "sql": "SELECT SUM(distance) FROM CityC_VehicleMovement WHERE is_autonomous = true;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 75, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Find the number of universities that have over a 20000 enrollment size for each affiliation type.", "schema": "CREATE TABLE university (affiliation VARCHAR, enrollment INTEGER)", "sql": "SELECT COUNT(*), affiliation FROM university WHERE enrollment > 20000 GROUP BY affiliation;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 91, "num_statements": 1} {"question": "What's the total number of military vehicles by type in the 'military_equipment' table?", "schema": "CREATE TABLE military_equipment (type VARCHAR(20), count INT); INSERT INTO military_equipment (type, count) VALUES ('armored_vehicles', 1500), ('artillery', 1200), ('aircraft', 1800), ('naval_vessels', 2000);", "sql": "SELECT type, SUM(count) FROM military_equipment GROUP BY type;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 62, "num_statements": 1} {"question": "PL/pgSQL test: Plpython Error (example 25).", "schema": null, "sql": "/* using plpy.warning should not produce a traceback\n*/\nCREATE FUNCTION nested_warning() RETURNS text\n\tAS\n'def fun1():\n\tplpy.warning(\"boom\")\n\ndef fun2():\n\tfun1()\n\ndef fun3():\n\tfun2()\n\nfun3()\nreturn \"you''ve been warned\"\n'\n\tLANGUAGE plpython3u;", "explanation": "PL/pgSQL example from PostgreSQL source test for Plpython Error.", "validation_query": null, "source": "plpgsql_source", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 243, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many locations have the Sun Life Stadium?", "schema": "CREATE TABLE table_15647838_3 (location VARCHAR, stadium VARCHAR)", "sql": "SELECT COUNT(location) FROM table_15647838_3 WHERE stadium = 'Sun Life stadium';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 80, "num_statements": 1} {"question": "What are the top three countries with the most reported cybersecurity incidents?", "schema": "CREATE TABLE cybersecurity_incidents (id INT, country VARCHAR(255), date DATE);", "sql": "SELECT country, COUNT(*) as incident_count FROM cybersecurity_incidents WHERE date >= DATE_SUB(CURRENT_DATE, INTERVAL 1 YEAR) GROUP BY country ORDER BY incident_count DESC LIMIT 3;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 180, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'plpgsql' (example 754).", "schema": null, "sql": "INSERT INTO alter_table_under_transition_tables\n VALUES (1, '1'), (2, '2'), (3, '3');", "explanation": "DML from PostgreSQL core regression test for Plpgsql.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 86, "num_statements": 1} {"question": "Find the number of Renewable Energy projects in each state in the USA", "schema": "CREATE TABLE renewable_projects (project_id INT, name VARCHAR(50), type VARCHAR(50), location VARCHAR(50)); INSERT INTO renewable_projects (project_id, name, type, location) VALUES (1, 'Wind Farm 1', 'Wind', 'Texas');", "sql": "SELECT location, COUNT(*) FROM renewable_projects WHERE location LIKE 'USA%' GROUP BY location;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 95, "num_statements": 1} {"question": "What is the total volume of timber produced by each region in Asia?", "schema": "CREATE TABLE timber_production_asia (region VARCHAR(255), volume INT); INSERT INTO timber_production_asia (region, volume) VALUES ('East Asia', 2500), ('South Asia', 1800), ('Southeast Asia', 1200);", "sql": "SELECT region, SUM(volume) FROM timber_production_asia GROUP BY region;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 71, "num_statements": 1} {"question": "What is the total number of employees who have changed departments in the last year?", "schema": "CREATE TABLE Employees (Employee_ID INT, First_Name VARCHAR(20), Last_Name VARCHAR(20), Department VARCHAR(20), Country VARCHAR(20), Join_Date DATE, Previous_Department VARCHAR(20), Change_Date DATE);", "sql": "SELECT COUNT(*) FROM Employees WHERE Change_Date >= DATEADD(year, -1, GETDATE());", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 81, "num_statements": 1} {"question": "Identify the total mass of space debris collected by NASA and JAXA.", "schema": "CREATE TABLE space_debris (id INT, name VARCHAR(255), collection_date DATE, collecting_agency VARCHAR(255), mass FLOAT); INSERT INTO space_debris (id, name, collection_date, collecting_agency, mass) VALUES (1, 'RemoveDEBRIS', '2018-04-16', 'NASA', 220.0); INSERT INTO space_debris (id, name, collection_date, collecting_agency, mass) VALUES (2, 'RAMA', '2024-09-27', 'JAXA', 550.5); CREATE VIEW space_debris_nasa AS SELECT * FROM space_debris WHERE collecting_agency = 'NASA'; CREATE VIEW space_debris_jaxa AS SELECT * FROM space_debris WHERE collecting_agency = 'JAXA';", "sql": "SELECT SUM(s.mass) as total_mass FROM space_debris s INNER JOIN space_debris_nasa n ON s.id = n.id INNER JOIN space_debris_jaxa j ON s.id = j.id;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 145, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the lowest number of participants in 2013 when there were more than 5 participants in 2010, less than 4 participants in 2012 and country was germany?", "schema": "CREATE TABLE table_name_34 (country VARCHAR)", "sql": "SELECT MIN(2013) FROM table_name_34 WHERE 2010 > 5 AND country = 'germany' AND 2012 < 4;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 88, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the Worldwide Gross of the Film with a Rank of 16?", "schema": "CREATE TABLE table_name_68 (worldwide_gross VARCHAR, rank VARCHAR)", "sql": "SELECT worldwide_gross FROM table_name_68 WHERE rank = 16;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 58, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: When has a Record of 1–0?", "schema": "CREATE TABLE table_name_52 (date VARCHAR, record VARCHAR)", "sql": "SELECT date FROM table_name_52 WHERE record = '1–0';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 52, "num_statements": 1} {"question": "What is the minimum food safety score for restaurants in New York?", "schema": "CREATE TABLE food_safety_inspections(restaurant VARCHAR(255), score INT, city VARCHAR(255)); INSERT INTO food_safety_inspections(restaurant, score, city) VALUES ('Restaurant1', 95, 'New York'), ('Restaurant2', 85, 'Los Angeles'), ('Restaurant3', 90, 'New York'), ('Restaurant4', 92, 'San Francisco'), ('Restaurant5', 88, 'San Francisco');", "sql": "SELECT MIN(score) FROM food_safety_inspections WHERE city = 'New York';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 71, "num_statements": 1} {"question": "pgTAP test for Pgtap--Unpackaged--0.91.0 (assertion 193).", "schema": null, "sql": "ALTER EXTENSION pgtap ADD FUNCTION col_isnt_fk ( NAME, NAME[], TEXT );", "explanation": "SQL assertion from pgTAP test suite for Pgtap--Unpackaged--0.91.0.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 70, "num_statements": 1} {"question": "How many IoT sensors recorded data in \"Field3\" on June 15, 2022?", "schema": "CREATE TABLE Field3_Temp (sensor_id INT, sensor_reading DATE); INSERT INTO Field3_Temp (sensor_id, sensor_reading) VALUES (1, '2022-06-15'), (2, '2022-06-15'), (3, '2022-06-15'), (4, '2022-06-14');", "sql": "SELECT COUNT(DISTINCT sensor_id) FROM Field3_Temp WHERE sensor_reading = '2022-06-15';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 86, "num_statements": 1} {"question": "What is the number of donations made by each donor in 2017?", "schema": "CREATE TABLE Donors (DonorID INT, DonorName TEXT, DonationYear INT); INSERT INTO Donors (DonorID, DonorName, DonationYear) VALUES (1, 'Donor A', 2017), (2, 'Donor B', 2017), (3, 'Donor C', 2017), (1, 'Donor A', 2017), (2, 'Donor B', 2017);", "sql": "SELECT DonorName, COUNT(*) as NumberOfDonations FROM Donors WHERE DonationYear = 2017 GROUP BY DonorName;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 105, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'cluster' (example 21).", "schema": null, "sql": "INSERT INTO clstr_tst (b, c) VALUES (21, 'veintiuno');", "explanation": "DML from PostgreSQL core regression test for Cluster.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 54, "num_statements": 1} {"question": "Which art movement has the highest average visitor count?", "schema": "CREATE TABLE Exhibitions (exhibition_name VARCHAR(255), theme VARCHAR(255), visitor_count INT); INSERT INTO Exhibitions (exhibition_name, theme, visitor_count) VALUES ('Impressionist', 'Impressionism', 2000), ('Cubist', 'Cubism', 1500), ('Surrealist', 'Surrealism', 1800);", "sql": "SELECT theme, AVG(visitor_count) as avg_visitor_count FROM Exhibitions GROUP BY theme ORDER BY avg_visitor_count DESC LIMIT 1;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 126, "num_statements": 1} {"question": "What is the average climate finance project funding per country in 2021?", "schema": "CREATE TABLE climate_finance_projects (year INT, country VARCHAR(255), funding FLOAT); INSERT INTO climate_finance_projects (year, country, funding) VALUES (2020, 'USA', 12000000), (2020, 'France', 18000000), (2020, 'Brazil', 15000000), (2021, 'India', 10000000), (2021, 'China', 17000000);", "sql": "SELECT country, AVG(funding) AS avg_funding FROM climate_finance_projects WHERE year = 2021 GROUP BY country;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 109, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which country has the most of TV Channels? List the country and number of TV Channels it has.", "schema": "CREATE TABLE TV_Channel (Country VARCHAR)", "sql": "SELECT Country, COUNT(*) FROM TV_Channel GROUP BY Country ORDER BY COUNT(*) DESC LIMIT 1;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 89, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What candidates ran in the election that featured harry lane englebright?", "schema": "CREATE TABLE table_1342370_5 (candidates VARCHAR, incumbent VARCHAR)", "sql": "SELECT candidates FROM table_1342370_5 WHERE incumbent = 'Harry Lane Englebright';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 82, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the Time of the Athlete with a Reaction time of 0.164?", "schema": "CREATE TABLE table_name_48 (time INTEGER, reaction_time VARCHAR)", "sql": "SELECT SUM(time) FROM table_name_48 WHERE reaction_time = 0.164;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 64, "num_statements": 1} {"question": "What is the total length of all railways in Canada and the United States, grouped by the type of rail (e.g., freight, passenger, or high-speed)?", "schema": "CREATE TABLE Railways (RailwayID INT, Name VARCHAR(255), Country VARCHAR(255), Type VARCHAR(255), Length FLOAT); INSERT INTO Railways VALUES (1, 'Railway A', 'Canada', 'Freight', 1200); INSERT INTO Railways VALUES (2, 'Railway B', 'United States', 'Passenger', 1500); INSERT INTO Railways VALUES (3, 'Railway C', 'Canada', 'High-Speed', 800);", "sql": "SELECT Country, Type, SUM(Length) as TotalLength FROM Railways GROUP BY Country, Type;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 86, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many byes were then when there were less than 737 against?", "schema": "CREATE TABLE table_name_13 (byes INTEGER, against INTEGER)", "sql": "SELECT SUM(byes) FROM table_name_13 WHERE against < 737;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 56, "num_statements": 1} {"question": "How many rides started between 6-7 AM for each system in May 2022?", "schema": "CREATE TABLE trips (trip_id INT, trip_start_time DATETIME, trip_end_time DATETIME, system_name VARCHAR(20));", "sql": "SELECT system_name, COUNT(*) FROM trips WHERE trip_start_time BETWEEN '2022-05-01 06:00:00' AND '2022-05-31 07:00:00' GROUP BY system_name;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 139, "num_statements": 1} {"question": "What is the monthly sales trend of cosmetic products in India, and which product categories have the highest and lowest sales?", "schema": "CREATE TABLE sales (id INT, product_name VARCHAR(255), product_category VARCHAR(255), sale_date DATE, sales_amount DECIMAL(10, 2), country VARCHAR(255));", "sql": "SELECT DATE_TRUNC('month', sale_date) as month, product_category, AVG(sales_amount) as avg_sales FROM sales WHERE country = 'India' GROUP BY month, product_category ORDER BY month, avg_sales DESC;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 196, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which Year(s) won has a Total larger than 295?", "schema": "CREATE TABLE table_name_31 (year_s__won VARCHAR, total INTEGER)", "sql": "SELECT year_s__won FROM table_name_31 WHERE total > 295;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 56, "num_statements": 1} {"question": "What is the total revenue generated by each art category?", "schema": "CREATE TABLE ArtSales (id INT, art_category VARCHAR(255), revenue DECIMAL(10,2)); INSERT INTO ArtSales (id, art_category, revenue) VALUES (1, 'Painting', 5000), (2, 'Sculpture', 7000), (3, 'Photography', 3000);", "sql": "SELECT art_category, SUM(revenue) FROM ArtSales GROUP BY art_category;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 70, "num_statements": 1} {"question": "What is the maximum cultural heritage preservation score for any country in 2022?", "schema": "CREATE TABLE country_data (country VARCHAR(255), year INT, score INT); INSERT INTO country_data (country, year, score) VALUES ('Australia', 2022, 95), ('Brazil', 2022, 92), ('Russia', 2022, 97);", "sql": "SELECT MAX(score) FROM country_data WHERE year = 2022;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 54, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What's the smallest week that had a result of t 17–17?", "schema": "CREATE TABLE table_name_75 (week INTEGER, result VARCHAR)", "sql": "SELECT MIN(week) FROM table_name_75 WHERE result = 't 17–17';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 61, "num_statements": 1} {"question": "List all songs released in the last 6 months that belong to the 'Indie' genre.", "schema": "CREATE TABLE Songs (SongId INT, SongName VARCHAR(255), Genre VARCHAR(255), ReleaseDate DATE); INSERT INTO Songs (SongId, SongName, Genre, ReleaseDate) VALUES (1, 'Bohemian Rhapsody', 'Rock', '1975-11-30'), (2, 'Stairway to Heaven', 'Rock', '1971-11-08'), (3, 'Hey Jude', 'Pop', '1968-08-26'), (4, 'Sultans of Swing', 'Jazz', '1978-04-21');", "sql": "SELECT SongName FROM Songs WHERE Genre = 'Indie' AND ReleaseDate >= DATE_SUB(CURDATE(), INTERVAL 6 MONTH);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 106, "num_statements": 1} {"question": "PostgreSQL regression test 'tstypes': Write the SELECT query (example 109).", "schema": null, "sql": "select to_tsvector('simple', 'x y q y') @@ '!x <-> y' AS \"true\";", "explanation": "Regression test for Tstypes in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select to_tsvector('simple', 'x y q y') @@ '!x <-> y' AS \"true\") AS _sub;", "source": "postgresql_regression_tests", "difficulty": "advanced", "category": "query_select", "is_postgresql_specific": false, "sql_length": 64, "num_statements": 1} {"question": "What is the daily revenue for 'Restaurant D'?", "schema": "CREATE TABLE sales (id INT, restaurant_id INT, sales DECIMAL(5,2)); INSERT INTO sales (id, restaurant_id, sales) VALUES (1, 1, 100.00), (2, 1, 200.00), (3, 2, 150.00), (4, 3, 50.00), (5, 4, 300.00);", "sql": "SELECT SUM(sales) FROM sales WHERE restaurant_id = 4 GROUP BY DATE(time);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 73, "num_statements": 1} {"question": "Which basketball players have the most total points scored in each game for the last 30 days?", "schema": "CREATE TABLE games (game_date DATE, team VARCHAR(50), player VARCHAR(50), points INT); INSERT INTO games (game_date, team, player, points) VALUES ('2022-01-01', 'Bulls', 'Johnson', 30), ('2022-01-01', 'Bulls', 'Brown', 20), ('2022-01-02', 'Bulls', 'Johnson', 35), ('2022-01-02', 'Bulls', 'Brown', 15);", "sql": "SELECT game_date, player, SUM(points) AS total_points FROM games WHERE game_date >= DATEADD(day, -30, GETDATE()) GROUP BY game_date, player;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 143, "num_statements": 1} {"question": "What is the maximum price of nail polish products that are cruelty-free?", "schema": "CREATE TABLE products (product_id INT, product_name VARCHAR(100), price DECIMAL(5,2), is_cruelty_free BOOLEAN, category VARCHAR(50));", "sql": "SELECT MAX(price) FROM products WHERE category = 'Nail Polish' AND is_cruelty_free = TRUE;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 90, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: what's the trim with engine being 3.5l lz4 v6", "schema": "CREATE TABLE table_1373768_1 (trim VARCHAR, engine VARCHAR)", "sql": "SELECT trim FROM table_1373768_1 WHERE engine = '3.5L LZ4 V6';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 62, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'partition_join' (example 110).", "schema": null, "sql": "CREATE TABLE prt1_m_p2 PARTITION OF prt1_m FOR VALUES FROM (250, 250) TO (500, 500);", "explanation": "DDL from PostgreSQL core regression test for Partition Join.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_table", "is_postgresql_specific": false, "sql_length": 84, "num_statements": 1} {"question": "PostgreSQL regression test 'int2': Write the SELECT query (example 15).", "schema": null, "sql": "SELECT * FROM pg_input_error_info('1 asdf', 'int2vector');", "explanation": "Regression test for Int2 in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT * FROM pg_input_error_info('1 asdf', 'int2vector')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 58, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What are the death and injury situations caused by the ship with tonnage 't'?", "schema": "CREATE TABLE ship (Id VARCHAR); CREATE TABLE death (killed VARCHAR, injured VARCHAR, caused_by_ship_id VARCHAR)", "sql": "SELECT T1.killed, T1.injured FROM death AS T1 JOIN ship AS t2 ON T1.caused_by_ship_id = T2.id WHERE T2.tonnage = 't';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 117, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the number for Chauchilla Cemetery Battle with less than 511 pieces?", "schema": "CREATE TABLE table_name_54 (number INTEGER, pieces VARCHAR, name VARCHAR)", "sql": "SELECT AVG(number) FROM table_name_54 WHERE pieces < 511 AND name = 'chauchilla cemetery battle';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 97, "num_statements": 1} {"question": "Find the intersection of open data sets related to diversity in 'state', 'county', and 'city' schemas.", "schema": "CREATE SCHEMA state; CREATE SCHEMA county; CREATE SCHEMA city; CREATE TABLE state.diversity_data (id INT, name VARCHAR(255), is_open BOOLEAN); CREATE TABLE county.diversity_data (id INT, name VARCHAR(255), is_open BOOLEAN); CREATE TABLE city.diversity_data (id INT, name VARCHAR(255), is_open BOOLEAN); INSERT INTO state.diversity_data (id, name, is_open) VALUES (1, 'population', true), (2, 'workforce', true); INSERT INTO county.diversity_data (id, name, is_open) VALUES (1, 'population', true), (2, 'workforce', true); INSERT INTO city.diversity_data (id, name, is_open) VALUES (1, 'population', true), (2, 'workforce', true), (3, 'elected_officials', true);", "sql": "SELECT * FROM ( (SELECT * FROM state.diversity_data WHERE is_open = true) INTERSECT (SELECT * FROM county.diversity_data WHERE is_open = true) INTERSECT (SELECT * FROM city.diversity_data WHERE is_open = true) ) AS intersected_data;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 232, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What circuit was the British Grand Prix?", "schema": "CREATE TABLE table_name_92 (circuit VARCHAR, race VARCHAR)", "sql": "SELECT circuit FROM table_name_92 WHERE race = 'british grand prix';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 68, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What number is Fall 06 from the state with 34 for Fall 09?", "schema": "CREATE TABLE table_name_97 (fall_06 INTEGER, fall_09 VARCHAR)", "sql": "SELECT AVG(fall_06) FROM table_name_97 WHERE fall_09 = 34;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 58, "num_statements": 1} {"question": "What is the total carbon sequestration potential for each region in the 'region_sequestration' table?", "schema": "CREATE TABLE region_sequestration (id INT, region VARCHAR(255), total_sequestration FLOAT); INSERT INTO region_sequestration (id, region, total_sequestration) VALUES (1, 'North', 5000.0), (2, 'South', 6000.0), (3, 'East', 4500.0), (4, 'West', 7000.0);", "sql": "SELECT region, SUM(total_sequestration) FROM region_sequestration GROUP BY region;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 82, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the main presenter of bulgaria?", "schema": "CREATE TABLE table_1053802_1 (main_presenters VARCHAR, region_country VARCHAR)", "sql": "SELECT main_presenters FROM table_1053802_1 WHERE region_country = 'Bulgaria';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 78, "num_statements": 1} {"question": "What is the total number of streams and albums sold by artists who have won a Grammy award?", "schema": "CREATE TABLE Artists (ArtistID INT, ArtistName VARCHAR(100), GrammyWinner BOOLEAN); INSERT INTO Artists (ArtistID, ArtistName, GrammyWinner) VALUES (1, 'Taylor Swift', TRUE), (2, 'Green Day', FALSE); CREATE TABLE MusicStreams (StreamID INT, SongID INT, ArtistID INT); INSERT INTO MusicStreams (StreamID, SongID, ArtistID) VALUES (1, 1, 1), (2, 2, 2); CREATE TABLE Albums (AlbumID INT, AlbumName VARCHAR(100), ArtistID INT); INSERT INTO Albums (AlbumID, AlbumName, ArtistID) VALUES (1, 'Fearless', 1), (2, 'American Idiot', 2);", "sql": "SELECT COUNT(DISTINCT ms.StreamID) + COUNT(DISTINCT a.AlbumID) AS TotalReleases FROM Artists a JOIN MusicStreams ms ON a.ArtistID = ms.ArtistID JOIN Albums al ON a.ArtistID = al.ArtistID WHERE GrammyWinner = TRUE;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 213, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'intarray' (example 15).", "schema": null, "sql": "SELECT subarray('{1234234,-30,-30,234234,-30}',0,-1);", "explanation": "Example query from the 'intarray' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 53, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the type for rank 2?", "schema": "CREATE TABLE table_name_26 (type VARCHAR, rank VARCHAR)", "sql": "SELECT type FROM table_name_26 WHERE rank = 2;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 46, "num_statements": 1} {"question": "How many donors live in 'Brazil'?", "schema": "CREATE TABLE donors (donor_id INT, donor_name TEXT, donation_amount DECIMAL(10,2), country TEXT); INSERT INTO donors (donor_id, donor_name, donation_amount, country) VALUES (1, 'John Doe', 500.00, 'United States'); INSERT INTO donors (donor_id, donor_name, donation_amount, country) VALUES (2, 'Jane Smith', 300.00, 'Canada'); INSERT INTO donors (donor_id, donor_name, donation_amount, country) VALUES (3, 'Jose Garcia', 250.00, 'Brazil');", "sql": "SELECT COUNT(*) FROM donors WHERE country = 'Brazil';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 53, "num_statements": 1} {"question": "What is the total number of articles published in 'San Francisco Chronicle' and 'Denver Post' in 2020?", "schema": "CREATE TABLE SFC_Articles(id INT, title VARCHAR(50), publication DATE, category VARCHAR(20));CREATE TABLE DP_Articles(id INT, title VARCHAR(50), publication DATE, category VARCHAR(20));", "sql": "SELECT COUNT(*) FROM (SELECT * FROM SFC_Articles WHERE publication BETWEEN '2020-01-01' AND '2020-12-31' UNION ALL SELECT * FROM DP_Articles WHERE publication BETWEEN '2020-01-01' AND '2020-12-31') t;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 200, "num_statements": 1} {"question": "What is the average budget for humanitarian aid programs in the Middle East?", "schema": "CREATE TABLE programs (id INT, program_name VARCHAR(50), program_type VARCHAR(20), org_id INT, start_date DATE, end_date DATE, budget DECIMAL(10,2));", "sql": "SELECT AVG(budget) FROM programs WHERE program_type = 'Humanitarian Aid' AND country_code = 'ME';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 97, "num_statements": 1} {"question": "Count the number of creative AI applications by type", "schema": "CREATE TABLE creative_ai (application_name TEXT, application_type TEXT); INSERT INTO creative_ai (application_name, application_type) VALUES ('App1', 'Image Generation'), ('App2', 'Text Generation'), ('App3', 'Music Generation');", "sql": "SELECT application_type, COUNT(*) FROM creative_ai GROUP BY application_type;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 77, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the circuit for hertz british grand prix", "schema": "CREATE TABLE table_27948565_1 (circuit VARCHAR, grand_prix VARCHAR)", "sql": "SELECT circuit FROM table_27948565_1 WHERE grand_prix = 'Hertz British grand_prix';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 83, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'btree_gin' (example 19).", "schema": null, "sql": "SELECT * FROM test_name WHERE i>='abc'::text ORDER BY i;", "explanation": "Example query from the 'btree_gin' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 56, "num_statements": 1} {"question": "What is the minimum salary for data scientists in 'tech_salaries' table?", "schema": "CREATE TABLE tech_salaries (position VARCHAR(50), employee_name VARCHAR(50), salary INTEGER, company_location VARCHAR(50));", "sql": "SELECT MIN(salary) FROM tech_salaries WHERE position = 'Data Scientist';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 72, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the series standing after game 1?", "schema": "CREATE TABLE table_name_5 (series VARCHAR, game VARCHAR)", "sql": "SELECT series FROM table_name_5 WHERE game = 1;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 47, "num_statements": 1} {"question": "Update the artist name 'Eminem' to 'Marshall Mathers' in the MusicArtists table.", "schema": "CREATE TABLE MusicArtists (artist_id INT, artist_name VARCHAR(50), genre VARCHAR(20));", "sql": "UPDATE MusicArtists SET artist_name = 'Marshall Mathers' WHERE artist_name = 'Eminem';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 86, "num_statements": 1} {"question": "Update the rating of 'Breaking Bad' to 4.9.", "schema": "CREATE TABLE tv_shows (show_id INT, title VARCHAR(100), release_year INT, rating FLOAT); INSERT INTO tv_shows (show_id, title, release_year, rating) VALUES (1, 'Game of Thrones', 2011, 4.1), (2, 'Stranger Things', 2016, 4.6), (3, 'Breaking Bad', 2008, 4.8);", "sql": "UPDATE tv_shows SET rating = 4.9 WHERE title = 'Breaking Bad';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 62, "num_statements": 1} {"question": "How many mental health parity violations were reported in California and Texas in 2020 and 2021?", "schema": "CREATE TABLE MentalHealthParityViolations (Id INT, State VARCHAR(2), Year INT, ViolationCount INT); INSERT INTO MentalHealthParityViolations (Id, State, Year, ViolationCount) VALUES (1, 'CA', 2020, 120), (2, 'TX', 2020, 150), (3, 'CA', 2021, 145), (4, 'TX', 2021, 175), (5, 'NY', 2020, 105), (6, 'FL', 2021, 130);", "sql": "SELECT State, SUM(ViolationCount) as TotalViolations FROM MentalHealthParityViolations WHERE State IN ('CA', 'TX') AND Year BETWEEN 2020 AND 2021 GROUP BY State;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 161, "num_statements": 1} {"question": "Which country has the highest revenue in skincare products?", "schema": "CREATE TABLE CountrySales (country VARCHAR(255), category VARCHAR(255), revenue FLOAT);", "sql": "SELECT country, MAX(revenue) FROM CountrySales WHERE category = 'Skincare' GROUP BY country;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 92, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the time for Moscow, Russia?", "schema": "CREATE TABLE table_name_62 (time VARCHAR, location VARCHAR)", "sql": "SELECT time FROM table_name_62 WHERE location = 'moscow, russia';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 65, "num_statements": 1} {"question": "What is the total number of volunteers and total donation amount for each organization in a given year?", "schema": "CREATE TABLE organization (id INT, name VARCHAR(255)); CREATE TABLE volunteer (id INT, name VARCHAR(255), organization_id INT, volunteer_date DATE); CREATE TABLE donation (id INT, donor_id INT, organization_id INT, amount DECIMAL(10,2), donation_date DATE);", "sql": "SELECT o.name, COUNT(v.id) as total_volunteers, SUM(d.amount) as total_donations FROM volunteer v JOIN organization o ON v.organization_id = o.id JOIN donation d ON o.id = d.organization_id WHERE YEAR(v.volunteer_date) = 2022 AND YEAR(d.donation_date) = 2022 GROUP BY o.id;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 273, "num_statements": 1} {"question": "How many cybersecurity incidents were reported by the defense industry in 2020?", "schema": "CREATE TABLE Cybersecurity_Incidents (id INT, industry VARCHAR(50), year INT, reported_count INT);", "sql": "SELECT SUM(reported_count) FROM Cybersecurity_Incidents WHERE industry = 'defense' AND year = 2020;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 99, "num_statements": 1} {"question": "Which social issues have the highest impact scores in projects located in Southeast Asia?", "schema": "CREATE TABLE social_issues (id INT PRIMARY KEY, name VARCHAR(255), impact_score INT);CREATE TABLE projects (id INT PRIMARY KEY, name VARCHAR(255), location VARCHAR(255), budget DECIMAL(10,2));CREATE TABLE project_issues (project_id INT, issue_id INT, PRIMARY KEY (project_id, issue_id));CREATE VIEW high_impact_issues AS SELECT * FROM social_issues WHERE impact_score >= 75;", "sql": "SELECT p.name, p.location, si.name as issue, si.impact_score FROM projects p JOIN project_issues pi ON p.id = pi.project_id JOIN social_issues si ON pi.issue_id = si.id WHERE p.location = 'Southeast Asia' AND si.impact_score IN (SELECT impact_score FROM high_impact_issues);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 274, "num_statements": 1} {"question": "Display the number of threat intelligence records and their source by month", "schema": "CREATE TABLE threat_monthly (id INT, record_date DATE, source VARCHAR(10)); INSERT INTO threat_monthly (id, record_date, source) VALUES (1, '2022-02-01', 'TI5'), (2, '2022-02-15', 'TI6'), (3, '2022-03-01', 'TI7'), (4, '2022-04-01', 'TI8'), (5, '2022-04-15', 'TI5'), (6, '2022-05-01', 'TI6');", "sql": "SELECT EXTRACT(MONTH FROM record_date) as month, source, COUNT(*) as records FROM threat_monthly GROUP BY month, source;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 120, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the quantity when the seats number was 16 and the class was kss 1913?", "schema": "CREATE TABLE table_name_67 (quantity VARCHAR, seats VARCHAR, class VARCHAR)", "sql": "SELECT quantity FROM table_name_67 WHERE seats = 16 AND class = 'kss 1913';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 75, "num_statements": 1} {"question": "What is the percentage of social equity participation for each program in the last year, and the corresponding average participation percentage?", "schema": "CREATE TABLE dispensaries (id INT PRIMARY KEY, name VARCHAR(255), location VARCHAR(255)); CREATE TABLE social_equity_programs (id INT PRIMARY KEY, dispensary_id INT, program_name VARCHAR(255), start_date DATE); CREATE TABLE social_equity_participation (id INT PRIMARY KEY, dispensary_id INT, year INT, percentage FLOAT);", "sql": "SELECT program_name, AVG(percentage) as avg_percentage FROM social_equity_programs INNER JOIN social_equity_participation ON social_equity_programs.id = social_equity_participation.dispensary_id WHERE YEAR(social_equity_participation.year) = YEAR(CURRENT_DATE) - 1 GROUP BY program_name;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 287, "num_statements": 1} {"question": "What is the average price of eco-friendly materials sourced from Latin America?", "schema": "CREATE TABLE eco_materials (id INT, material VARCHAR(50), source_country VARCHAR(50), price DECIMAL(10,2)); INSERT INTO eco_materials (id, material, source_country, price) VALUES (1, 'organic cotton', 'Peru', 2.50), (2, 'recycled polyester', 'Brazil', 3.20), (3, 'Tencel', 'Brazil', 4.50);", "sql": "SELECT AVG(price) FROM eco_materials WHERE source_country = 'Brazil' OR source_country = 'Peru';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 96, "num_statements": 1} {"question": "How many artifacts are made of gold in Peru?", "schema": "CREATE TABLE Site (SiteID INT PRIMARY KEY, SiteName VARCHAR(50), Country VARCHAR(50), City VARCHAR(50)); INSERT INTO Site (SiteID, SiteName, Country, City) VALUES (7, 'Machu Picchu', 'Peru', 'Cuzco'); CREATE TABLE Artifact (ArtifactID INT PRIMARY KEY, SiteID INT, ArtifactName VARCHAR(50), Material VARCHAR(50), Era VARCHAR(50)); INSERT INTO Artifact (ArtifactID, SiteID, ArtifactName, Material, Era) VALUES (6, 2, 'Golden Mask', 'Gold', 'Inca'), (7, 7, 'Golden Idol', 'Gold', 'Inca');", "sql": "SELECT COUNT(*) FROM Artifact WHERE Material = 'Gold' AND SiteID = (SELECT SiteID FROM Site WHERE SiteName = 'Machu Picchu');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 125, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the finish with 200 laps and a start of 3?", "schema": "CREATE TABLE table_name_91 (finish VARCHAR, laps VARCHAR, start VARCHAR)", "sql": "SELECT finish FROM table_name_91 WHERE laps = 200 AND start = '3';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What are all the profits elegance (2007) in which mayor is ma. Ester a. Hamor", "schema": "CREATE TABLE table_255812_1 (income_class__2007_ VARCHAR, mayor VARCHAR)", "sql": "SELECT income_class__2007_ FROM table_255812_1 WHERE mayor = 'Ma. Ester A. Hamor';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 82, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many years saw 3 hurricanes wherein the strongest storm was level three?", "schema": "CREATE TABLE table_2930244_2 (year VARCHAR, number_of_hurricanes VARCHAR, strongest_storm VARCHAR)", "sql": "SELECT COUNT(year) FROM table_2930244_2 WHERE number_of_hurricanes = 3 AND strongest_storm = 'Three';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 101, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many golds does the nation having a rank of 8, fewer than 5 bronzes and more than 1 silver have?", "schema": "CREATE TABLE table_name_62 (gold VARCHAR, rank VARCHAR, bronze VARCHAR, silver VARCHAR)", "sql": "SELECT COUNT(gold) FROM table_name_62 WHERE bronze < 5 AND silver > 1 AND rank > 8;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 83, "num_statements": 1} {"question": "PostgreSQL regression test 'aggregates': Write the SELECT query (example 117).", "schema": null, "sql": "SELECT sum2(q1,q2) FROM int8_tbl;", "explanation": "Regression test for Aggregates in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT sum2(q1,q2) FROM int8_tbl) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 33, "num_statements": 1} {"question": "What is the minimum depth of any marine protected area in the Pacific region?", "schema": "CREATE TABLE marine_protected_areas (name VARCHAR(255), location VARCHAR(255), depth FLOAT); INSERT INTO marine_protected_areas (name, location, depth) VALUES ('MPA 1', 'Pacific', 120.5); INSERT INTO marine_protected_areas (name, location, depth) VALUES ('MPA 2', 'Atlantic', 200.3);", "sql": "SELECT MIN(depth) FROM marine_protected_areas WHERE location = 'Pacific';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 73, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: what is the soap opera when the years are 2000–2004, 2005—?", "schema": "CREATE TABLE table_name_10 (soap_opera VARCHAR, years VARCHAR)", "sql": "SELECT soap_opera FROM table_name_10 WHERE years = '2000–2004, 2005—';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 70, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What Class has 36 Points?", "schema": "CREATE TABLE table_name_68 (class VARCHAR, points VARCHAR)", "sql": "SELECT class FROM table_name_68 WHERE points = 36;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 50, "num_statements": 1} {"question": "How many safety inspections have been conducted on fishing vessels in the past year?", "schema": "CREATE TABLE vessel (id INT, type VARCHAR(50), name VARCHAR(50));CREATE TABLE safety_inspection (id INT, vessel_id INT, inspection_date DATE);", "sql": "SELECT COUNT(si.id) as total_inspections FROM vessel v INNER JOIN safety_inspection si ON v.id = si.vessel_id WHERE v.type = 'fishing' AND si.inspection_date >= DATE(NOW(), INTERVAL -1 YEAR);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 191, "num_statements": 1} {"question": "How many cities in the 'transportation' schema have more than 10000 electric vehicles?", "schema": "CREATE TABLE city_electric_vehicles (city_name VARCHAR(255), num_electric_vehicles INT); INSERT INTO city_electric_vehicles (city_name, num_electric_vehicles) VALUES ('San Francisco', 15000), ('Los Angeles', 20000), ('New York', 30000);", "sql": "SELECT COUNT(*) FROM (SELECT city_name FROM city_electric_vehicles WHERE num_electric_vehicles > 10000 GROUP BY city_name) AS subquery;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 135, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the 2011 when the 2007 is a, and the 2012 is sf?", "schema": "CREATE TABLE table_name_58 (Id VARCHAR)", "sql": "SELECT 2011 FROM table_name_58 WHERE 2007 = 'a' AND 2012 = 'sf';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 64, "num_statements": 1} {"question": "What is the maximum response time for emergency calls and fire incidents in the last month in Miami, FL?", "schema": "CREATE TABLE emergency_calls (id INT, date DATE, response_time INT, city VARCHAR(20), state VARCHAR(20)); INSERT INTO emergency_calls (id, date, response_time, city, state) VALUES (1, '2022-01-01', 10, 'Miami', 'FL'), (2, '2022-01-02', 20, 'Miami', 'FL'); CREATE TABLE fire_incidents (id INT, date DATE, response_time INT, city VARCHAR(20), state VARCHAR(20)); INSERT INTO fire_incidents (id, date, response_time, city, state) VALUES (1, '2022-01-01', 15, 'Miami', 'FL'), (2, '2022-01-02', 25, 'Miami', 'FL');", "sql": "SELECT 'emergency calls' AS type, MAX(response_time) FROM emergency_calls WHERE date >= DATE_SUB(CURRENT_DATE, INTERVAL 1 MONTH) AND city = 'Miami' AND state = 'FL' UNION ALL SELECT 'fire incidents' AS type, MAX(response_time) FROM fire_incidents WHERE date >= DATE_SUB(CURRENT_DATE, INTERVAL 1 MONTH) AND city = 'Miami' AND state = 'FL';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 338, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the earliest year of disaffiliation of a CHCH-TV station, licensed in Hamilton, Ontario?", "schema": "CREATE TABLE table_name_74 (year_of_disaffiliation INTEGER, city_of_license_market VARCHAR)", "sql": "SELECT MIN(year_of_disaffiliation) FROM table_name_74 WHERE city_of_license_market = 'hamilton, ontario';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 105, "num_statements": 1} {"question": "Calculate the total expenditure on community development initiatives in Indonesia for 2021 and 2022.", "schema": "CREATE TABLE community_initiatives (id INT, country VARCHAR(50), initiative VARCHAR(50), year INT, expenditure DECIMAL(10,2)); INSERT INTO community_initiatives (id, country, initiative, year, expenditure) VALUES (1, 'Indonesia', 'Healthcare Center', 2021, 180000.00), (2, 'Indonesia', 'Education Building', 2022, 220000.00);", "sql": "SELECT SUM(expenditure) FROM community_initiatives WHERE country = 'Indonesia' AND year IN (2021, 2022);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 104, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the january when april is courtney rachel culkin", "schema": "CREATE TABLE table_name_53 (january VARCHAR, april VARCHAR)", "sql": "SELECT january FROM table_name_53 WHERE april = 'courtney rachel culkin';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 73, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is Notes, when Silver is \"Jarkko Huovila\"?", "schema": "CREATE TABLE table_name_3 (notes VARCHAR, silver VARCHAR)", "sql": "SELECT notes FROM table_name_3 WHERE silver = 'jarkko huovila';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 63, "num_statements": 1} {"question": "Show a SQL definition from the pg_partman project (pg_partman--4.5.0--4.5.1, item 4).", "schema": null, "sql": "CREATE OR REPLACE FUNCTION @extschema@.check_epoch_type (p_type text) RETURNS boolean\n LANGUAGE plpgsql IMMUTABLE SECURITY DEFINER\n SET search_path TO pg_catalog, pg_temp\n AS $$\nDECLARE\nv_result boolean;\nBEGIN\n SELECT p_type IN ('none', 'seconds', 'milliseconds') INTO v_result;\n RETURN v_result;\nEND\n$$;", "explanation": "SQL definition from the open-source pg_partman PostgreSQL project.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "plpgsql_function", "is_postgresql_specific": true, "sql_length": 322, "num_statements": 4} {"question": "Calculate the total funding for startups founded in each year", "schema": "CREATE TABLE startups (id INT, name VARCHAR(255), year_founded INT, total_funding INT); INSERT INTO startups (id, name, year_founded, total_funding) VALUES (1, 'Acme Inc', 2010, 5000000), (2, 'Beta Corp', 2012, 8000000), (3, 'Gamma Inc', 2015, 3000000), (4, 'Delta Ltd', 2018, 7000000);", "sql": "SELECT year_founded AS founded_year, SUM(total_funding) as total_funding FROM startups GROUP BY year_founded;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 109, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the home team's score for north melbourne?", "schema": "CREATE TABLE table_name_57 (home_team VARCHAR)", "sql": "SELECT home_team AS score FROM table_name_57 WHERE home_team = 'north melbourne';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 81, "num_statements": 1} {"question": "Delete models from Italy with a safety score less than 0.75.", "schema": "CREATE TABLE models_italy (model_id INT, name VARCHAR(255), country VARCHAR(255), safety_score FLOAT); INSERT INTO models_italy (model_id, name, country, safety_score) VALUES (1, 'ModelA', 'Italy', 0.82), (2, 'ModelB', 'Italy', 0.68), (3, 'ModelC', 'Italy', 0.90);", "sql": "DELETE FROM models_italy WHERE safety_score < 0.75 AND country = 'Italy';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_delete", "is_postgresql_specific": false, "sql_length": 73, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How much A-League has a Pre-Season larger than 0?", "schema": "CREATE TABLE table_name_85 (a_league VARCHAR, pre_season INTEGER)", "sql": "SELECT COUNT(a_league) FROM table_name_85 WHERE pre_season > 0;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 63, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the name of the airport with an ICAO of ULLI?", "schema": "CREATE TABLE table_name_40 (airport VARCHAR, icao VARCHAR)", "sql": "SELECT airport FROM table_name_40 WHERE icao = 'ulli';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 54, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What Player is from the College of memphis?", "schema": "CREATE TABLE table_name_5 (player VARCHAR, college VARCHAR)", "sql": "SELECT player FROM table_name_5 WHERE college = 'memphis';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 58, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which Partner has a Tournament of catella swedish open?", "schema": "CREATE TABLE table_name_13 (partner VARCHAR, tournament VARCHAR)", "sql": "SELECT partner FROM table_name_13 WHERE tournament = 'catella swedish open';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 76, "num_statements": 1} {"question": "What is the average quantity of 'Organic Silk' used by each brand?", "schema": "CREATE TABLE Brands (BrandID INT, BrandName VARCHAR(50), Material VARCHAR(50), Quantity INT);INSERT INTO Brands (BrandID, BrandName, Material, Quantity) VALUES (1, 'BrandA', 'Organic Cotton', 3000), (2, 'BrandB', 'Recycled Polyester', 2500), (1, 'BrandA', 'Organic Silk', 1000), (3, 'BrandC', 'Organic Cotton', 2000), (2, 'BrandB', 'Tencel', 1800);", "sql": "SELECT BrandName, AVG(Quantity) as AvgQuantity FROM Brands WHERE Material = 'Organic Silk' GROUP BY BrandName;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 110, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which party had a Retired Democratic hold?", "schema": "CREATE TABLE table_name_97 (party VARCHAR, result VARCHAR)", "sql": "SELECT party FROM table_name_97 WHERE result = 'retired democratic hold';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 73, "num_statements": 1} {"question": "Find the total number of animals in the 'endangered' status", "schema": "CREATE TABLE animals (id INT, name VARCHAR(50), status VARCHAR(20)); INSERT INTO animals (id, name, status) VALUES (1, 'Tiger', 'Endangered'); INSERT INTO animals (id, name, status) VALUES (2, 'Elephant', 'Vulnerable');", "sql": "SELECT COUNT(*) FROM animals WHERE status = 'Endangered';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 57, "num_statements": 1} {"question": "Show a SQL definition from the citus project (multi_follower_dml, item 8).", "schema": null, "sql": "CREATE TABLE reference_table (a int, b int, z bigserial);", "explanation": "SQL definition from the open-source citus PostgreSQL project.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_table", "is_postgresql_specific": true, "sql_length": 57, "num_statements": 1} {"question": "What is the total number of tourists who visited Japan in 2020 from European countries?", "schema": "CREATE TABLE tourism_data (visitor_country VARCHAR(50), destination_country VARCHAR(50), visit_year INT); INSERT INTO tourism_data (visitor_country, destination_country, visit_year) VALUES ('France', 'Japan', 2020), ('Germany', 'Japan', 2020), ('Italy', 'Japan', 2020);", "sql": "SELECT SUM(*) FROM tourism_data WHERE visitor_country LIKE 'Europe%' AND visit_year = 2020 AND destination_country = 'Japan';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 125, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What years show Model of 2.0 tdi (cr) dpf?", "schema": "CREATE TABLE table_name_62 (years VARCHAR, model VARCHAR)", "sql": "SELECT years FROM table_name_62 WHERE model = '2.0 tdi (cr) dpf';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 65, "num_statements": 1} {"question": "What is the maximum transaction value for the 'Gaming' industry sector in the 'DOGE' digital asset in Q2 2022?", "schema": "CREATE TABLE transaction_values (industry_sector VARCHAR(10), asset_name VARCHAR(10), quarter INT, max_transaction_value INT); INSERT INTO transaction_values (industry_sector, asset_name, quarter, max_transaction_value) VALUES ('Gaming', 'BTC', 1, 12000), ('Gaming', 'BTC', 2, 15000), ('Gaming', 'BTC', 3, 18000), ('Gaming', 'DOGE', 1, 20000), ('Gaming', 'DOGE', 2, 22000), ('Gaming', 'DOGE', 3, 25000);", "sql": "SELECT max_transaction_value FROM transaction_values WHERE industry_sector = 'Gaming' AND asset_name = 'DOGE' AND quarter = 2;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 126, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which venues closed in the 1990s?", "schema": "CREATE TABLE table_name_76 (venue VARCHAR, closed VARCHAR)", "sql": "SELECT venue FROM table_name_76 WHERE closed = '1990s';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 55, "num_statements": 1} {"question": "What is the minimum calorie count for meals served at casual dining restaurants?", "schema": "CREATE TABLE meals (id INT, name TEXT, restaurant_type TEXT); INSERT INTO meals (id, name, restaurant_type) VALUES (1, 'Filet Mignon', 'fine dining'), (2, 'Chicken Caesar', 'casual dining'), (3, 'Tofu Stir Fry', 'fine dining'); CREATE TABLE nutrition (meal_id INT, calorie_count INT); INSERT INTO nutrition (meal_id, calorie_count) VALUES (1, 1200), (2, 800), (3, 900);", "sql": "SELECT MIN(nutrition.calorie_count) FROM nutrition JOIN meals ON nutrition.meal_id = meals.id WHERE meals.restaurant_type = 'casual dining';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 140, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'triggers' (example 197).", "schema": null, "sql": "create table parted_2 partition of parted for values in (2);", "explanation": "DDL from PostgreSQL core regression test for Triggers.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_table", "is_postgresql_specific": false, "sql_length": 60, "num_statements": 1} {"question": "What is the maximum production capacity of any tank in the Eastern region?", "schema": "CREATE TABLE Production (tank VARCHAR(20), capacity INT, location VARCHAR(20)); INSERT INTO Production (tank, capacity, location) VALUES ('Tank3', 100000, 'Eastern'), ('Tank4', 150000, 'Western');", "sql": "SELECT MAX(capacity) FROM Production WHERE location = 'Eastern';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 64, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the Away team score for Away team North Melbourne?", "schema": "CREATE TABLE table_name_13 (away_team VARCHAR)", "sql": "SELECT away_team AS score FROM table_name_13 WHERE away_team = 'north melbourne';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 81, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who has less than 3 lanes for France?", "schema": "CREATE TABLE table_name_88 (name VARCHAR, nationality VARCHAR, lane VARCHAR)", "sql": "SELECT name FROM table_name_88 WHERE nationality = 'france' AND lane < 3;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 73, "num_statements": 1} {"question": "Which countries have mining companies with the highest labor productivity?", "schema": "CREATE TABLE Companies (CompanyID INT, CompanyName VARCHAR(50), Country VARCHAR(50), LaborProductivity DECIMAL(5,2)); INSERT INTO Companies (CompanyID, CompanyName, Country, LaborProductivity) VALUES (1, 'ACME Mining', 'Canada', 15.5), (2, 'BIG Excavations', 'South Africa', 12.3), (3, 'Giga Drilling', 'Australia', 18.7), (4, 'Mega Quarrying', 'Brazil', 10.1);", "sql": "SELECT Country FROM Companies WHERE LaborProductivity IN (SELECT MAX(LaborProductivity) FROM Companies);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 104, "num_statements": 1} {"question": "What is the total number of concert tickets sold in each city for artists from the Pop genre?", "schema": "CREATE TABLE Concerts (id INT, city VARCHAR(255), tickets_sold INT); CREATE TABLE Artists (id INT, genre VARCHAR(255));", "sql": "SELECT city, SUM(tickets_sold) as total_tickets_sold FROM Concerts INNER JOIN Artists ON Concerts.id = Artists.id WHERE genre = 'Pop' GROUP BY city;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 148, "num_statements": 1} {"question": "What is the total number of articles published in \"The Washington Post\" that mention \"climate change\" in the title or content?", "schema": "CREATE TABLE articles (id INT, title TEXT, content TEXT, publication_date DATE, newspaper TEXT);", "sql": "SELECT COUNT(*) FROM articles WHERE (title LIKE '%climate change%' OR content LIKE '%climate change%') AND newspaper = 'The Washington Post';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 141, "num_statements": 1} {"question": "What is the maximum production volume for zinc mines in South Africa?", "schema": "CREATE TABLE mines (id INT, name TEXT, location TEXT, production_volume INT); INSERT INTO mines (id, name, location, production_volume) VALUES (1, 'South African Zinc Mine 1', 'South Africa', 11000); INSERT INTO mines (id, name, location, production_volume) VALUES (2, 'South African Zinc Mine 2', 'South Africa', 9000);", "sql": "SELECT MAX(production_volume) FROM mines WHERE location = 'South Africa' AND mineral = 'zinc';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 94, "num_statements": 1} {"question": "Calculate the average REE market trends for each year since 2017, excluding China.", "schema": "CREATE TABLE market_trends (year INT, market_trend VARCHAR(255), country VARCHAR(255)); INSERT INTO market_trends (year, market_trend, country) VALUES (2017, 'Increase', 'China'), (2017, 'Decrease', 'USA'), (2018, 'Stable', 'China'), (2018, 'Increase', 'USA'), (2019, 'Decrease', 'China'), (2019, 'Stable', 'USA');", "sql": "SELECT AVG(market_trend = 'Increase') AS avg_increase, AVG(market_trend = 'Decrease') AS avg_decrease, AVG(market_trend = 'Stable') AS avg_stable FROM market_trends WHERE year >= 2017 AND country != 'China';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 207, "num_statements": 1} {"question": "Find mining operations that have higher than average emission factors", "schema": "CREATE TABLE operation_emission_factors (id INT, operation VARCHAR(255), emission_factor DECIMAL(10,2));", "sql": "SELECT operation FROM operation_emission_factors WHERE emission_factor > (SELECT AVG(emission_factor) FROM operation_emission_factors);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 135, "num_statements": 1} {"question": "Count of active platforms in the South China Sea.", "schema": "CREATE TABLE platforms (id INT, location VARCHAR(50), status VARCHAR(50)); INSERT INTO platforms (id, location, status) VALUES (1, 'South China Sea', 'Active');", "sql": "SELECT COUNT(*) FROM platforms WHERE location = 'South China Sea' AND status = 'Active';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 88, "num_statements": 1} {"question": "What is the total number of union members in workplaces with more than 1000 total employees?", "schema": "CREATE TABLE workplaces (id INT, name TEXT, location TEXT, sector TEXT, total_employees INT, union_members INT, successful_cb BOOLEAN, cb_year INT); CREATE TABLE union_members (id INT, workplace_id INT, member_name TEXT, member_join_date DATE, member_status TEXT); INSERT INTO workplaces (id, name, location, sector, total_employees, union_members, successful_cb, cb_year) VALUES (1, 'School A', 'City X', 'education', 50, 30, true, 2020), (2, 'University B', 'City Y', 'education', 3000, 1500, true, 2019), (3, 'Factory C', 'City Z', 'manufacturing', 1200, 900, true, 2018); INSERT INTO union_members (id, workplace_id, member_name, member_join_date, member_status) VALUES (1, 1, 'John Doe', '2018-01-01', 'active'), (2, 1, 'Jane Smith', '2019-05-15', 'active'), (3, 2, 'Mike Johnson', '2020-03-01', 'active'), (4, 3, 'Sara Connor', '2017-09-01', 'active');", "sql": "SELECT SUM(um.member_status = 'active'::INTEGER) FROM union_members um JOIN workplaces w ON um.workplace_id = w.id WHERE w.total_employees > 1000;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 146, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'alter_table' (example 261).", "schema": null, "sql": "create text search parser alter1.prs(start = prsd_start, gettoken = prsd_nexttoken, end = prsd_end, lextypes = prsd_lextype);", "explanation": "DDL from PostgreSQL core regression test for Alter Table.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 125, "num_statements": 1} {"question": "What is the average years of experience for teachers who teach Math?", "schema": "CREATE TABLE teachers (id INT, name VARCHAR(50), subject VARCHAR(50), years_experience INT); INSERT INTO teachers (id, name, subject, years_experience) VALUES (1, 'Alice Brown', 'Math', 10); INSERT INTO teachers (id, name, subject, years_experience) VALUES (2, 'Bob Johnson', 'Science', 15); INSERT INTO teachers (id, name, subject, years_experience) VALUES (3, 'Charlie Smith', 'Math', 12);", "sql": "SELECT AVG(years_experience) FROM teachers WHERE subject = 'Math';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "Find the number of policies issued in 'Illinois' having a claim amount less than $500 and issued after '2020-01-01'.", "schema": "CREATE TABLE policyholders (id INT, name TEXT, state TEXT); CREATE TABLE policies (id INT, policyholder_id INT, issue_date DATE, claim_amount FLOAT); INSERT INTO policyholders (id, name, state) VALUES (1, 'Mike Brown', 'IL'); INSERT INTO policies (id, policyholder_id, issue_date, claim_amount) VALUES (1, 1, '2020-02-01', 400.00);", "sql": "SELECT COUNT(policies.id) FROM policies INNER JOIN policyholders ON policies.policyholder_id = policyholders.id WHERE policies.claim_amount < 500 AND policies.issue_date > '2020-01-01' AND policyholders.state = 'IL';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 216, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'numeric_big' (example 381).", "schema": null, "sql": "INSERT INTO num_exp_div VALUES (8,9,'154.875085756903716715488911525453064308758123952566428258639786597308109810869086867746263482721081985848551254298524280231489145092826397833394044637104667137816928932471315095067524966582810436282901424423215992139000153713476369887383242289102867530775908269805285313842050961754114751975054515055089553180717444020378611767296609130477264722612784088270193199394531972594028420402254831778715196248487757266330454269044609134602570688339750190391651801546906342796660819535014295618246236706572780627362908121159003488810140236665846928586992082180006454824311789091323774002510945263351862712964422865623934112293184149374573706760114682326698881257123280119140924775171374360283137569618025005229268057970275164869735173660958715166148344076027212231446680947914004346760896298312286730627916684448923824769');", "explanation": "DML from PostgreSQL core regression test for Numeric Big.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 854, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many points against have a lose of 13?", "schema": "CREATE TABLE table_17510803_2 (points_against VARCHAR, lost VARCHAR)", "sql": "SELECT points_against FROM table_17510803_2 WHERE lost = '13';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 62, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: I want the lowest points for tries of 39 and goals more than 0", "schema": "CREATE TABLE table_name_1 (points INTEGER, tries VARCHAR, goals VARCHAR)", "sql": "SELECT MIN(points) FROM table_name_1 WHERE tries = 39 AND goals > 0;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 68, "num_statements": 1} {"question": "What is the average funding for marine conservation per year?", "schema": "CREATE TABLE marine_conservation_funding (year INT, funding INT); INSERT INTO marine_conservation_funding (year, funding) VALUES (2020, 5000000), (2021, 5500000), (2022, 6000000);", "sql": "SELECT AVG(funding) FROM marine_conservation_funding;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 53, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which Krypton has a Physical property of critical pressure (atm)?", "schema": "CREATE TABLE table_name_36 (krypton VARCHAR, physical_property VARCHAR)", "sql": "SELECT krypton FROM table_name_36 WHERE physical_property = 'critical pressure (atm)';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 86, "num_statements": 1} {"question": "pgTAP test for Proctap (assertion 52).", "schema": null, "sql": "SELECT * FROM check_test(\n isnt_procedure( 'nonesuch', '{}'::name[], 'whatever' ),\n false,\n 'isnt_procedure(noproc, noargs, desc)',\n 'whatever',\n ' Function nonesuch() does not exist'\n);", "explanation": "SQL assertion from pgTAP test suite for Proctap.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 204, "num_statements": 1} {"question": "Update 'athlete_wellbeing' table to set 'wellbeing_score' to 70 where 'athlete_id' is 3", "schema": "CREATE TABLE athlete_wellbeing (athlete_id INT, wellbeing_score INT); INSERT INTO athlete_wellbeing (athlete_id, wellbeing_score) VALUES (1, 75), (2, 60), (3, 45), (4, 80);", "sql": "UPDATE athlete_wellbeing SET wellbeing_score = 70 WHERE athlete_id = 3;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 71, "num_statements": 1} {"question": "List all wastewater treatment plants in India with their treatment capacities for the year 2020.", "schema": "CREATE TABLE wastewater_plants (id INT, country VARCHAR(50), year INT, capacity FLOAT); INSERT INTO wastewater_plants (id, country, year, capacity) VALUES (1, 'India', 2020, 80.0), (2, 'India', 2019, 85.0), (3, 'India', 2018, 90.0), (4, 'United States', 2020, 95.0), (5, 'United States', 2019, 100.0), (6, 'United States', 2018, 105.0);", "sql": "SELECT capacity FROM wastewater_plants WHERE country = 'India' AND year = 2020;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 79, "num_statements": 1} {"question": "Identify the restaurants with the lowest food safety scores in each region.", "schema": "CREATE TABLE Restaurants (RestaurantID INT, RestaurantName VARCHAR(255), Region VARCHAR(255), FoodSafetyScore INT); INSERT INTO Restaurants VALUES (1,'Restaurant A','North',90),(2,'Restaurant B','North',85),(3,'Restaurant C','South',80),(4,'Restaurant D','South',82),(5,'Restaurant E','East',95),(6,'Restaurant F','East',93),(7,'Restaurant G','West',75),(8,'Restaurant H','West',88);", "sql": "SELECT Restaurants.RestaurantName, Restaurants.Region, Restaurants.FoodSafetyScore FROM Restaurants WHERE Restaurants.FoodSafetyScore = (SELECT MIN(FoodSafetyScore) FROM Restaurants AS T WHERE T.Region = Restaurants.Region) GROUP BY Restaurants.Region;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 252, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Can you tell me the sum of FLap that has the Pole larger than 0, and the Podiums of 6?", "schema": "CREATE TABLE table_name_40 (flap INTEGER, pole VARCHAR, podiums VARCHAR)", "sql": "SELECT SUM(flap) FROM table_name_40 WHERE pole > 0 AND podiums = 6;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 67, "num_statements": 1} {"question": "Who is the top scorer for each team in a tournament?", "schema": "CREATE TABLE Player (PlayerID int, PlayerName varchar(50), TeamID int); CREATE TABLE Goal (GoalID int, PlayerID int, Goals int, MatchDate date, TournamentID int); INSERT INTO Player (PlayerID, PlayerName, TeamID) VALUES (1, 'James Rodriguez', 1), (2, 'Radamel Falcao', 1), (3, 'Thomas Muller', 2), (4, 'Miroslav Klose', 2); INSERT INTO Goal (GoalID, PlayerID, Goals, MatchDate, TournamentID) VALUES (1, 1, 2, '2022-06-01', 1), (2, 1, 3, '2022-06-05', 1), (3, 2, 1, '2022-06-01', 1), (4, 2, 2, '2022-06-05', 1), (5, 3, 4, '2022-06-01', 1), (6, 3, 5, '2022-06-05', 1), (7, 4, 2, '2022-06-01', 1), (8, 4, 3, '2022-06-05', 1);", "sql": "SELECT p.TeamID, p.PlayerName, SUM(g.Goals) AS TotalGoals, ROW_NUMBER() OVER (PARTITION BY p.TeamID ORDER BY SUM(g.Goals) DESC) AS Ranking FROM Player p JOIN Goal g ON p.PlayerID = g.PlayerID WHERE g.TournamentID = 1 GROUP BY p.TeamID, p.PlayerName HAVING Ranking <= 1;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "query_window_function", "is_postgresql_specific": true, "sql_length": 269, "num_statements": 1} {"question": "Who are the regulators for the 'Securities and Exchange Commission Act' and 'Digital Asset Business Act'?", "schema": "CREATE TABLE Regulatory_Frameworks (Framework_Name VARCHAR(100), Country VARCHAR(50), Regulatory_Body VARCHAR(100)); INSERT INTO Regulatory_Frameworks (Framework_Name, Country, Regulatory_Body) VALUES ('Digital Asset Business Act', 'Bermuda', 'Bermuda Monetary Authority'); INSERT INTO Regulatory_Frameworks (Framework_Name, Country, Regulatory_Body) VALUES ('Securities and Exchange Commission Act', 'United States', 'Securities and Exchange Commission');", "sql": "SELECT Regulatory_Body FROM Regulatory_Frameworks WHERE Framework_Name IN ('Securities and Exchange Commission Act', 'Digital Asset Business Act');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 147, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the Borough for the Seward listing on 1977-11-23?", "schema": "CREATE TABLE table_name_54 (borough VARCHAR, location VARCHAR, listed VARCHAR)", "sql": "SELECT borough FROM table_name_54 WHERE location = 'seward' AND listed = '1977-11-23';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 86, "num_statements": 1} {"question": "What is the total number of streams for each artist, for artists who have performed at music festivals in the last year?", "schema": "CREATE TABLE artist_genre (artist_id INT, genre VARCHAR(255));", "sql": "SELECT a.artist_id, SUM(s.streams) as total_streams FROM artist_streams s JOIN festival_performances f ON s.artist_id = f.artist_id JOIN artist_genre g ON s.artist_id = g.artist_id WHERE f.performance_date >= DATE_SUB(CURRENT_DATE, INTERVAL 1 YEAR) GROUP BY a.artist_id;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "plpgsql", "is_postgresql_specific": false, "sql_length": 270, "num_statements": 1} {"question": "What is the suicide rate in Japan?", "schema": "CREATE TABLE Suicide (Country TEXT, Suicides INT, Population INT); INSERT INTO Suicide (Country, Suicides, Population) VALUES ('Japan', 15000, 120000000), ('Japan', 16000, 120000000);", "sql": "SELECT (Suicides / Population) * 100000 FROM Suicide WHERE Country = 'Japan';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 77, "num_statements": 1} {"question": "pgTAP test for Pgtap--Unpackaged--0.91.0 (assertion 669).", "schema": null, "sql": "ALTER EXTENSION pgtap ADD FUNCTION types_are ( NAME[] );", "explanation": "SQL assertion from pgTAP test suite for Pgtap--Unpackaged--0.91.0.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 56, "num_statements": 1} {"question": "PostgreSQL regression test 'multirangetypes': Write the SELECT query (example 142).", "schema": null, "sql": "SELECT nummultirange(numrange(null,null)) @> numrange(2,null);", "explanation": "Regression test for Multirangetypes in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT nummultirange(numrange(null,null)) @> numrange(2,null)) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 62, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many number of series have the production code of 1.11?", "schema": "CREATE TABLE table_2226817_2 (no_in_series INTEGER, production_code VARCHAR)", "sql": "SELECT MAX(no_in_series) FROM table_2226817_2 WHERE production_code = '1.11';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 77, "num_statements": 1} {"question": "What are the total production quantities of Gadolinium in 2019 and 2020 from the 'mining' and 'recycling' sources?", "schema": "CREATE TABLE mining (year INT, element VARCHAR(10), quantity INT); INSERT INTO mining VALUES (2019, 'Gadolinium', 1400), (2020, 'Gadolinium', 1600); CREATE TABLE recycling (year INT, element VARCHAR(10), quantity INT); INSERT INTO recycling VALUES (2019, 'Gadolinium', 1000), (2020, 'Gadolinium', 1200);", "sql": "SELECT year, SUM(quantity) FROM (SELECT year, quantity FROM mining WHERE element = 'Gadolinium' UNION ALL SELECT year, quantity FROM recycling WHERE element = 'Gadolinium') AS total_sources GROUP BY year HAVING year IN (2019, 2020);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 232, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which Points against has a Lost of lost?", "schema": "CREATE TABLE table_name_97 (points_against VARCHAR)", "sql": "SELECT points_against FROM table_name_97 WHERE \"lost\" = 'lost';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 63, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What's Trina Gulliver's high checkout?", "schema": "CREATE TABLE table_20351295_2 (high_checkout VARCHAR, player VARCHAR)", "sql": "SELECT high_checkout FROM table_20351295_2 WHERE player = 'Trina Gulliver';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 75, "num_statements": 1} {"question": "pgTAP test for Privs (assertion 66).", "schema": null, "sql": "-- Try nonexistent user.\nSELECT * FROM check_test(\n language_privs_are( 'plpgsql', '__noone', '{USAGE}', 'whatever' ),\n false,\n 'language_privs_are(lang, non-role, privs, desc)',\n 'whatever',\n ' Role __noone does not exist'\n);", "explanation": "SQL assertion from pgTAP test suite for Privs.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "plpgsql", "is_postgresql_specific": true, "sql_length": 244, "num_statements": 1} {"question": "PostgreSQL regression test 'join': Write the SELECT query (example 422).", "schema": null, "sql": "select 1 from\n text_tbl as tt1\n inner join text_tbl as tt2 on (tt1.f1 = 'foo')\n left join text_tbl as tt3 on (tt3.f1 = 'foo')\n left join text_tbl as tt4 on (tt3.f1 = tt4.f1),\n lateral (select tt4.f1 as c0 from text_tbl as tt5 limit 1) as ss1\nwhere tt1.f1 = ss1.c0;", "explanation": "Regression test for Join in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select 1 from\n text_tbl as tt1\n inner join text_tbl as tt2 on (tt1.f1 = 'foo')\n left join text_tbl as tt3 on (tt3.f1 = 'foo')\n left join text_tbl as tt4 on (tt3.f1 = tt4.f1),\n lateral (select tt4.f1 as c0 from text_tbl as tt5 limit 1) as ss1\nwhere tt1.f1 = ss1.c0) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": true, "sql_length": 269, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who is the player from the United States with a score of 71-72=143?", "schema": "CREATE TABLE table_name_38 (player VARCHAR, country VARCHAR, score VARCHAR)", "sql": "SELECT player FROM table_name_38 WHERE country = 'united states' AND score = 71 - 72 = 143;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 91, "num_statements": 1} {"question": "Remove all clients with the last name 'Patel' from the 'clients' table", "schema": "CREATE TABLE clients (client_id INT PRIMARY KEY, client_first_name VARCHAR(50), client_last_name VARCHAR(50)); INSERT INTO clients (client_id, client_first_name, client_last_name) VALUES (1, 'Ravi', 'Patel'), (2, 'Kalpana', 'Patel'), (3, 'Raj', 'Singh'), (4, 'Simran', 'Kaur');", "sql": "DELETE FROM clients WHERE client_last_name = 'Patel';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_delete", "is_postgresql_specific": false, "sql_length": 53, "num_statements": 1} {"question": "List all the menu items that have a 'Halal' certification", "schema": "CREATE TABLE menu_items (id INT, name VARCHAR(50), category VARCHAR(50), certification VARCHAR(50), price DECIMAL(5,2)); INSERT INTO menu_items (id, name, category, certification, price) VALUES (101, 'Beef Shawarma', 'Middle Eastern', 'Halal', 7.99), (102, 'Chicken Tikka Masala', 'Indian', 'Halal', 10.99), (103, 'Veggie Burger', 'American', NULL, 6.99);", "sql": "SELECT name FROM menu_items WHERE certification = 'Halal';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 58, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What's the total of Rank for the County of Galway and has a Total that's larger than 13?", "schema": "CREATE TABLE table_name_60 (rank INTEGER, county VARCHAR, total VARCHAR)", "sql": "SELECT SUM(rank) FROM table_name_60 WHERE county = 'galway' AND total > 13;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 75, "num_statements": 1} {"question": "What is the total number of vaccinations administered to indigenous people in Canada?", "schema": "CREATE TABLE Vaccinations (VaccinationID INT, PatientID INT, Age INT, Ethnicity VARCHAR(30), VaccineType VARCHAR(20), Date DATE); INSERT INTO Vaccinations (VaccinationID, PatientID, Age, Ethnicity, VaccineType, Date) VALUES (1, 7, 45, 'Indigenous', 'Pfizer', '2021-01-10'); INSERT INTO Vaccinations (VaccinationID, PatientID, Age, Ethnicity, VaccineType, Date) VALUES (2, 8, 50, 'Non-Indigenous', 'Moderna', '2021-01-12');", "sql": "SELECT COUNT(*) FROM Vaccinations WHERE Ethnicity = 'Indigenous' AND Country = 'Canada';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 88, "num_statements": 1} {"question": "How many restaurants in California offer vegan options on their menu?", "schema": "CREATE TABLE restaurants (id INT, name TEXT, location TEXT, type TEXT); INSERT INTO restaurants (id, name, location, type) VALUES (1, 'Restaurant A', 'California', 'Italian'), (2, 'Restaurant B', 'California', 'Vegan'), (3, 'Restaurant C', 'Texas', 'Mexican'), (4, 'Restaurant D', 'California', 'Chinese'); CREATE TABLE menu_items (id INT, name TEXT, is_vegan BOOLEAN, restaurant_id INT); INSERT INTO menu_items (id, name, is_vegan, restaurant_id) VALUES (1, 'Margherita Pizza', FALSE, 1), (2, 'Spaghetti Aglio e Olio', FALSE, 1), (3, 'Vegan Burger', TRUE, 2), (4, 'Garden Salad', TRUE, 2), (5, 'Tacos', FALSE, 3), (6, 'Vegan Sushi', TRUE, 4);", "sql": "SELECT COUNT(*) FROM restaurants INNER JOIN menu_items ON restaurants.id = menu_items.restaurant_id WHERE is_vegan = TRUE AND location = 'California';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 150, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: For week 15, what was the total number of attendance recorded?", "schema": "CREATE TABLE table_name_66 (attendance VARCHAR, week VARCHAR)", "sql": "SELECT COUNT(attendance) FROM table_name_66 WHERE week = 15;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 60, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the Galician (reintegrationist) word of the Galician (Official) is bo día / bos días?", "schema": "CREATE TABLE table_26614365_5 (galician___reintegrationist__ VARCHAR, galician___official__ VARCHAR)", "sql": "SELECT galician___reintegrationist__ FROM table_26614365_5 WHERE galician___official__ = 'Bo día / Bos días';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 109, "num_statements": 1} {"question": "What is the maximum data usage in GB for postpaid mobile customers in each region?", "schema": "CREATE TABLE customers (id INT, type VARCHAR(10), region VARCHAR(10)); INSERT INTO customers (id, type, region) VALUES (1, 'postpaid', 'North'), (2, 'prepaid', 'North'), (3, 'postpaid', 'South'), (4, 'prepaid', 'South'); CREATE TABLE usage (customer_id INT, data_usage FLOAT); INSERT INTO usage (customer_id, data_usage) VALUES (1, 3.5), (2, 2.2), (3, 4.7), (4, 1.8);", "sql": "SELECT customers.region, MAX(usage.data_usage) FROM usage JOIN customers ON usage.customer_id = customers.id WHERE customers.type = 'postpaid' GROUP BY customers.region;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 169, "num_statements": 1} {"question": "PostgreSQL regression test 'timetz': Write the SELECT query (example 37).", "schema": null, "sql": "SELECT EXTRACT(EPOCH FROM TIME WITH TIME ZONE '2020-05-26 13:30:25.575401-04');", "explanation": "Regression test for Timetz in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT EXTRACT(EPOCH FROM TIME WITH TIME ZONE '2020-05-26 13:30:25.575401-04')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 85, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the least place when the couple is Keiron & Brianne?", "schema": "CREATE TABLE table_26375386_28 (place INTEGER, couple VARCHAR)", "sql": "SELECT MIN(place) FROM table_26375386_28 WHERE couple = 'Keiron & Brianne';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 75, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the score of the match won by Willie Hunter at the Royal Liverpool Golf Club?", "schema": "CREATE TABLE table_name_21 (score VARCHAR, venue VARCHAR, champion VARCHAR)", "sql": "SELECT score FROM table_name_21 WHERE venue = 'royal liverpool golf club' AND champion = 'willie hunter';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 105, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What Song has a Draw of less than 4 with a Percentage of 32.22%?", "schema": "CREATE TABLE table_name_72 (song VARCHAR, draw VARCHAR, percentage VARCHAR)", "sql": "SELECT song FROM table_name_72 WHERE draw < 4 AND percentage = '32.22%';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 72, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the 2nd leg that had the Estudiantes La Plata home for the 2nd leg?", "schema": "CREATE TABLE table_name_41 (home__2nd_leg_ VARCHAR)", "sql": "SELECT 2 AS nd_leg FROM table_name_41 WHERE home__2nd_leg_ = 'estudiantes la plata';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 84, "num_statements": 1} {"question": "Which circular supply chain initiatives have been implemented by suppliers in the USA?", "schema": "CREATE TABLE Suppliers (SupplierID int, Name varchar(50), Country varchar(50), CircularSupplyChainInitiatives bool); INSERT INTO Suppliers (SupplierID, Name, Country, CircularSupplyChainInitiatives) VALUES (1, 'Supplier A', 'USA', true), (2, 'Supplier B', 'Canada', false), (3, 'Supplier C', 'USA', true), (4, 'Supplier D', 'Mexico', false);", "sql": "SELECT Name FROM Suppliers WHERE Country = 'USA' AND CircularSupplyChainInitiatives = true;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 91, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'insert' (example 155).", "schema": null, "sql": "insert into list_parted (a) values ('aA');", "explanation": "DML from PostgreSQL core regression test for Insert.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 42, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What shows for 2002 when the 1991 is w?", "schema": "CREATE TABLE table_name_46 (Id VARCHAR)", "sql": "SELECT 2002 FROM table_name_46 WHERE 1991 = 'w';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 48, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the least number of penalties he got when his point total was over 1419 in more than 98 games?", "schema": "CREATE TABLE table_name_45 (penalties INTEGER, points_total VARCHAR, played VARCHAR)", "sql": "SELECT MIN(penalties) FROM table_name_45 WHERE points_total = 1419 AND played > 98;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 83, "num_statements": 1} {"question": "Display the average voyage duration for each vessel type in the 'voyage_log' table", "schema": "CREATE TABLE voyage_log (id INT, vessel_type VARCHAR(50), voyage_duration INT);", "sql": "SELECT vessel_type, AVG(voyage_duration) FROM voyage_log GROUP BY vessel_type;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 78, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which Overall is the highest one that has a Pick # smaller than 9, and a Name of mike pearson?", "schema": "CREATE TABLE table_name_3 (overall INTEGER, pick__number VARCHAR, name VARCHAR)", "sql": "SELECT MAX(overall) FROM table_name_3 WHERE pick__number < 9 AND name = 'mike pearson';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 87, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is Country, when Player is \"Billy Maxwell\"?", "schema": "CREATE TABLE table_name_77 (country VARCHAR, player VARCHAR)", "sql": "SELECT country FROM table_name_77 WHERE player = 'billy maxwell';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 65, "num_statements": 1} {"question": "What is the average square footage of co-owned properties in Seattle?", "schema": "CREATE TABLE properties (id INT, city VARCHAR(20), square_footage INT, co_owned BOOLEAN); INSERT INTO properties (id, city, square_footage, co_owned) VALUES (1, 'Seattle', 1800, true); INSERT INTO properties (id, city, square_footage, co_owned) VALUES (2, 'Seattle', 1500, false);", "sql": "SELECT AVG(square_footage) FROM properties WHERE city = 'Seattle' AND co_owned = true;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 86, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who was the partner of the winner after 2012?", "schema": "CREATE TABLE table_name_12 (partner VARCHAR, year VARCHAR, outcome VARCHAR)", "sql": "SELECT partner FROM table_name_12 WHERE year > 2012 AND outcome = 'winner';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 75, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What championship after 1997 was the score 1–6, 4–6, 7–5, 5–7?", "schema": "CREATE TABLE table_name_3 (championship VARCHAR, year VARCHAR, score VARCHAR)", "sql": "SELECT championship FROM table_name_3 WHERE year > 1997 AND score = '1–6, 4–6, 7–5, 5–7';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 89, "num_statements": 1} {"question": "What is the distribution of investments by sector for Red Fund?", "schema": "CREATE TABLE investments (id INT, fund_name VARCHAR(255), sector VARCHAR(255), investment_amount FLOAT);", "sql": "SELECT sector, COUNT(*) as num_investments, SUM(investment_amount) as total_invested FROM investments WHERE fund_name = 'Red Fund' GROUP BY sector;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 147, "num_statements": 1} {"question": "PostgreSQL regression test 'multirangetypes': Write the SELECT query (example 269).", "schema": null, "sql": "SELECT nummultirange() -|- 'empty'::numrange;", "explanation": "Regression test for Multirangetypes in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT nummultirange() -|- 'empty'::numrange) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 45, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who was South Melbourne's away team opponents?", "schema": "CREATE TABLE table_name_70 (away_team VARCHAR, home_team VARCHAR)", "sql": "SELECT away_team FROM table_name_70 WHERE home_team = 'south melbourne';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 72, "num_statements": 1} {"question": "pgTAP test for Pgtap--Unpackaged--0.91.0 (assertion 588).", "schema": null, "sql": "ALTER EXTENSION pgtap ADD FUNCTION runtests( );", "explanation": "SQL assertion from pgTAP test suite for Pgtap--Unpackaged--0.91.0.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 47, "num_statements": 1} {"question": "What is the latest launch date for each country in the satellites table?", "schema": "CREATE TABLE satellites (id INT, name VARCHAR(50), launch_country VARCHAR(50), launch_date DATE); INSERT INTO satellites VALUES (1, 'Sputnik 1', 'Russia', '1957-10-04'); INSERT INTO satellites VALUES (2, 'Explorer 1', 'USA', '1958-01-31'); INSERT INTO satellites VALUES (3, 'Echo 1', 'USA', '1960-08-12'); INSERT INTO satellites VALUES (4, 'Cosmos 1', 'Russia', '1962-04-16');", "sql": "SELECT launch_country, MAX(launch_date) as latest_launch FROM satellites GROUP BY launch_country;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 97, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: When was successor David Atwood (R) seated?", "schema": "CREATE TABLE table_2417345_4 (date_successor_seated VARCHAR, successor VARCHAR)", "sql": "SELECT date_successor_seated FROM table_2417345_4 WHERE successor = 'David Atwood (R)';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 87, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What team had a race time of 2:53:57?", "schema": "CREATE TABLE table_2268216_1 (team VARCHAR, race_time VARCHAR)", "sql": "SELECT team FROM table_2268216_1 WHERE race_time = '2:53:57';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 61, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many episodes have the Netflix episode number S08E04?", "schema": "CREATE TABLE table_15187735_15 (episode VARCHAR, netflix VARCHAR)", "sql": "SELECT COUNT(episode) FROM table_15187735_15 WHERE netflix = 'S08E04';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 70, "num_statements": 1} {"question": "What is the quantity of item 'D01' in all warehouses?", "schema": "CREATE TABLE inventory (item_code varchar(5), warehouse_id varchar(5), quantity int); INSERT INTO inventory (item_code, warehouse_id, quantity) VALUES ('D01', 'LHR', 800), ('D01', 'MAD', 900);", "sql": "SELECT SUM(quantity) FROM inventory WHERE item_code = 'D01';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 60, "num_statements": 1} {"question": "What is the total number of tickets sold for music concerts in Los Angeles by recurring customers?", "schema": "CREATE TABLE tickets (ticket_id INT, customer_id INT, event_type VARCHAR(50), price DECIMAL(10,2), city VARCHAR(50)); CREATE TABLE customers (customer_id INT, name VARCHAR(50), is_recurring BOOLEAN); INSERT INTO customers (customer_id, name, is_recurring) VALUES (1, 'Michael Johnson', TRUE), (2, 'Sarah Lee', FALSE); INSERT INTO tickets (ticket_id, customer_id, event_type, price, city) VALUES (1, 1, 'Music Concert', 50.00, 'Los Angeles'), (2, 2, 'Music Concert', 75.00, 'Los Angeles');", "sql": "SELECT SUM(t.price) AS total_sales FROM tickets t JOIN customers c ON t.customer_id = c.customer_id WHERE c.city = 'Los Angeles' AND c.is_recurring = TRUE AND t.event_type = 'Music Concert';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 190, "num_statements": 1} {"question": "Who are the property owners in the 'prop_owners' table that have a 'property_id' matching with any record in the 'property2' table?", "schema": "CREATE TABLE prop_owners (id INT, owner VARCHAR(20), property_id INT); INSERT INTO prop_owners (id, owner, property_id) VALUES (1, 'Oliver', 201), (2, 'Sophia', 202), (3, 'Jacob', 203); CREATE TABLE property2 (id INT, city VARCHAR(20), price INT); INSERT INTO property2 (id, city, price) VALUES (201, 'Phoenix', 700000), (202, 'Denver', 500000);", "sql": "SELECT prop_owners.owner FROM prop_owners INNER JOIN property2 ON prop_owners.property_id = property2.id;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 105, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the term for hometown Ester Creek?", "schema": "CREATE TABLE table_name_55 (term VARCHAR, hometown VARCHAR)", "sql": "SELECT term FROM table_name_55 WHERE hometown = 'ester creek';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 62, "num_statements": 1} {"question": "Which fields have had a decrease in soil moisture in the past week?", "schema": "CREATE TABLE field (id INT, name VARCHAR(20)); CREATE TABLE soil_moisture (id INT, field_id INT, value INT, timestamp TIMESTAMP);", "sql": "SELECT f.name FROM field f INNER JOIN soil_moisture sm1 ON f.id = sm1.field_id INNER JOIN soil_moisture sm2 ON f.id = sm2.field_id AND sm2.timestamp = sm1.timestamp - INTERVAL '1 day' WHERE sm1.value > sm2.value;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 212, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What label has the netherlands as the region, and magik muzik 802-1 as the catalog?", "schema": "CREATE TABLE table_name_83 (label VARCHAR, region VARCHAR, catalog VARCHAR)", "sql": "SELECT label FROM table_name_83 WHERE region = 'netherlands' AND catalog = 'magik muzik 802-1';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 95, "num_statements": 1} {"question": "What is the name and number of crimes for each police department in the state of Texas?", "schema": "CREATE TABLE states (id INT, name VARCHAR(255)); CREATE TABLE police_departments (id INT, state_id INT, name VARCHAR(255)); CREATE TABLE crimes (id INT, department_id INT, name VARCHAR(255), number INT);", "sql": "SELECT pd.name, c.name, c.number FROM police_departments pd JOIN crimes c ON pd.id = c.department_id WHERE pd.state_id = (SELECT id FROM states WHERE name = 'Texas');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 166, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: 4tab of commercial broadcaster Radio Tab broadcasts at what frequency?", "schema": "CREATE TABLE table_name_75 (frequency VARCHAR, callsign VARCHAR, purpose VARCHAR, on_air_id VARCHAR)", "sql": "SELECT frequency FROM table_name_75 WHERE purpose = 'commercial' AND on_air_id = 'radio tab' AND callsign = '4tab';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 115, "num_statements": 1} {"question": "PostgreSQL regression test 'int4': Write the SELECT query (example 56).", "schema": null, "sql": "SELECT (-2147483648)::int4 / (-1)::int4;", "explanation": "Regression test for Int4 in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT (-2147483648)::int4 / (-1)::int4) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 40, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which wicket had a 100 (45) Runs (balls) amount?", "schema": "CREATE TABLE table_name_43 (wicket VARCHAR, runs__balls_ VARCHAR)", "sql": "SELECT wicket FROM table_name_43 WHERE runs__balls_ = '100 (45)';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 65, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What's the volume:issue of Sheryl Crow with 3 weeks on top?", "schema": "CREATE TABLE table_name_7 (volume VARCHAR, weeks_on_top VARCHAR, artist VARCHAR)", "sql": "SELECT volume AS :issue FROM table_name_7 WHERE weeks_on_top = '3' AND artist = 'sheryl crow';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 94, "num_statements": 1} {"question": "How many vessels transported liquefied natural gas in the Caribbean Sea in each month of 2021?", "schema": "CREATE TABLE VesselTypes (id INT, vessel_type VARCHAR(50), capacity INT); CREATE TABLE CargoTransports (id INT, vessel_id INT, transport_weight INT, transport_time TIMESTAMP);", "sql": "SELECT EXTRACT(MONTH FROM CT.transport_time) as month, COUNT(DISTINCT CT.vessel_id) as vessels_count FROM CargoTransports CT JOIN VesselTypes VT ON CT.vessel_id = VT.id WHERE VT.vessel_type = 'Liquefied Natural Gas Carrier' AND CT.transport_time > '2021-01-01' AND CT.transport_time < '2022-01-01' AND CT.latitude BETWEEN 10 AND 25 AND CT.longitude BETWEEN -90 AND -55 GROUP BY month;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 384, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which episode had viewership of 0.296 million?", "schema": "CREATE TABLE table_27987623_3 (episode VARCHAR, viewers__millions_ VARCHAR)", "sql": "SELECT episode FROM table_27987623_3 WHERE viewers__millions_ = '0.296';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 72, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What venue has an attendance of 30824 at Essendon in 1984?", "schema": "CREATE TABLE table_1139835_9 (venue VARCHAR, premier VARCHAR, attendance VARCHAR)", "sql": "SELECT venue FROM table_1139835_9 WHERE premier = 'Essendon' AND attendance = 30824;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 84, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: what is the largest city where the population is 6620100?", "schema": "CREATE TABLE table_17416221_1 (largest_city VARCHAR, population__2013_ VARCHAR)", "sql": "SELECT largest_city FROM table_17416221_1 WHERE population__2013_ = 6620100;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 76, "num_statements": 1} {"question": "What is the maximum number of reviews for eco-friendly hotels in India?", "schema": "CREATE TABLE eco_hotels (hotel_id INT, hotel_name TEXT, reviews INT, country TEXT); INSERT INTO eco_hotels (hotel_id, hotel_name, reviews, country) VALUES (1, 'Eco Lodge Jaipur', 100, 'India'), (2, 'Green Hotel New Delhi', 150, 'India');", "sql": "SELECT MAX(reviews) FROM eco_hotels WHERE country = 'India';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 60, "num_statements": 1} {"question": "Insert a new record into the volunteers table with the following information: id = 5, name = 'Liam Brown', hours_served = 30.00.", "schema": "CREATE TABLE volunteers (id INT, name VARCHAR(50), hours_served FLOAT);", "sql": "INSERT INTO volunteers (id, name, hours_served) VALUES (5, 'Liam Brown', 30.00);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 80, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What's the HR Number when the LMS number is 14757 and has a built of 9/1915?", "schema": "CREATE TABLE table_name_75 (hr_no VARCHAR, built VARCHAR, lms_no VARCHAR)", "sql": "SELECT hr_no FROM table_name_75 WHERE built = '9/1915' AND lms_no = 14757;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 74, "num_statements": 1} {"question": "What is the number of orders that were shipped using carbon-neutral methods in the ethical fashion marketplace?", "schema": "CREATE TABLE shipments (shipment_id INT, carbon_neutral BOOLEAN); INSERT INTO shipments (shipment_id, carbon_neutral) VALUES (1, true), (2, false), (3, true);", "sql": "SELECT COUNT(*) FROM shipments WHERE carbon_neutral = true;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 59, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: how many times is the country united states and the score 72-71-73-73=289?", "schema": "CREATE TABLE table_name_93 (to_par VARCHAR, country VARCHAR, score VARCHAR)", "sql": "SELECT COUNT(to_par) FROM table_name_93 WHERE country = 'united states' AND score = 72 - 71 - 73 - 73 = 289;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 108, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'tablefunc' (example 11).", "schema": null, "sql": "SELECT * FROM crosstab2('SELECT rowid, attribute, val FROM ct where rowclass = ''group2'' and (attribute = ''att1'' or attribute = ''att2'') ORDER BY 1,2;');", "explanation": "Example query from the 'tablefunc' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 157, "num_statements": 2} {"question": "How many renewable energy projects have been implemented in the Asia Pacific region?", "schema": "CREATE TABLE renewable_energy (id INT, project_name VARCHAR(50), location VARCHAR(50)); INSERT INTO renewable_energy (id, project_name, location) VALUES (1, 'SolarFarm AP', 'Asia Pacific'), (2, 'WindFarm EU', 'Europe'), (3, 'HydroAP', 'Asia Pacific'), (4, 'GeoThermal NA', 'North America');", "sql": "SELECT COUNT(*) FROM renewable_energy WHERE location = 'Asia Pacific';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 70, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the region 1 date for series 4", "schema": "CREATE TABLE table_15823956_1 (region_1 VARCHAR, series VARCHAR)", "sql": "SELECT region_1 FROM table_15823956_1 WHERE series = '4';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 57, "num_statements": 1} {"question": "Insert new student records for 'Oregon' and 'Washington' who have completed their mental health counseling", "schema": "CREATE TABLE NewStudents (StudentID INT, State VARCHAR(10), Counseling VARCHAR(10)); INSERT INTO NewStudents (StudentID, State, Counseling) VALUES (1, 'OR', 'Completed'), (2, 'WA', 'Completed');", "sql": "INSERT INTO NewStudents (StudentID, State, Counseling) VALUES (3, 'Oregon', 'Completed'), (4, 'Washington', 'Completed');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 121, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which states have renewable electricity equal to 9667 (gw×h)?", "schema": "CREATE TABLE table_25244412_1 (state VARCHAR, renewable_electricity__gw•h_ VARCHAR)", "sql": "SELECT state FROM table_25244412_1 WHERE renewable_electricity__gw•h_ = 9667;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 77, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which Lost has a Position of 4, and a Drawn smaller than 3?", "schema": "CREATE TABLE table_name_4 (lost INTEGER, position VARCHAR, drawn VARCHAR)", "sql": "SELECT AVG(lost) FROM table_name_4 WHERE position = 4 AND drawn < 3;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 68, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'numeric' (example 40).", "schema": null, "sql": "INSERT INTO num_exp_add VALUES (0,7,'-83028485');", "explanation": "DML from PostgreSQL core regression test for Numeric.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 49, "num_statements": 1} {"question": "What are the three rural areas with the highest obesity rate in Canada?", "schema": "CREATE TABLE areas_canada (id INT, name VARCHAR(255), rural_designation VARCHAR(50)); INSERT INTO areas_canada (id, name, rural_designation) VALUES (1, 'Area X', 'Rural'); CREATE TABLE obesity_rates (id INT, area_id INT, obesity_rate FLOAT); INSERT INTO obesity_rates (id, area_id, obesity_rate) VALUES (1, 1, 30.5);", "sql": "SELECT a.name, ob.obesity_rate FROM areas_canada a JOIN obesity_rates ob ON a.id = ob.area_id WHERE a.rural_designation = 'Rural' ORDER BY ob.obesity_rate DESC LIMIT 3;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 168, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the time for the event in which Tony Mendoza was the opponent?", "schema": "CREATE TABLE table_name_22 (time VARCHAR, opponent VARCHAR)", "sql": "SELECT time FROM table_name_22 WHERE opponent = 'tony mendoza';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 63, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: what game had a doubles round in 1979", "schema": "CREATE TABLE table_name_92 (mixed_doubles VARCHAR, year VARCHAR)", "sql": "SELECT mixed_doubles FROM table_name_92 WHERE year = 1979;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 58, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'numeric' (example 589).", "schema": null, "sql": "INSERT INTO fract_only VALUES (9, 'NaN');", "explanation": "DML from PostgreSQL core regression test for Numeric.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 41, "num_statements": 1} {"question": "Show a SQL definition from the pgaudit project (pgaudit, item 45).", "schema": null, "sql": "CREATE VIEW vw_test3 AS\nSELECT *\n FROM test3;", "explanation": "SQL definition from the open-source pgaudit PostgreSQL project.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "ddl_view", "is_postgresql_specific": false, "sql_length": 46, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the score of the March 14 game?", "schema": "CREATE TABLE table_name_95 (score VARCHAR, date VARCHAR)", "sql": "SELECT score FROM table_name_95 WHERE date = 'march 14';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 56, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Show the name of the conductor that has conducted the most number of orchestras.", "schema": "CREATE TABLE orchestra (Conductor_ID VARCHAR); CREATE TABLE conductor (Name VARCHAR, Conductor_ID VARCHAR)", "sql": "SELECT T1.Name FROM conductor AS T1 JOIN orchestra AS T2 ON T1.Conductor_ID = T2.Conductor_ID GROUP BY T2.Conductor_ID ORDER BY COUNT(*) DESC LIMIT 1;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 150, "num_statements": 1} {"question": "What is the moving average of sales for the last 3 months?", "schema": "CREATE TABLE sales_data (sale_date DATE, units_sold INT); INSERT INTO sales_data (sale_date, units_sold) VALUES ('2022-01-01', 500), ('2022-01-02', 600), ('2022-01-03', 700), ('2022-02-01', 800), ('2022-02-02', 900), ('2022-02-03', 1000), ('2022-03-01', 1100), ('2022-03-02', 1200), ('2022-03-03', 1300);", "sql": "SELECT sale_date, AVG(units_sold) OVER (ORDER BY sale_date ROWS BETWEEN 2 PRECEDING AND CURRENT ROW) as moving_average FROM sales_data;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_window_function", "is_postgresql_specific": true, "sql_length": 135, "num_statements": 1} {"question": "How many tickets were sold for the 'Homecoming' event in the 'tickets' table?", "schema": "CREATE TABLE tickets (ticket_id INT, event VARCHAR(50), price DECIMAL(5,2), quantity INT); INSERT INTO tickets (ticket_id, event, price, quantity) VALUES (1, 'Homecoming', 50.00, 1000); INSERT INTO tickets (ticket_id, event, price, quantity) VALUES (2, 'Season Finale', 75.00, 500);", "sql": "SELECT SUM(quantity) FROM tickets WHERE event = 'Homecoming';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 61, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What years did the Utah Jazz Player from Southern Methodist, Play?", "schema": "CREATE TABLE table_name_52 (years_for_jazz VARCHAR, school_club_team VARCHAR)", "sql": "SELECT years_for_jazz FROM table_name_52 WHERE school_club_team = 'southern methodist';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 87, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'matview' (example 91).", "schema": null, "sql": "CREATE MATERIALIZED VIEW mvtest_boxmv AS SELECT * FROM mvtest_boxes;", "explanation": "DDL from PostgreSQL core regression test for Matview.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_view", "is_postgresql_specific": true, "sql_length": 68, "num_statements": 1} {"question": "What is the maximum duration of space missions led by Canadian astronauts?", "schema": "CREATE TABLE SpaceMissionRecords (mission_name VARCHAR(30), astronaut_name VARCHAR(30), country VARCHAR(20), mission_duration INT); INSERT INTO SpaceMissionRecords (mission_name, astronaut_name, country, mission_duration) VALUES ('Moon Landing', 'Jacob Johnson', 'Canada', 120), ('Mars Exploration', 'Emily Brown', 'Canada', 180);", "sql": "SELECT MAX(mission_duration) FROM SpaceMissionRecords WHERE country = 'Canada';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 79, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which premiere had languages in danish?", "schema": "CREATE TABLE table_11323532_2 (premiere VARCHAR, languages VARCHAR)", "sql": "SELECT premiere FROM table_11323532_2 WHERE languages = 'Danish';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 65, "num_statements": 1} {"question": "How many polar bears are there in Canada and Russia?", "schema": "CREATE TABLE polar_bear_population (country VARCHAR(255), year INT, population INT);", "sql": "SELECT SUM(population) FROM polar_bear_population WHERE country IN ('Canada', 'Russia');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 88, "num_statements": 1} {"question": "PostgreSQL regression test 'window': Write the SELECT query (example 55).", "schema": null, "sql": "SELECT sum(unique1) over (rows between 2 preceding and 2 following exclude no others),\n\tunique1, four\nFROM tenk1 WHERE unique1 < 10;", "explanation": "Regression test for Window in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT sum(unique1) over (rows between 2 preceding and 2 following exclude no others),\n\tunique1, four\nFROM tenk1 WHERE unique1 < 10) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "intermediate", "category": "query_window_function", "is_postgresql_specific": true, "sql_length": 132, "num_statements": 1} {"question": "pgTAP test for Hastap (assertion 88).", "schema": null, "sql": "SELECT * FROM check_test(\n has_type( 'sometype', 'mydesc' ),\n true,\n 'has_type(type, desc)',\n 'mydesc',\n ''\n);", "explanation": "SQL assertion from pgTAP test suite for Hastap.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 125, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'create_index' (example 492).", "schema": null, "sql": "CREATE INDEX concur_reindex_part_index ON ONLY concur_reindex_part (c1);", "explanation": "DDL from PostgreSQL core regression test for Create Index.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "ddl_index", "is_postgresql_specific": false, "sql_length": 72, "num_statements": 1} {"question": "How many electric vehicles were sold in each city in 2020?", "schema": "CREATE TABLE cities (id INT, name VARCHAR(50), population INT); INSERT INTO cities (id, name, population) VALUES (1, 'San Francisco', 874000); INSERT INTO cities (id, name, population) VALUES (2, 'New York', 8601000); CREATE TABLE electric_vehicles (id INT, city_id INT, make VARCHAR(50), model VARCHAR(50), year INT, sales INT); INSERT INTO electric_vehicles (id, city_id, make, model, year, sales) VALUES (1, 1, 'Tesla', 'Model S', 2020, 5000); INSERT INTO electric_vehicles (id, city_id, make, model, year, sales) VALUES (2, 1, 'Tesla', 'Model 3', 2020, 8000); INSERT INTO electric_vehicles (id, city_id, make, model, year, sales) VALUES (3, 2, 'Chevrolet', 'Bolt', 2020, 3000);", "sql": "SELECT cities.name, electric_vehicles.year, SUM(electric_vehicles.sales) as total_sales FROM cities JOIN electric_vehicles ON cities.id = electric_vehicles.city_id GROUP BY cities.name, electric_vehicles.year HAVING electric_vehicles.year = 2020;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 246, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What company is the kit manufacturer that Gareth Southgate belongs to?", "schema": "CREATE TABLE table_name_50 (kit_manufacturer VARCHAR, captain VARCHAR)", "sql": "SELECT kit_manufacturer FROM table_name_50 WHERE captain = 'gareth southgate';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 78, "num_statements": 1} {"question": "What is the maximum number of habitat preservation projects in South America, focusing on 'Rainforest Conservation'?", "schema": "CREATE TABLE HabitatProjects (ProjectID INT, Project VARCHAR(50), Maximum INT, Location VARCHAR(50)); INSERT INTO HabitatProjects (ProjectID, Project, Maximum, Location) VALUES (1, 'Rainforest Conservation', 100, 'South America'); INSERT INTO HabitatProjects (ProjectID, Project, Maximum, Location) VALUES (2, 'Ocean Preservation', 80, 'South America');", "sql": "SELECT MAX(Maximum) FROM HabitatProjects WHERE Project = 'Rainforest Conservation' AND Location = 'South America';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 114, "num_statements": 1} {"question": "What is the average weight of chemical substances manufactured in the US that were produced in the last 5 years, grouped by their respective categories?", "schema": "CREATE TABLE chemicals (id INT, name VARCHAR(255), weight FLOAT, manufacturer_country VARCHAR(255), category VARCHAR(255), production_date DATE);", "sql": "SELECT category, AVG(weight) as avg_weight FROM chemicals WHERE manufacturer_country = 'USA' AND production_date > DATE_SUB(CURDATE(), INTERVAL 5 YEAR) GROUP BY category;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 170, "num_statements": 1} {"question": "Insert a new record into the animals table with a species of 'Lion', population of 300, and status of 'Threatened'", "schema": "CREATE TABLE animals (species VARCHAR(50), population INT, status VARCHAR(20));", "sql": "INSERT INTO animals (species, population, status) VALUES ('Lion', 300, 'Threatened');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 85, "num_statements": 1} {"question": "What is the total production of Terbium in Africa from 2017 to 2020?", "schema": "CREATE TABLE terbium_production (id INT, country TEXT, year INT, terbium_prod FLOAT); INSERT INTO terbium_production (id, country, year, terbium_prod) VALUES (1, 'South Africa', 2017, 120.0), (2, 'South Africa', 2018, 150.0), (3, 'South Africa', 2019, 180.0), (4, 'South Africa', 2020, 200.0), (5, 'Egypt', 2017, 50.0), (6, 'Egypt', 2018, 55.0), (7, 'Egypt', 2019, 60.0), (8, 'Egypt', 2020, 65.0);", "sql": "SELECT SUM(terbium_prod) as total_terbium_prod FROM terbium_production WHERE year BETWEEN 2017 AND 2020 AND country = 'South Africa' OR country = 'Egypt';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 154, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many episodes had 4.44m viewers?", "schema": "CREATE TABLE table_27039190_3 (episode VARCHAR, viewers VARCHAR)", "sql": "SELECT COUNT(episode) FROM table_27039190_3 WHERE viewers = '4.44m';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 68, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many games were against Furman?", "schema": "CREATE TABLE table_20745444_1 (game INTEGER, opponent VARCHAR)", "sql": "SELECT MAX(game) FROM table_20745444_1 WHERE opponent = 'Furman';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 65, "num_statements": 1} {"question": "Calculate the percentage of veterans employed in the defense industry", "schema": "CREATE TABLE veteran_employment (employee_id INT, industry VARCHAR(20));", "sql": "SELECT (COUNT(*) FILTER (WHERE industry = 'Defense')) * 100.0 / COUNT(*) as defense_industry_percentage FROM veteran_employment;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": true, "sql_length": 128, "num_statements": 1} {"question": "Show a SQL definition from the zombodb project (_cat-api, item 10).", "schema": null, "sql": "CREATE OR REPLACE VIEW zdb.cat_nodeattrs AS\nSELECT\n url,\n (response->>'node') AS \"node\",\n (response->>'host') AS \"host\",\n (response->>'ip')::inet AS \"ip\",\n (response->>'attr') AS \"attr\",\n (response->>'value') AS \"value\"\nFROM zdb._all_indices_cat_request('nodeattrs');", "explanation": "SQL definition from the open-source zombodb PostgreSQL project.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 285, "num_statements": 1} {"question": "Determine the dishes that have not been sold in the last 30 days", "schema": "CREATE TABLE sales_data (sale_id INT, dish_id INT, sale_date DATE); INSERT INTO sales_data (sale_id, dish_id, sale_date) VALUES (1, 1, '2022-01-01'), (2, 2, '2022-01-05'), (3, 1, '2022-01-10'); CREATE TABLE menu (dish_id INT, dish_name VARCHAR(255), dish_type VARCHAR(255)); INSERT INTO menu (dish_id, dish_name, dish_type) VALUES (1, 'Quinoa Salad', 'Vegetarian'), (2, 'Chicken Sandwich', 'Non-Vegetarian');", "sql": "SELECT m.dish_id, m.dish_name FROM menu m LEFT JOIN sales_data s ON m.dish_id = s.dish_id WHERE s.sale_date < DATE_SUB(CURDATE(), INTERVAL 30 DAY) IS NULL;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 155, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who was the losing team with a total of 24 when the winning team was Sydney Roosters?", "schema": "CREATE TABLE table_name_44 (losing_team VARCHAR, total VARCHAR, winning_team VARCHAR)", "sql": "SELECT losing_team FROM table_name_44 WHERE total = 24 AND winning_team = 'sydney roosters';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 92, "num_statements": 1} {"question": "PostgreSQL regression test 'arrays': Write the SELECT query (example 423).", "schema": null, "sql": "select unnest(array(select '11 22 33'::oidvector from generate_series(1,5)));", "explanation": "Regression test for Arrays in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select unnest(array(select '11 22 33'::oidvector from generate_series(1,5)))) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": true, "sql_length": 77, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'numeric_big' (example 297).", "schema": null, "sql": "INSERT INTO num_exp_div VALUES (6,8,'.000000000005523967081937952184172713994498918048454262874017009201501812494019618863622631634736130436187167745347383745890248619882896153083428308074678908731005176810208100004498415662458272149380846809398637385270265351808328466537502823071145089961996689711299405627596294988646826454676198092260759424935699382655736524042353938814268760468122584678267125994645166955751211397353140569987758938572953312303398024147927938612934833827734142292697389251052485981023756760420972614486278837214553818521196182883489483756785207650821722660455451660719560529693418375773124813290305501923899840247103166971466167032437598057958226806335324315214908788839919408525748236713611579486768218564733151121028172253396652755590051310396973181595992981076269789287489208817712754098019817792758730835341151711523474207');", "explanation": "DML from PostgreSQL core regression test for Numeric Big.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 851, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the name for company d", "schema": "CREATE TABLE table_25794010_1 (name VARCHAR, company VARCHAR)", "sql": "SELECT name FROM table_25794010_1 WHERE company = 'company D';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 62, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which Commercial Operation has both a Gross Capacity of 417 mw and an Electricity Grid 27.12.1971?", "schema": "CREATE TABLE table_name_48 (commercial_operation VARCHAR, gross_capacity VARCHAR, electricity_grid VARCHAR)", "sql": "SELECT commercial_operation FROM table_name_48 WHERE gross_capacity = '417 mw' AND electricity_grid = '27.12.1971';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 115, "num_statements": 1} {"question": "What is the average number of lifelong learning courses completed by teachers in each institution?", "schema": "CREATE TABLE teacher_lifelong_learning (teacher_id INT, institution_id INT, course_count INT);", "sql": "SELECT institution_id, AVG(course_count) as avg_courses FROM teacher_lifelong_learning GROUP BY institution_id;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 111, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'hstore' (example 71).", "schema": null, "sql": "select hstore 'a=>NULL, b=>qq' ?| ARRAY['a','b'];", "explanation": "Example query from the 'hstore' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 49, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'polymorphism' (example 141).", "schema": null, "sql": "insert into t values(3,array[3],'b');", "explanation": "DML from PostgreSQL core regression test for Polymorphism.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": true, "sql_length": 37, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who are the away teams when subiaco oval was the grounds?", "schema": "CREATE TABLE table_16388506_1 (away_team VARCHAR, ground VARCHAR)", "sql": "SELECT away_team FROM table_16388506_1 WHERE ground = 'Subiaco Oval';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 69, "num_statements": 1} {"question": "How many cases were handled by each restorative justice program?", "schema": "CREATE TABLE restorative_justice (id INT, program_name VARCHAR(255), cases_handled INT); INSERT INTO restorative_justice (id, program_name, cases_handled) VALUES (1, 'Victim-Offender Mediation', 35), (2, 'Restorative Circles', 47), (3, 'Community Conferencing', 29);", "sql": "SELECT program_name, SUM(cases_handled) FROM restorative_justice GROUP BY program_name;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 87, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which days have a stage of ss11?", "schema": "CREATE TABLE table_21536557_2 (day VARCHAR, stage VARCHAR)", "sql": "SELECT day FROM table_21536557_2 WHERE stage = 'SS11';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 54, "num_statements": 1} {"question": "What is the total quantity of goods transported by container ships?", "schema": "CREATE TABLE Vessels (VesselID INT, VesselName VARCHAR(100), VesselType VARCHAR(100), PortID INT); INSERT INTO Vessels (VesselID, VesselName, VesselType, PortID) VALUES (1, 'Ever Ace', 'Container Ship', 1); INSERT INTO Vessels (VesselID, VesselName, VesselType, PortID) VALUES (2, 'Algeciras', 'Ro-Ro Ship', 2); CREATE TABLE Cargo (CargoID INT, CargoName VARCHAR(100), Quantity INT, VesselID INT); INSERT INTO Cargo (CargoID, CargoName, Quantity, VesselID) VALUES (1, 'Electronics', 10000, 1); INSERT INTO Cargo (CargoID, CargoName, Quantity, VesselID) VALUES (2, 'Vehicles', 5000, 2);", "sql": "SELECT SUM(Cargo.Quantity) FROM Cargo INNER JOIN Vessels ON Cargo.VesselID = Vessels.VesselID WHERE Vessels.VesselType = 'Container Ship';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 138, "num_statements": 1} {"question": "What is the total budget allocated for education programs in the \"GovernmentBudget\" table, for each department, where the budget was over $100,000?", "schema": "CREATE TABLE GovernmentBudget (id INT, department VARCHAR(50), program VARCHAR(50), budget DECIMAL(10,2)); INSERT INTO GovernmentBudget (id, department, program, budget) VALUES (1, 'Education', 'Elementary School', 50000), (2, 'Education', 'High School', 120000), (3, 'Education', 'College', 200000), (4, 'Healthcare', 'Hospital', 300000);", "sql": "SELECT department, SUM(budget) as total_budget FROM GovernmentBudget WHERE budget > 100000 AND program LIKE '%Education%' GROUP BY department;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 142, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the height of the delegate whose hometown is Renton, WA?", "schema": "CREATE TABLE table_name_69 (height VARCHAR, hometown VARCHAR)", "sql": "SELECT height FROM table_name_69 WHERE hometown = 'renton, wa';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 63, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is Points, when Tries For is \"correct as of 00:00 11 June 2008\"?", "schema": "CREATE TABLE table_name_90 (points VARCHAR, tries_for VARCHAR)", "sql": "SELECT points FROM table_name_90 WHERE tries_for = 'correct as of 00:00 11 june 2008';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 86, "num_statements": 1} {"question": "What is the average quantity of products sold in stores located in urban areas?", "schema": "CREATE TABLE stores (store_id INT, location VARCHAR(20), quantity INT); INSERT INTO stores (store_id, location, quantity) VALUES (1, 'urban', 100), (2, 'rural', 50), (3, 'urban', 150), (4, 'suburban', 75);", "sql": "SELECT AVG(quantity) FROM stores WHERE location = 'urban';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 58, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the represents for los alcarrizos", "schema": "CREATE TABLE table_26301697_2 (represents VARCHAR, hometown VARCHAR)", "sql": "SELECT represents FROM table_26301697_2 WHERE hometown = 'Los Alcarrizos';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 74, "num_statements": 1} {"question": "What is the average age of patients with diabetes in indigenous communities, and what is the total population in those communities?", "schema": "CREATE TABLE patients (id INT, name TEXT, age INT, is_indigenous BOOLEAN, has_diabetes BOOLEAN); INSERT INTO patients (id, name, age, is_indigenous, has_diabetes) VALUES (1, 'Juanita Flores', 50, true, true), (2, 'Brian White', 45, false, false), (3, 'Nia Brown', 60, true, true);", "sql": "SELECT AVG(patients.age), COUNT(patients.id) FROM patients WHERE patients.is_indigenous = true AND patients.has_diabetes = true;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 128, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name who wrote the episode by lawrence trilling", "schema": "CREATE TABLE table_27504682_1 (written_by VARCHAR, directed_by VARCHAR)", "sql": "SELECT written_by FROM table_27504682_1 WHERE directed_by = 'Lawrence Trilling';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 80, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the rank for thomas morgenstern", "schema": "CREATE TABLE table_24489942_10 (rank VARCHAR, name VARCHAR)", "sql": "SELECT rank FROM table_24489942_10 WHERE name = 'Thomas Morgenstern';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 69, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What date did the Green-Communists receive 5.7%?", "schema": "CREATE TABLE table_1881642_1 (date_released VARCHAR, green_communist VARCHAR)", "sql": "SELECT date_released FROM table_1881642_1 WHERE green_communist = '5.7%';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 73, "num_statements": 1} {"question": "Which countries have the highest military budgets in the Asia-Pacific region, excluding China?", "schema": "CREATE TABLE military_budget (country VARCHAR(50), budget INT); INSERT INTO military_budget (country, budget) VALUES ('United States', 7050000000), ('China', 22800000000), ('Japan', 4936000000), ('India', 5574000000), ('South Korea', 4370000000);", "sql": "SELECT country, budget FROM military_budget WHERE country != 'China' AND country IN ('United States', 'Japan', 'India', 'South Korea') ORDER BY budget DESC;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 156, "num_statements": 1} {"question": "What was the total waste generation in kg for each city in the year 2020?", "schema": "CREATE TABLE waste_generation(city VARCHAR(255), year INT, amount FLOAT); INSERT INTO waste_generation(city, year, amount) VALUES('CityA', 2020, 123.45), ('CityB', 2020, 678.90);", "sql": "SELECT city, SUM(amount) FROM waste_generation WHERE year = 2020 GROUP BY city;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 79, "num_statements": 1} {"question": "pgTAP test for Pgtap--Unpackaged--0.91.0 (assertion 310).", "schema": null, "sql": "ALTER EXTENSION pgtap ADD FUNCTION hasnt_tablespace( NAME, TEXT );", "explanation": "SQL assertion from pgTAP test suite for Pgtap--Unpackaged--0.91.0.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "pgTAP test for Privs (assertion 89).", "schema": null, "sql": "SELECT * FROM test_anycols();", "explanation": "SQL assertion from pgTAP test suite for Privs.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 29, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: what is the total number o production code where us viewers is 2.76?", "schema": "CREATE TABLE table_21726793_1 (production_code VARCHAR, us_viewers__million_ VARCHAR)", "sql": "SELECT COUNT(production_code) FROM table_21726793_1 WHERE us_viewers__million_ = '2.76';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 88, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Find the title of the course that is offered by more than one department.", "schema": "CREATE TABLE course (title VARCHAR)", "sql": "SELECT title FROM course GROUP BY title HAVING COUNT(*) > 1;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 60, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'test_decoding' (example 17).", "schema": null, "sql": "SELECT count(*) >= 0 AS ok FROM pg_ls_replslotdir('not_existing_slot'); -- fails\n\n-- permanent slot has survived\nSELECT pg_drop_replication_slot('regression_slot_p');", "explanation": "Example query from the 'test_decoding' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 166, "num_statements": 2} {"question": "Generate PostgreSQL SQL for: when first elected was 1898 what was the party?", "schema": "CREATE TABLE table_name_9 (party VARCHAR, first_elected VARCHAR)", "sql": "SELECT party FROM table_name_9 WHERE first_elected = '1898';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 60, "num_statements": 1} {"question": "What is the total biomass of seals in the Canadian Arctic?", "schema": "CREATE TABLE SealData (species VARCHAR(50), location VARCHAR(50), biomass FLOAT); INSERT INTO SealData (species, location, biomass) VALUES ('Hooded Seal', 'Canadian Arctic', 250.0), ('Ringed Seal', 'Canadian Arctic', 300.0), ('Bearded Seal', 'Canadian Arctic', 350.0);", "sql": "SELECT location, SUM(biomass) FROM SealData WHERE species IN ('Hooded Seal', 'Ringed Seal', 'Bearded Seal') GROUP BY location;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 126, "num_statements": 1} {"question": "Count the number of clinics in rural New Mexico that have a capacity greater than 50.", "schema": "CREATE TABLE clinics (id INT, name VARCHAR(50), type VARCHAR(50), capacity INT, region VARCHAR(50)); INSERT INTO clinics (id, name, type, capacity, region) VALUES (1, 'Clinic A', 'Primary Care', 55, 'Rural New Mexico');", "sql": "SELECT COUNT(clinics.id) FROM clinics WHERE clinics.region = 'Rural New Mexico' AND clinics.capacity > 50;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 106, "num_statements": 1} {"question": "PostgreSQL regression test 'conversion': Write the SELECT query (example 62).", "schema": null, "sql": "select description, inbytes, (test_conv(inbytes, 'mule_internal', 'iso8859-5')).* from mic_inputs;", "explanation": "Regression test for Conversion in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select description, inbytes, (test_conv(inbytes, 'mule_internal', 'iso8859-5')).* from mic_inputs) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 98, "num_statements": 1} {"question": "What is the average runtime of movies by genre?", "schema": "CREATE TABLE movie_data (title VARCHAR(255), runtime INT, genre VARCHAR(255)); INSERT INTO movie_data (title, runtime, genre) VALUES ('Die Hard', 132, 'Action'), ('The Shawshank Redemption', 142, 'Drama');", "sql": "SELECT genre, AVG(runtime) FROM movie_data GROUP BY genre;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 58, "num_statements": 1} {"question": "What is the minimum explainability score for models developed in Africa?", "schema": "CREATE TABLE explainability_scores (id INT, model_id INT, score FLOAT); INSERT INTO explainability_scores (id, model_id, score) VALUES (1, 1, 0.75), (2, 2, 0.91), (3, 3, 0.68); CREATE TABLE models (id INT, name TEXT, country TEXT); INSERT INTO models (id, name, country) VALUES (1, 'ModelA', 'Africa'), (2, 'ModelB', 'Canada'), (3, 'ModelC', 'Africa');", "sql": "SELECT MIN(score) FROM explainability_scores JOIN models ON explainability_scores.model_id = models.id WHERE country = 'Africa';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 128, "num_statements": 1} {"question": "What is the average number of weapons for each naval vessel?", "schema": "CREATE TABLE NavalVessels (ID INT, Name VARCHAR(50), NumWeapons INT);", "sql": "SELECT AVG(NumWeapons) FROM NavalVessels;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 41, "num_statements": 1} {"question": "What is the minimum credit score of clients who have made an investment in the past month?", "schema": "CREATE TABLE clients (id INT, credit_score INT, registered_date DATE);CREATE TABLE investments (id INT, client_id INT, investment_date DATE); INSERT INTO clients (id, credit_score, registered_date) VALUES (1, 750, '2020-01-01'), (2, 650, '2019-01-01'), (3, 800, '2018-01-01'), (4, 700, '2017-01-01'), (5, 600, '2016-01-01'); INSERT INTO investments (id, client_id, investment_date) VALUES (1, 1, '2021-02-01'), (2, 1, '2021-03-01'), (3, 2, '2020-04-01'), (4, 3, '2019-05-01'), (5, 4, '2018-06-01'), (6, 1, '2021-02-02'), (7, 1, '2021-02-03'), (8, 5, '2021-03-01');", "sql": "SELECT MIN(credit_score) FROM clients c JOIN investments i ON c.id = i.client_id WHERE i.investment_date >= c.registered_date + INTERVAL '1 month';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 147, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who were the Opponents in the match on a Hard Surface with Catherine Suire as Partner and Outcome of runner-up?", "schema": "CREATE TABLE table_name_6 (opponents VARCHAR, partner VARCHAR, outcome VARCHAR, surface VARCHAR)", "sql": "SELECT opponents FROM table_name_6 WHERE outcome = 'runner-up' AND surface = 'hard' AND partner = 'catherine suire';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 116, "num_statements": 1} {"question": "Which player from Africa has the highest score in the 'all_time_high_scores' table?", "schema": "CREATE TABLE all_time_high_scores (player_id INT, player_name TEXT, score INT, country TEXT);", "sql": "SELECT player_name, MAX(score) as high_score FROM all_time_high_scores WHERE country = 'Africa' GROUP BY player_name ORDER BY high_score DESC LIMIT 1;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 150, "num_statements": 1} {"question": "What is the total quantity of items shipped from the USA to Canada?", "schema": "CREATE TABLE Warehouse (id INT, location VARCHAR(50), quantity INT); INSERT INTO Warehouse (id, location, quantity) VALUES (1, 'USA', 300), (2, 'Canada', 250);", "sql": "SELECT SUM(quantity) FROM Warehouse WHERE location = 'USA';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 59, "num_statements": 1} {"question": "Which country in 'South America' has the most organic farms?", "schema": "CREATE TABLE organic_farms (id INT, country VARCHAR(50), region VARCHAR(50), no_farms INT); INSERT INTO organic_farms (id, country, region, no_farms) VALUES (1, 'Brazil', 'South America', 2000); INSERT INTO organic_farms (id, country, region, no_farms) VALUES (2, 'Argentina', 'South America', 3000); INSERT INTO organic_farms (id, country, region, no_farms) VALUES (3, 'Colombia', 'South America', 1500);", "sql": "SELECT country, MAX(no_farms) FROM organic_farms WHERE region = 'South America';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 80, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many epsidode(s) had 3.63 million viewers?", "schema": "CREATE TABLE table_22353769_3 (episode__number VARCHAR, viewers__millions_ VARCHAR)", "sql": "SELECT COUNT(episode__number) FROM table_22353769_3 WHERE viewers__millions_ = '3.63';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 86, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: what are the numbers of episodes that had 0.79 million US views?", "schema": "CREATE TABLE table_28348757_6 (_number VARCHAR, us_viewers__million_ VARCHAR)", "sql": "SELECT _number FROM table_28348757_6 WHERE us_viewers__million_ = '0.79';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 73, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who is the player from La Costa Canyon High School?", "schema": "CREATE TABLE table_name_79 (player VARCHAR, school VARCHAR)", "sql": "SELECT player FROM table_name_79 WHERE school = 'la costa canyon high school';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 78, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: how many have times of 6", "schema": "CREATE TABLE table_13512105_3 (fastest_lap VARCHAR, rnd VARCHAR)", "sql": "SELECT COUNT(fastest_lap) FROM table_13512105_3 WHERE rnd = 6;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 62, "num_statements": 1} {"question": "PostgreSQL regression test 'timestamptz': Write the SELECT query (example 228).", "schema": null, "sql": "SELECT make_timestamptz(1973, 07, 15, 08, 15, 55.33, '+2');", "explanation": "Regression test for Timestamptz in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT make_timestamptz(1973, 07, 15, 08, 15, 55.33, '+2')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 59, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the Outcome of the game against Frederic Jeanclaude?", "schema": "CREATE TABLE table_name_55 (outcome VARCHAR, opponent VARCHAR)", "sql": "SELECT outcome FROM table_name_55 WHERE opponent = 'frederic jeanclaude';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 73, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: List the name of albums that are released by aritist whose name has 'Led'", "schema": "CREATE TABLE artists (id VARCHAR, name VARCHAR); CREATE TABLE albums (title VARCHAR, artist_id VARCHAR)", "sql": "SELECT T2.title FROM artists AS T1 JOIN albums AS T2 ON T1.id = T2.artist_id WHERE T1.name LIKE '%Led%';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 104, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who was the pilot of max altitude of 55.9 miles?", "schema": "CREATE TABLE table_221315_3 (pilot VARCHAR, max_altitude__miles_ VARCHAR)", "sql": "SELECT pilot FROM table_221315_3 WHERE max_altitude__miles_ = '55.9';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 69, "num_statements": 1} {"question": "How many articles were written by each author from underrepresented communities in the 'news_articles' table?", "schema": "CREATE TABLE news_articles (article_id INT, author VARCHAR(50), title VARCHAR(100), publication_date DATE, category VARCHAR(20), community_represented BOOLEAN); INSERT INTO news_articles (article_id, author, title, publication_date, category, community_represented) VALUES (1, 'Aisha Jones', 'Article 1', '2022-01-01', 'Politics', true), (2, 'Brian Chen', 'Article 2', '2022-01-02', 'Sports', false);", "sql": "SELECT author, SUM(community_represented) as articles_for_underrepresented FROM news_articles GROUP BY author;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 110, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What chassis does the shadow built car use?", "schema": "CREATE TABLE table_name_36 (chassis VARCHAR, constructor VARCHAR)", "sql": "SELECT chassis FROM table_name_36 WHERE constructor = 'shadow';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 63, "num_statements": 1} {"question": "Calculate the risk score of clients who have made more than 10 transactions in the past month", "schema": "CREATE TABLE clients (client_id INT PRIMARY KEY, name VARCHAR(100)); CREATE TABLE client_transactions (transaction_id INT PRIMARY KEY, client_id INT, transaction_date DATE);", "sql": "SELECT c.name, AVG(r.risk_score) AS avg_risk_score FROM clients c JOIN (SELECT client_id, risk_score FROM client_risk_scores WHERE client_id IN (SELECT client_id FROM client_transactions WHERE transaction_date >= DATE_SUB(NOW(), INTERVAL 1 MONTH) GROUP BY client_id HAVING COUNT(*) > 10)) r ON c.client_id = r.client_id GROUP BY c.client_id;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 341, "num_statements": 1} {"question": "PostgreSQL regression test 'txid': Write the SELECT query (example 20).", "schema": null, "sql": "select txid_visible_in_snapshot('1000100010001012', '1000100010001000:1000100010001100:1000100010001012,1000100010001013');", "explanation": "Regression test for Txid in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select txid_visible_in_snapshot('1000100010001012', '1000100010001000:1000100010001100:1000100010001012,1000100010001013')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 123, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'pgcrypto' (example 7).", "schema": null, "sql": "INSERT INTO ctest VALUES ('password', '', '');", "explanation": "Example query from the 'pgcrypto' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 46, "num_statements": 1} {"question": "What's the total amount of coal reserves for each mining region?", "schema": "CREATE TABLE coal_mines(id INT, region VARCHAR, reserves FLOAT); INSERT INTO coal_mines(id, region, reserves) VALUES (1, 'Appalachia', 1234.56), (2, 'Powder River', 789.10);", "sql": "SELECT region, SUM(reserves) FROM coal_mines GROUP BY region;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 61, "num_statements": 1} {"question": "Find the number of wells drilled by each company in the Bakken Formation.", "schema": "CREATE TABLE wells_by_company (company VARCHAR(255), formation VARCHAR(255), num_wells INT);", "sql": "SELECT company, formation, COUNT(*) AS num_wells FROM wells_by_company WHERE formation = 'Bakken Formation' GROUP BY company;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 125, "num_statements": 1} {"question": "Update the geopolitical risk assessment of 'Project E' to medium", "schema": "CREATE TABLE geopolitical_risk (project_name VARCHAR(255), risk_level VARCHAR(255)); INSERT INTO geopolitical_risk (project_name, risk_level) VALUES ('Project A', 'medium'), ('Project B', 'low'), ('Project E', 'high');", "sql": "UPDATE geopolitical_risk SET risk_level = 'medium' WHERE project_name = 'Project E';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 84, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'create_index' (example 234).", "schema": null, "sql": "CREATE INDEX CONCURRENTLY concur_index5 on concur_heap(f2) WHERE f1='x';", "explanation": "DDL from PostgreSQL core regression test for Create Index.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "ddl_index", "is_postgresql_specific": true, "sql_length": 72, "num_statements": 1} {"question": "PostgreSQL regression test 'tstypes': Write the SELECT query (example 31).", "schema": null, "sql": "SELECT 'a | ff' < 'b & c'::tsquery as \"false\";", "explanation": "Regression test for Tstypes in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT 'a | ff' < 'b & c'::tsquery as \"false\") AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": true, "sql_length": 46, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the score for game 18?", "schema": "CREATE TABLE table_name_87 (score VARCHAR, game VARCHAR)", "sql": "SELECT score FROM table_name_87 WHERE game = 18;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 48, "num_statements": 1} {"question": "What is the maximum revenue earned by restaurants serving 'Halal' dishes?", "schema": "CREATE TABLE restaurants (restaurant_id INT, name VARCHAR(50), cuisine VARCHAR(50), diet VARCHAR(50), revenue INT); INSERT INTO restaurants VALUES (1, 'Asian Fusion', 'Asian', 'Non-Halal', 5000), (2, 'Tuscan Bistro', 'Italian', 'Non-Halal', 7000), (3, 'Baja Coast', 'Mexican', 'Non-Halal', 4000), (4, 'Sushi House', 'Asian', 'Halal', 8000), (5, 'Pizzeria Rustica', 'Italian', 'Halal', 6000), (6, 'Taqueria El Paso', 'Mexican', 'Halal', 4500), (7, 'Mexican Grill', 'Mexican', 'Halal', 5500), (8, 'Halal Bistro', 'Middle Eastern', 'Halal', 9000);", "sql": "SELECT diet, MAX(revenue) FROM restaurants WHERE diet = 'Halal';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 64, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the date of the game with an attendance larger than 62,491?", "schema": "CREATE TABLE table_name_55 (date VARCHAR, attendance INTEGER)", "sql": "SELECT date FROM table_name_55 WHERE attendance > 62 OFFSET 491;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 64, "num_statements": 1} {"question": "How many projects were completed in 'public_works' and 'civil_projects' between 2019-01-01 and 2021-12-31?", "schema": "CREATE TABLE public_works (id INT, project_name VARCHAR(50), location VARCHAR(50), start_date DATE, end_date DATE, total_cost FLOAT); INSERT INTO public_works (id, project_name, location, start_date, end_date, total_cost) VALUES (1, 'Road Repaving', 'New York', '2020-01-01', '2020-12-31', 500000.00), (2, 'Bridge Construction', 'California', '2019-05-01', '2021-03-31', 2000000.00); CREATE TABLE civil_projects (id INT, project_name VARCHAR(50), location VARCHAR(50), start_date DATE, end_date DATE, total_cost FLOAT); INSERT INTO civil_projects (id, project_name, location, start_date, end_date, total_cost) VALUES (1, 'Dam Construction', 'Texas', '2018-05-01', '2020-01-15', 1500000.00), (2, 'Water Treatment Plant Upgrade', 'Florida', '2019-06-01', '2021-03-31', 800000.00);", "sql": "SELECT COUNT(*) FROM (SELECT * FROM public_works WHERE start_date >= '2019-01-01' AND end_date <= '2021-12-31' UNION ALL SELECT * FROM civil_projects WHERE start_date >= '2019-01-01' AND end_date <= '2021-12-31') AS completed_projects;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 235, "num_statements": 1} {"question": "List the electric vehicle models and their adoption rates in the US.", "schema": "CREATE TABLE ElectricVehicleAdoption (Model VARCHAR(20), Country VARCHAR(10), AdoptionRate FLOAT);", "sql": "SELECT Model, AdoptionRate FROM ElectricVehicleAdoption WHERE Country = 'US';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 77, "num_statements": 1} {"question": "Show a SQL definition from the zombodb project (issue-295, item 1).", "schema": null, "sql": "CREATE TABLE rvs (\n id BIGSERIAL NOT NULL CONSTRAINT pk_rvs PRIMARY KEY,\n txt TEXT\n);", "explanation": "SQL definition from the open-source zombodb PostgreSQL project.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_table", "is_postgresql_specific": true, "sql_length": 119, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Where did the player loaned out on 27 november 2008 move to ?", "schema": "CREATE TABLE table_name_65 (moving_to VARCHAR, date_from VARCHAR)", "sql": "SELECT moving_to FROM table_name_65 WHERE date_from = '27 november 2008';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 73, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which outgoing manager was replaced by Jafar Fatahi?", "schema": "CREATE TABLE table_22297140_3 (outgoing_manager VARCHAR, replaced_by VARCHAR)", "sql": "SELECT outgoing_manager FROM table_22297140_3 WHERE replaced_by = 'Jafar Fatahi';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 81, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: what are all the location where station number is c11", "schema": "CREATE TABLE table_11934032_1 (location VARCHAR, station_number VARCHAR)", "sql": "SELECT location FROM table_11934032_1 WHERE station_number = 'C11';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 67, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What team has a yamaha constructor with all rounds?", "schema": "CREATE TABLE table_name_60 (team VARCHAR, rounds VARCHAR, constructor VARCHAR)", "sql": "SELECT team FROM table_name_60 WHERE rounds = 'all' AND constructor = 'yamaha';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 79, "num_statements": 1} {"question": "Which OTAs in Australia have the highest number of hotel listings with AI-powered services?", "schema": "CREATE TABLE ota_hotel (ota_id INT, ota_name TEXT, region TEXT, ai_powered TEXT, hotel_listings INT); INSERT INTO ota_hotel (ota_id, ota_name, region, ai_powered, hotel_listings) VALUES (1, 'TravelEase', 'Australia', 'yes', 1000), (2, 'VoyagePlus', 'Australia', 'no', 800), (3, 'ExploreNow', 'Australia', 'yes', 1200);", "sql": "SELECT ota_name, MAX(hotel_listings) FROM ota_hotel WHERE region = 'Australia' AND ai_powered = 'yes' GROUP BY ota_name;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 120, "num_statements": 1} {"question": "What is the average emergency room wait time for patients in rural Brazil, and how many nurses are available per patient in the emergency room?", "schema": "CREATE TABLE patients (patient_id INT, age INT, emergency_room_wait_time INT, location VARCHAR(255)); INSERT INTO patients (patient_id, age, emergency_room_wait_time, location) VALUES (12, 32, 60, 'rural Brazil'); INSERT INTO patients (patient_id, age, emergency_room_wait_time, location) VALUES (13, 50, 90, 'rural Brazil'); CREATE TABLE nurses (nurse_id INT, location VARCHAR(255)); INSERT INTO nurses (nurse_id, location) VALUES (120, 'rural Brazil'); INSERT INTO nurses (nurse_id, location) VALUES (121, 'rural Brazil');", "sql": "SELECT AVG(emergency_room_wait_time) AS avg_wait_time, COUNT(nurses.nurse_id) / COUNT(DISTINCT patients.patient_id) AS nurses_per_patient FROM patients INNER JOIN nurses ON patients.location = nurses.location WHERE patients.location LIKE 'rural% Brazil';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 254, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which player from the 2004 CFL draft attended Wilfrid Laurier?", "schema": "CREATE TABLE table_10975034_2 (player VARCHAR, college VARCHAR)", "sql": "SELECT player FROM table_10975034_2 WHERE college = 'Wilfrid Laurier';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 70, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: When did Austin Austin TX get the third place?", "schema": "CREATE TABLE table_2011349_2 (year VARCHAR, third_place VARCHAR)", "sql": "SELECT year FROM table_2011349_2 WHERE third_place = 'Austin Austin TX';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 72, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which McIntosh has a Stuart of bass, and a Linda McCartney of keyboards or drum?", "schema": "CREATE TABLE table_name_78 (mcintosh VARCHAR, stuart VARCHAR, linda_mccartney VARCHAR)", "sql": "SELECT mcintosh FROM table_name_78 WHERE stuart = 'bass' AND linda_mccartney = 'keyboards or drum';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 99, "num_statements": 1} {"question": "What is the percentage of donations made in each year compared to the total donations?", "schema": "CREATE TABLE Donations (DonationID INT, DonationYear INT, DonationAmount DECIMAL(10,2)); INSERT INTO Donations (DonationID, DonationYear, DonationAmount) VALUES (1, 2020, 1000.00), (2, 2019, 1500.00), (3, 2018, 2000.00), (4, 2020, 500.00), (5, 2019, 800.00), (6, 2018, 1200.00), (7, 2017, 700.00), (8, 2016, 600.00);", "sql": "SELECT DonationYear, SUM(DonationAmount) AS TotalDonation, SUM(DonationAmount) OVER () AS TotalDonations, (SUM(DonationAmount) / SUM(DonationAmount) OVER ()) * 100.0 AS DonationPercentage FROM Donations GROUP BY DonationYear;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_window_function", "is_postgresql_specific": false, "sql_length": 225, "num_statements": 1} {"question": "Identify space debris mitigation initiatives by ESA and NASA", "schema": "CREATE TABLE debris_mitigation (initiative VARCHAR(50), agency VARCHAR(50)); INSERT INTO debris_mitigation (initiative, agency) VALUES ('Active Debris Removal', 'ESA'), ('Passive Debris Removal', 'ESA'), ('Space Debris Sensor', 'NASA'), ('Orbital Debris Observatory', 'NASA');", "sql": "SELECT initiative FROM debris_mitigation WHERE agency IN ('ESA', 'NASA');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 73, "num_statements": 1} {"question": "Increase the yield of crop 'moringa' in farm 'Sustainable Grove' by 30 in 2023", "schema": "CREATE TABLE farms (id INT, name TEXT, location TEXT, size FLOAT); INSERT INTO farms (id, name, location, size) VALUES (1, 'Sustainable Grove', 'India', 180.0); CREATE TABLE crops (id INT, farm_id INT, crop TEXT, yield INT, year INT); INSERT INTO crops (id, farm_id, crop, yield, year) VALUES (1, 1, 'moringa', 100, 2023);", "sql": "UPDATE crops SET yield = yield + 30 WHERE farm_id = (SELECT id FROM farms WHERE name = 'Sustainable Grove') AND crop = 'moringa' AND year = 2023;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 145, "num_statements": 1} {"question": "What percentage of products in each segment are certified cruelty-free?", "schema": "CREATE TABLE products (product_id INT, segment VARCHAR(20), certified_cruelty_free BOOLEAN); INSERT INTO products (product_id, segment, certified_cruelty_free) VALUES (1, 'Natural', true), (2, 'Organic', false), (3, 'Natural', true); CREATE TABLE segments (segment VARCHAR(20), num_products INT); INSERT INTO segments (segment, num_products) VALUES ('Natural', 3), ('Organic', 2);", "sql": "SELECT segment, COUNT(*) * 100.0 / num_products AS percentage FROM products INNER JOIN segments ON products.segment = segments.segment WHERE certified_cruelty_free = true GROUP BY segment;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 188, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which Home Team has an Attendance of 1,859?", "schema": "CREATE TABLE table_name_14 (home_team VARCHAR, attendance VARCHAR)", "sql": "SELECT home_team FROM table_name_14 WHERE attendance = '1,859';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 63, "num_statements": 1} {"question": "PostgreSQL Json: show example 7.", "schema": null, "sql": "SELECT doc->'site_name' FROM websites WHERE doc @> '{\"tags\":[{\"term\":\"paris\"}, {\"term\":\"food\"}]}';", "explanation": "Example from PostgreSQL documentation on Json.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 98, "num_statements": 1} {"question": "What is the total value of military equipment sold to Canada by Lockheed Martin in 2020?", "schema": "CREATE TABLE MilitaryEquipmentSales (seller VARCHAR(255), buyer VARCHAR(255), equipment VARCHAR(255), sale_value FLOAT, sale_date DATE); INSERT INTO MilitaryEquipmentSales (seller, buyer, equipment, sale_value, sale_date) VALUES ('Lockheed Martin', 'Canada', 'F-35 Fighter Jet', 90000000, '2020-03-15');", "sql": "SELECT SUM(sale_value) FROM MilitaryEquipmentSales WHERE seller = 'Lockheed Martin' AND buyer = 'Canada' AND sale_date BETWEEN '2020-01-01' AND '2020-12-31';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 157, "num_statements": 1} {"question": "PostgreSQL regression test 'nls': Write the SELECT query (example 4).", "schema": null, "sql": "SELECT test_translation();", "explanation": "Regression test for Nls in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT test_translation()) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 26, "num_statements": 1} {"question": "What is the total network infrastructure investment in Texas for the last 3 years?", "schema": "CREATE TABLE infrastructure_investments (id INT, year INT, investment DECIMAL(10,2), state VARCHAR(50)); INSERT INTO infrastructure_investments (id, year, investment, state) VALUES (1, 2020, 500000, 'TX'); INSERT INTO infrastructure_investments (id, year, investment, state) VALUES (2, 2019, 600000, 'TX'); INSERT INTO infrastructure_investments (id, year, investment, state) VALUES (3, 2018, 700000, 'TX');", "sql": "SELECT SUM(investment) FROM infrastructure_investments WHERE state = 'TX' AND year BETWEEN 2018 AND 2020;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 105, "num_statements": 1} {"question": "What is the percentage of garments made from sustainable materials in each category?", "schema": "CREATE TABLE garments (garment_id INT, garment_name VARCHAR(255), category VARCHAR(255), is_sustainable_material BOOLEAN);", "sql": "SELECT category, 100.0 * AVG(CASE WHEN is_sustainable_material THEN 1.0 ELSE 0.0 END) AS percentage FROM garments GROUP BY category;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 132, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the year model for 1.3", "schema": "CREATE TABLE table_1444201_1 (year_model VARCHAR, model VARCHAR)", "sql": "SELECT year_model FROM table_1444201_1 WHERE model = '1.3';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 59, "num_statements": 1} {"question": "Insert a new record in the 'cities' table with name 'Yountville', state 'CA', and population 3000", "schema": "CREATE TABLE cities (id INT, name VARCHAR(50), state VARCHAR(2), population INT);", "sql": "INSERT INTO cities (name, state, population) VALUES ('Yountville', 'CA', 3000);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 79, "num_statements": 1} {"question": "PL/pgSQL test: Plpgsql Call (example 26).", "schema": null, "sql": "-- recursive with output arguments\n\nCREATE PROCEDURE test_proc7(x int, INOUT a int, INOUT b numeric)\nLANGUAGE plpgsql\nAS $$\nBEGIN\nIF x > 1 THEN\n a := x / 10;\n b := x / 2;\n CALL test_proc7(b::int, a, b);\nEND IF;\nEND;\n$$;", "explanation": "PL/pgSQL example from PostgreSQL source test for Plpgsql Call.", "validation_query": null, "source": "plpgsql_source", "difficulty": "basic", "category": "plpgsql_function", "is_postgresql_specific": true, "sql_length": 228, "num_statements": 6} {"question": "Generate PostgreSQL SQL for: The boat named Agamemnon has what status?", "schema": "CREATE TABLE table_name_94 (status VARCHAR, name VARCHAR)", "sql": "SELECT status FROM table_name_94 WHERE name = 'agamemnon';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 58, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: meadowlands sports complex is the circuit at which city/location?", "schema": "CREATE TABLE table_10707142_2 (city_location VARCHAR, circuit VARCHAR)", "sql": "SELECT city_location FROM table_10707142_2 WHERE circuit = 'Meadowlands Sports Complex';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 88, "num_statements": 1} {"question": "How many traditional arts are practiced in Europe?", "schema": "CREATE TABLE traditional_arts (id INT, name TEXT, type TEXT, region TEXT); INSERT INTO traditional_arts (id, name, type, region) VALUES (1, 'Catalan Castells', 'Construction', 'Europe');", "sql": "SELECT COUNT(*) FROM traditional_arts WHERE region = 'Europe';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 62, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'timestamp' (example 75).", "schema": null, "sql": "INSERT INTO TIMESTAMP_TBL VALUES ('Feb 16 17:32:01 0097 BC');", "explanation": "DML from PostgreSQL core regression test for Timestamp.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 61, "num_statements": 1} {"question": "How many cybersecurity incidents were reported in each region in 2020?", "schema": "CREATE TABLE CybersecurityIncidents (id INT, region VARCHAR(255), year INT, description TEXT); INSERT INTO CybersecurityIncidents (id, region, year, description) VALUES (1, 'North America', 2020, 'Incident 1'), (2, 'Europe', 2019, 'Incident 2'), (3, 'Asia', 2020, 'Incident 3');", "sql": "SELECT region, YEAR(datetime) AS year, COUNT(*) FROM CybersecurityIncidents WHERE year = 2020 GROUP BY region;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 110, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'timestamptz' (example 101).", "schema": null, "sql": "INSERT INTO TIMESTAMPTZ_TBL VALUES ('Feb 28 17:32:01 1996');", "explanation": "DML from PostgreSQL core regression test for Timestamptz.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 60, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the name of Richmond's home venue?", "schema": "CREATE TABLE table_name_45 (venue VARCHAR, home_team VARCHAR)", "sql": "SELECT venue FROM table_name_45 WHERE home_team = 'richmond';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 61, "num_statements": 1} {"question": "Show an example of PostgreSQL SELECT (example 14).", "schema": null, "sql": "WITH t AS ( SELECT random() AS x FROM generate_series(1, 3) ) SELECT * FROM t UNION ALL SELECT * FROM t; x -------------------- 0.534150459803641 0.520092216785997 0.0735620250925422 0.534150459803641 0.520092216785997 0.0735620250925422;", "explanation": "PostgreSQL SELECT command.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 238, "num_statements": 2} {"question": "Calculate the total fuel efficiency for vessels with successful inspections in the \"vessel_summary\" view.", "schema": "CREATE VIEW vessel_summary AS SELECT vessel_id, AVG(avg_speed) AS average_speed, SUM(fuel_efficiency) AS total_fuel_efficiency, COUNT(*) FILTER (WHERE result = 'PASS') AS successful_inspections FROM vessel_performance JOIN safety_records ON vessel_performance.vessel_id = safety_records.vessel_id GROUP BY vessel_id;", "sql": "SELECT vessel_id, SUM(total_fuel_efficiency) FROM vessel_summary WHERE successful_inspections IS NOT NULL GROUP BY vessel_id;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "query_aggregation", "is_postgresql_specific": true, "sql_length": 125, "num_statements": 1} {"question": "PostgreSQL regression test 'rowtypes': Write the SELECT query (example 18).", "schema": null, "sql": "select f1, q.c1 from quadtable;\t\t-- fails, q is a table reference\n\nselect f1, (q).c1, (qq.q).c1.i from quadtable qq;", "explanation": "Regression test for Rowtypes in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select f1, q.c1 from quadtable;\t\t-- fails, q is a table reference\n\nselect f1, (q).c1, (qq.q).c1.i from quadtable qq) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 116, "num_statements": 2} {"question": "Generate PostgreSQL SQL for: what is the total number of reasons for change where the date the successor was seated is june 8, 1876?", "schema": "CREATE TABLE table_2192067_4 (reason_for_change VARCHAR, date_successor_seated VARCHAR)", "sql": "SELECT COUNT(reason_for_change) FROM table_2192067_4 WHERE date_successor_seated = 'June 8, 1876';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 98, "num_statements": 1} {"question": "How many athletes participated in wellbeing programs in 2021?", "schema": "CREATE TABLE wellbeing (id INT, athlete_id INT, year INT, program_type VARCHAR(255)); INSERT INTO wellbeing (id, athlete_id, year, program_type) VALUES (1, 1, 2021, 'Yoga'); INSERT INTO wellbeing (id, athlete_id, year, program_type) VALUES (2, 2, 2020, 'Meditation');", "sql": "SELECT COUNT(*) FROM wellbeing WHERE year = 2021;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 49, "num_statements": 1} {"question": "List all players who have not played any FPS games", "schema": "CREATE TABLE Players (PlayerID INT, PlayedFPS BOOLEAN); INSERT INTO Players (PlayerID, PlayedFPS) VALUES (1, FALSE); INSERT INTO Players (PlayerID, PlayedFPS) VALUES (2, TRUE);", "sql": "SELECT * FROM Players WHERE PlayedFPS = FALSE;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 46, "num_statements": 1} {"question": "List all the government buildings and their respective agencies in the state of Texas from the 'tx_government_database'", "schema": "CREATE TABLE tx_agencies (id INT PRIMARY KEY, name VARCHAR(255), state VARCHAR(255));CREATE TABLE tx_buildings (id INT PRIMARY KEY, name VARCHAR(255), agency_id INT, FOREIGN KEY (agency_id) REFERENCES tx_agencies(id)); INSERT INTO tx_agencies (id, name, state) VALUES (1, 'Texas Department of Transportation', 'Texas'); INSERT INTO tx_agencies (id, name, state) VALUES (2, 'Texas Parks and Wildlife Department', 'Texas');", "sql": "SELECT tx_buildings.name as building_name, tx_agencies.name as agency_name FROM tx_buildings INNER JOIN tx_agencies ON tx_buildings.agency_id = tx_agencies.id WHERE tx_agencies.state = 'Texas';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 193, "num_statements": 1} {"question": "What is the total area covered by marine protected areas in the 'indian_ocean'?", "schema": "CREATE TABLE marine_protected_areas (name VARCHAR(255), area_size INTEGER, region VARCHAR(255));", "sql": "SELECT SUM(area_size) FROM marine_protected_areas WHERE region = 'Indian Ocean';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 80, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'plpgsql' (example 727).", "schema": null, "sql": "CREATE TRIGGER transition_table_level2_ri_child_upd_trigger\n AFTER UPDATE ON transition_table_level2\n REFERENCING NEW TABLE AS i\n FOR EACH STATEMENT EXECUTE PROCEDURE\n transition_table_level2_ri_child_insupd_func();", "explanation": "DDL from PostgreSQL core regression test for Plpgsql.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "dml_update", "is_postgresql_specific": true, "sql_length": 221, "num_statements": 1} {"question": "PostgreSQL regression test 'rowtypes': Write the SELECT query (example 131).", "schema": null, "sql": "select row(1, 'abc')::testtype3 *<> row(1, 'abd')::testtype3;", "explanation": "Regression test for Rowtypes in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select row(1, 'abc')::testtype3 *<> row(1, 'abd')::testtype3) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 61, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the date of the game with 38,062 people in attendance?", "schema": "CREATE TABLE table_name_80 (date VARCHAR, attendance VARCHAR)", "sql": "SELECT date FROM table_name_80 WHERE attendance = '38,062';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 59, "num_statements": 1} {"question": "What is the minimum mass of spacecrafts manufactured by Orbital Sciences that have been launched?", "schema": "CREATE TABLE Spacecrafts (id INT, name VARCHAR(100), manufacturer VARCHAR(100), mass FLOAT, launched BOOLEAN); INSERT INTO Spacecrafts (id, name, manufacturer, mass, launched) VALUES (1, 'OrbitalShip 1', 'Orbital Sciences', 1000, true), (2, 'OrbitalShip 2', 'Orbital Sciences', 2000, false);", "sql": "SELECT MIN(mass) FROM Spacecrafts WHERE manufacturer = 'Orbital Sciences' AND launched = true;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 94, "num_statements": 1} {"question": "PostgreSQL regression test 'sqljson_queryfuncs': Write the SELECT query (example 44).", "schema": null, "sql": "SELECT JSON_VALUE(jsonb '\"aaa\"', '$' RETURNING json ERROR ON ERROR);", "explanation": "Regression test for Sqljson Queryfuncs in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT JSON_VALUE(jsonb '\"aaa\"', '$' RETURNING json ERROR ON ERROR)) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": true, "sql_length": 68, "num_statements": 1} {"question": "What is the average age of all dams in the 'Appalachian' mountain range that were built before 1950?", "schema": "CREATE TABLE Dams (id INT, name TEXT, mountainRange TEXT, constructionYear INT);", "sql": "SELECT mountainRange, AVG(YEAR(CURRENT_DATE) - constructionYear) FROM Dams WHERE mountainRange = 'Appalachian' AND constructionYear < 1950 GROUP BY mountainRange;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 162, "num_statements": 1} {"question": "Update the age of the player with PlayerID 1 to 26.", "schema": "CREATE TABLE Players (PlayerID INT, Age INT, Gender VARCHAR(10)); INSERT INTO Players VALUES (1,25,'Male'),(2,30,'Female'),(3,35,'Non-binary');", "sql": "UPDATE Players SET Age = 26 WHERE PlayerID = 1;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 47, "num_statements": 1} {"question": "List the names and job titles of employees who have not completed diversity training.", "schema": "CREATE TABLE EmployeeTraining (EmployeeID INT, TrainingID INT, Completed BOOLEAN); CREATE TABLE Employees (EmployeeID INT, EmployeeName TEXT, JobTitle TEXT); CREATE TABLE Trainings (TrainingID INT, TrainingName TEXT); INSERT INTO Employees (EmployeeID, EmployeeName, JobTitle) VALUES (1, 'John Doe', 'Software Engineer'); INSERT INTO Employees (EmployeeID, EmployeeName, JobTitle) VALUES (2, 'Jane Smith', 'Project Manager'); INSERT INTO EmployeeTraining (EmployeeID, TrainingID, Completed) VALUES (1, 1, TRUE); INSERT INTO EmployeeTraining (EmployeeID, TrainingID, Completed) VALUES (2, 1, FALSE); INSERT INTO Trainings (TrainingID, TrainingName) VALUES (1, 'Diversity Training');", "sql": "SELECT Employees.EmployeeName, Employees.JobTitle FROM Employees INNER JOIN EmployeeTraining ON Employees.EmployeeID = EmployeeTraining.EmployeeID WHERE EmployeeTraining.Completed = FALSE AND EmployeeTraining.TrainingID IN (SELECT Trainings.TrainingID FROM Trainings WHERE Trainings.TrainingName = 'Diversity Training');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 320, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'rules' (example 419).", "schema": null, "sql": "create rule \"_RETURN\" as on select to rules_fooview_part do instead\n select 1 as x, 'aaa'::text as y;", "explanation": "DDL from PostgreSQL core regression test for Rules.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_advanced", "is_postgresql_specific": true, "sql_length": 102, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the home score with marek dupnitsa as opponent?", "schema": "CREATE TABLE table_name_33 (home VARCHAR, opponent VARCHAR)", "sql": "SELECT home FROM table_name_33 WHERE opponent = 'marek dupnitsa';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 65, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the On-air ID with the frequency of 105.9, and purpose of commercial?", "schema": "CREATE TABLE table_name_24 (on_air_id VARCHAR, frequency VARCHAR, purpose VARCHAR)", "sql": "SELECT on_air_id FROM table_name_24 WHERE frequency = '105.9' AND purpose = 'commercial';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 89, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the preliminaries of the contestant with a swimsuit less than 8.948 and an interview of 8.997?", "schema": "CREATE TABLE table_name_99 (preliminaries VARCHAR, swimsuit VARCHAR, interview VARCHAR)", "sql": "SELECT preliminaries FROM table_name_99 WHERE swimsuit < 8.948 AND interview = 8.997;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 85, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: which examples are pronounced et in australian", "schema": "CREATE TABLE table_17798093_20 (examples VARCHAR, australian VARCHAR)", "sql": "SELECT examples FROM table_17798093_20 WHERE australian = 'et';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 63, "num_statements": 1} {"question": "What is the average salary by mine and role?", "schema": "CREATE TABLE mine (id INT, name VARCHAR(50), location VARCHAR(50)); CREATE TABLE employee (id INT, mine_id INT, gender VARCHAR(10), role VARCHAR(20), salary INT);", "sql": "SELECT mine.name, employee.role, AVG(employee.salary) FROM employee JOIN mine ON employee.mine_id = mine.id GROUP BY mine.name, employee.role;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 142, "num_statements": 1} {"question": "Find the number of bike trips per hour for the month of January 2022", "schema": "CREATE TABLE bike_trips (id INT PRIMARY KEY, trip_time TIMESTAMP, trip_duration INT);", "sql": "SELECT HOUR(trip_time) AS hour, COUNT(*) AS num_trips FROM bike_trips WHERE trip_time >= '2022-01-01 00:00:00' AND trip_time < '2022-02-01 00:00:00' GROUP BY hour;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 163, "num_statements": 1} {"question": "Which deep-sea expeditions have discovered new species?", "schema": "CREATE TABLE deep_sea_expeditions (expedition_id INT, name VARCHAR(100), discovered_new_species BOOLEAN); INSERT INTO deep_sea_expeditions (expedition_id, name, discovered_new_species) VALUES (1, 'Challenger Expedition', TRUE); INSERT INTO deep_sea_expeditions (expedition_id, name, discovered_new_species) VALUES (2, 'Galathea Expedition', FALSE);", "sql": "SELECT name FROM deep_sea_expeditions WHERE discovered_new_species = TRUE;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 74, "num_statements": 1} {"question": "How many security incidents were reported from Asia in the last quarter?", "schema": "CREATE TABLE security_incidents (id INT, region VARCHAR(255), incident_date DATE); INSERT INTO security_incidents (id, region, incident_date) VALUES (1, 'Asia', '2021-10-15'), (2, 'Europe', '2021-12-20'), (3, 'Asia', '2021-11-03');", "sql": "SELECT COUNT(*) FROM security_incidents WHERE incident_date >= DATE_SUB(CURRENT_DATE, INTERVAL 3 MONTH) AND region = 'Asia';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 124, "num_statements": 1} {"question": "What is the success rate of medication for patients with PTSD in Florida?", "schema": "CREATE TABLE patients (patient_id INT, patient_name TEXT, condition TEXT, therapist_id INT, treatment TEXT, success BOOLEAN); INSERT INTO patients (patient_id, patient_name, condition, therapist_id, treatment, success) VALUES (1, 'James Johnson', 'PTSD', 1, 'Medication', TRUE); INSERT INTO patients (patient_id, patient_name, condition, therapist_id, treatment, success) VALUES (2, 'Sophia Lee', 'PTSD', 1, 'Meditation', FALSE); CREATE TABLE therapists (therapist_id INT, therapist_name TEXT, state TEXT); INSERT INTO therapists (therapist_id, therapist_name, state) VALUES (1, 'Dr. Maria Rodriguez', 'Florida');", "sql": "SELECT COUNT(patients.success) * 100.0 / (SELECT COUNT(*) FROM patients WHERE patients.condition = 'PTSD' AND patients.therapist_id = 1) FROM patients WHERE patients.condition = 'PTSD' AND patients.treatment = 'Medication' AND patients.therapist_id = 1;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 253, "num_statements": 1} {"question": "What is the maximum food cost percentage for menu items that have been ordered more than 100 times?", "schema": "CREATE TABLE MenuItems(menu_item_id INT, item_name VARCHAR(255), order_count INT, food_cost_percentage DECIMAL(5,2));", "sql": "SELECT MAX(food_cost_percentage) FROM MenuItems WHERE order_count > 100;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 72, "num_statements": 1} {"question": "What is the total quantity of sustainable textiles used in manufacturing for each trend?", "schema": "CREATE TABLE SustainableManufacturing (id INT, trend VARCHAR(20), fabric VARCHAR(20), quantity INT); INSERT INTO SustainableManufacturing (id, trend, fabric, quantity) VALUES (1, 'neutrals', 'organic linen', 600), (2, 'brights', 'recycled silk', 800);", "sql": "SELECT trend, SUM(quantity) FROM SustainableManufacturing WHERE fabric LIKE '%sustainable%' GROUP BY trend;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 107, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the SPA FEA when the LMS FEA was 5?", "schema": "CREATE TABLE table_name_60 (spa_fea VARCHAR, lms_fea VARCHAR)", "sql": "SELECT spa_fea FROM table_name_60 WHERE lms_fea = '5';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 54, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is Timo Lehkonen's highest pick number?", "schema": "CREATE TABLE table_2850912_5 (pick__number INTEGER, player VARCHAR)", "sql": "SELECT MAX(pick__number) FROM table_2850912_5 WHERE player = 'Timo Lehkonen';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 77, "num_statements": 1} {"question": "What is the total number of unique users who have liked a post in the 'social_media' database?", "schema": "CREATE TABLE posts (id INT, user_id INT, content TEXT, timestamp TIMESTAMP, likes INT); CREATE TABLE likes (post_id INT, user_id INT); CREATE TABLE users (id INT, name VARCHAR(50), gender VARCHAR(10), age INT, location VARCHAR(50));", "sql": "SELECT COUNT(DISTINCT users.id) AS total_unique_users FROM users JOIN likes ON users.id = likes.user_id JOIN posts ON likes.post_id = posts.id;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 143, "num_statements": 1} {"question": "What is the average rating of hotels in 'Asia' that have been reviewed more than 50 times?", "schema": "CREATE TABLE hotels (hotel_id INT, hotel_name VARCHAR(255), rating DECIMAL(2,1), country VARCHAR(255)); INSERT INTO hotels (hotel_id, hotel_name, rating, country) VALUES (1, 'Hotel Tokyo', 4.3, 'Japan'), (2, 'Hotel Mumbai', 4.0, 'India'), (3, 'Hotel Bangkok', 4.7, 'Thailand');", "sql": "SELECT AVG(rating) FROM (SELECT rating FROM hotels WHERE country = 'Asia' GROUP BY rating HAVING COUNT(*) > 50) AS subquery;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 124, "num_statements": 1} {"question": "List all deep-sea species in the Pacific Ocean and their maximum depths.", "schema": "CREATE TABLE deep_sea_species (name VARCHAR(255), habitat VARCHAR(255), max_depth FLOAT); INSERT INTO deep_sea_species (name, habitat, max_depth) VALUES ('Anglerfish', 'Pacific Ocean', 3000), ('Giant Squid', 'Pacific Ocean', 3300);", "sql": "SELECT name, max_depth FROM deep_sea_species WHERE habitat = 'Pacific Ocean';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 77, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the highest number of fai space flights when max mach is 5.65?", "schema": "CREATE TABLE table_221315_3 (fai_space_flights INTEGER, max_mach VARCHAR)", "sql": "SELECT MAX(fai_space_flights) FROM table_221315_3 WHERE max_mach = '5.65';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 74, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the score of the game that home team birmingham city played?", "schema": "CREATE TABLE table_name_84 (score VARCHAR, home_team VARCHAR)", "sql": "SELECT score FROM table_name_84 WHERE home_team = 'birmingham city';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 68, "num_statements": 1} {"question": "Write the PL/pgSQL object from PostgreSQL regression test 'alter_table' (example 759).", "schema": null, "sql": "-- test case where the partitioning operator is a SQL function whose\n-- evaluation results in the table's relcache being rebuilt partway through\n-- the execution of an ATTACH PARTITION command\ncreate function at_test_sql_partop (int4, int4) returns int language sql\nas $$ select case when $1 = $2 then 0 when $1 > $2 then 1 else -1 end; $$;\ncreate operator class at_test_sql_partop for type int4 using btree as\n operator 1 < (int4, int4), operator 2 <= (int4, int4),\n operator 3 = (int4, int4), operator 4 >= (int4, int4),\n operator 5 > (int4, int4), function 1 at_test_sql_partop(int4, int4);\ncreate table at_test_sql_partop (a int) partition by range (a at_test_sql_partop);\ncreate table at_test_sql_partop_1 (a int);\nalter table at_test_sql_partop attach partition at_test_sql_partop_1 for values from (0) to (10);\ndrop table at_test_sql_partop;\ndrop operator class at_test_sql_partop using btree;\ndrop function at_test_sql_partop;\n\n\n/* Test case for bug #16242 */\n\n-- We create a parent and child where the child has missing\n-- non-null attribute values, and arrange to pass them through\n-- tuple conversion from the child to the parent tupdesc\ncreate table bar1 (a integer, b integer not null default 1)\n partition by range (a);\ncreate table bar2 (a integer);\ninsert into bar2 values (1);\nalter table bar2 add column b integer not null default 1;\n-- (at this point bar2 contains tuple with natts=1)\nalter table bar1 attach partition bar2 default;\n\n-- this works:\nselect * from bar1;\n\n-- this exercises tuple conversion:\ncreate function xtrig()\n returns trigger language plpgsql\nas $$\n declare\n r record;", "explanation": "PL/pgSQL object from PostgreSQL core test for Alter Table.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "plpgsql_trigger", "is_postgresql_specific": true, "sql_length": 1623, "num_statements": 16} {"question": "Generate PostgreSQL SQL for: What did the team score while away in moorabbin oval?", "schema": "CREATE TABLE table_name_30 (away_team VARCHAR, venue VARCHAR)", "sql": "SELECT away_team AS score FROM table_name_30 WHERE venue = 'moorabbin oval';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 76, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the finishing time of Stage SS6?", "schema": "CREATE TABLE table_name_63 (time VARCHAR, stage VARCHAR)", "sql": "SELECT time FROM table_name_63 WHERE stage = 'ss6';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 51, "num_statements": 1} {"question": "PostgreSQL regression test 'rules': Write the SELECT query (example 469).", "schema": null, "sql": "select * from rules_src;", "explanation": "Regression test for Rules in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select * from rules_src) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 24, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the record when they played New York?", "schema": "CREATE TABLE table_name_64 (record VARCHAR, opponent VARCHAR)", "sql": "SELECT record FROM table_name_64 WHERE opponent = 'new york';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 61, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the attendance with result of won 1-0", "schema": "CREATE TABLE table_name_27 (attendance VARCHAR, result VARCHAR)", "sql": "SELECT attendance FROM table_name_27 WHERE result = 'won 1-0';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 62, "num_statements": 1} {"question": "List all public services provided by the city of San Francisco and their respective allocations for 2023, ordered by allocation amount in descending order.", "schema": "CREATE TABLE city_services (city varchar(50), year int, service varchar(50), allocation int); INSERT INTO city_services (city, year, service, allocation) VALUES ('San Francisco', 2023, 'Waste Collection', 25000000), ('San Francisco', 2023, 'Street Cleaning', 15000000), ('San Francisco', 2023, 'Parks Maintenance', 10000000);", "sql": "SELECT service, allocation FROM city_services WHERE city = 'San Francisco' AND year = 2023 ORDER BY allocation DESC;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 116, "num_statements": 1} {"question": "How many rock songs were streamed last month?", "schema": "CREATE TABLE StreamingData (StreamID INT, SongID INT, StreamDate DATE, Genre VARCHAR(50), StreamCount INT); INSERT INTO StreamingData (StreamID, SongID, StreamDate, Genre, StreamCount) VALUES (1, 1, '2022-01-01', 'Rock', 100); INSERT INTO StreamingData (StreamID, SongID, StreamDate, Genre, StreamCount) VALUES (2, 2, '2022-01-02', 'Rock', 150);", "sql": "SELECT SUM(StreamCount) FROM StreamingData WHERE StreamDate >= DATEADD(month, -1, GETDATE()) AND Genre = 'Rock';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 112, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which Eliminated has an Entered smaller than 2?", "schema": "CREATE TABLE table_name_42 (eliminated VARCHAR, entered INTEGER)", "sql": "SELECT eliminated AS by FROM table_name_42 WHERE entered < 2;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 61, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which Points is the lowest one that has a Position larger than 5?", "schema": "CREATE TABLE table_name_36 (points INTEGER, position INTEGER)", "sql": "SELECT MIN(points) FROM table_name_36 WHERE position > 5;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 57, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many stumpings has Paul Nixon in his career?", "schema": "CREATE TABLE table_11303072_9 (stumpings VARCHAR, player VARCHAR)", "sql": "SELECT stumpings FROM table_11303072_9 WHERE player = 'Paul Nixon';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 67, "num_statements": 1} {"question": "What is the average budget for all movies produced in India?", "schema": "CREATE TABLE indian_movies (id INT, title VARCHAR(255), budget FLOAT, production_country VARCHAR(100)); INSERT INTO indian_movies (id, title, budget, production_country) VALUES (1, 'Movie1', 5000000, 'India'), (2, 'Movie2', 7000000, 'India'), (3, 'Movie3', 3000000, 'India');", "sql": "SELECT AVG(budget) FROM indian_movies;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 38, "num_statements": 1} {"question": "Write the SQL definition from PostgreSQL contrib extension 'contrib' (item 62).", "schema": null, "sql": "CREATE FUNCTION g_int_compress(internal)\nRETURNS internal\nAS 'MODULE_PATHNAME'\nLANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;", "explanation": "SQL definition from the 'contrib' PostgreSQL contrib extension.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 121, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'event_trigger' (example 19).", "schema": null, "sql": "DELETE FROM undroppable_objs WHERE object_identity = 'audit_tbls.schema_two_table_three';", "explanation": "DML from PostgreSQL core regression test for Event Trigger.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_delete", "is_postgresql_specific": false, "sql_length": 89, "num_statements": 1} {"question": "How many events had more than 50 attendees from historically underrepresented communities?", "schema": "CREATE TABLE Events (EventID INT, EventDate DATE, TotalAttendees INT, CommunityType VARCHAR(255)); INSERT INTO Events (EventID, EventDate, TotalAttendees, CommunityType) VALUES (1, '2022-04-01', 75, 'Underrepresented'), (2, '2022-05-01', 30, 'Represented');", "sql": "SELECT COUNT(*) FROM Events WHERE TotalAttendees > 50 AND CommunityType = 'Underrepresented';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 93, "num_statements": 1} {"question": "How many heritage sites are in each continent?", "schema": "CREATE TABLE Heritagesites (id INT, name VARCHAR(255), continent VARCHAR(255)); INSERT INTO Heritagesites (id, name, continent) VALUES (1, 'Great Wall of China', 'Asia'), (2, 'Machu Picchu', 'South America'), (3, 'Eiffel Tower', 'Europe');", "sql": "SELECT continent, COUNT(*) FROM Heritagesites GROUP BY continent;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 65, "num_statements": 1} {"question": "PostgreSQL regression test 'polymorphism': Write the SELECT query (example 32).", "schema": null, "sql": "select polyf(multirange(int4range(42, 49)), array[11]) as int, polyf(multirange(float8range(4.5, 7.8)), array[7]) as num;", "explanation": "Regression test for Polymorphism in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select polyf(multirange(int4range(42, 49)), array[11]) as int, polyf(multirange(float8range(4.5, 7.8)), array[7]) as num) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": true, "sql_length": 121, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'pageinspect' (example 51).", "schema": null, "sql": "SELECT page_header('ccc'::bytea);", "explanation": "Example query from the 'pageinspect' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 33, "num_statements": 1} {"question": "What is the average age of players who have played in at least one esports tournament, and what is the total number of tournaments they have played in?", "schema": "CREATE TABLE Players (PlayerID INT, Age INT); INSERT INTO Players (PlayerID, Age) VALUES (1, 30); INSERT INTO Players (PlayerID, Age) VALUES (2, 25); CREATE TABLE Tournaments (TournamentID INT, TournamentName VARCHAR(50)); INSERT INTO Tournaments (TournamentID, TournamentName) VALUES (1, 'Tournament 1'); INSERT INTO Tournaments (TournamentID, TournamentName) VALUES (2, 'Tournament 2'); CREATE TABLE Participation (PlayerID INT, TournamentID INT); INSERT INTO Participation (PlayerID, TournamentID) VALUES (1, 1); INSERT INTO Participation (PlayerID, TournamentID) VALUES (2, 2);", "sql": "SELECT AVG(Players.Age) AS AvgAge, COUNT(DISTINCT Participation.TournamentID) AS NumTournaments FROM Players INNER JOIN Participation ON Players.PlayerID = Participation.PlayerID;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 179, "num_statements": 1} {"question": "What are the total marketing costs for each TV show genre in Q1 2020?", "schema": "CREATE TABLE TVShows (show_id INT, title VARCHAR(255), release_date DATE, genre VARCHAR(255), marketing_cost DECIMAL(5,2)); INSERT INTO TVShows (show_id, title, release_date, genre, marketing_cost) VALUES (1, 'Show1', '2019-10-01', 'Sci-Fi', 500000.00), (2, 'Show2', '2018-04-15', 'Comedy', 350000.00), (3, 'Show3', '2020-02-20', 'Action', 750000.00);", "sql": "SELECT genre, SUM(marketing_cost) FROM TVShows WHERE release_date >= '2020-01-01' AND release_date < '2020-04-01' GROUP BY genre;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 129, "num_statements": 1} {"question": "What is the average fare collected from each vehicle type?", "schema": "CREATE TABLE Fares (id INT, vehicle_type VARCHAR(10), fare DECIMAL(5,2)); INSERT INTO Fares (id, vehicle_type, fare) VALUES (1, 'Bus', 2.50), (2, 'Tram', 3.00), (3, 'Train', 5.00);", "sql": "SELECT vehicle_type, AVG(fare) FROM Fares GROUP BY vehicle_type;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 64, "num_statements": 1} {"question": "PostgreSQL regression test 'window': Write the SELECT query (example 105).", "schema": null, "sql": "select sum(salary) over (order by enroll_date range between '1 year'::interval preceding and '1 year'::interval following),\n\tsalary, enroll_date from empsalary;", "explanation": "Regression test for Window in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select sum(salary) over (order by enroll_date range between '1 year'::interval preceding and '1 year'::interval following),\n\tsalary, enroll_date from empsalary) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "intermediate", "category": "query_window_function", "is_postgresql_specific": true, "sql_length": 160, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many PPV values are listed for when television service Sky Cinema 1?", "schema": "CREATE TABLE table_15887683_5 (ppv VARCHAR, television_service VARCHAR)", "sql": "SELECT COUNT(ppv) FROM table_15887683_5 WHERE television_service = 'Sky Cinema 1';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 82, "num_statements": 1} {"question": "What was the total R&D expenditure for anti-inflammatory drugs?", "schema": "CREATE TABLE rd_expenditure (drug_class TEXT, expenditure INTEGER);", "sql": "SELECT SUM(expenditure) FROM rd_expenditure WHERE drug_class = 'anti-inflammatory';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 83, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is every value for Under-11 if Under-19 is Leong Siu Lynn?", "schema": "CREATE TABLE table_26368963_2 (under_11 VARCHAR, under_19 VARCHAR)", "sql": "SELECT under_11 FROM table_26368963_2 WHERE under_19 = 'Leong Siu Lynn';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 72, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the average no opinion score during 1954 November that is more favorable than 35?", "schema": "CREATE TABLE table_name_94 (no_opinion INTEGER, date VARCHAR, favorable VARCHAR)", "sql": "SELECT AVG(no_opinion) FROM table_name_94 WHERE date = '1954 november' AND favorable > 35;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 90, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the Mascot of Brook?", "schema": "CREATE TABLE table_name_25 (mascot VARCHAR, location VARCHAR)", "sql": "SELECT mascot FROM table_name_25 WHERE location = 'brook';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 58, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which classrooms are used by grade 4?", "schema": "CREATE TABLE list (classroom VARCHAR, grade VARCHAR)", "sql": "SELECT DISTINCT classroom FROM list WHERE grade = 4;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 52, "num_statements": 1} {"question": "Update the 'cost' value for the record with 'project_id' = 1 in the 'infrastructure_development' table to 350000000", "schema": "CREATE TABLE infrastructure_development (project_id INT, location VARCHAR(30), start_date DATE, end_date DATE, cost INT); INSERT INTO infrastructure_development (project_id, location, start_date, end_date, cost) VALUES (1, 'North Sea', '2017-01-01', '2021-12-31', 400000000);", "sql": "UPDATE infrastructure_development SET cost = 350000000 WHERE project_id = 1;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 76, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the total number of byes that has 13 losses and wins less than 5?", "schema": "CREATE TABLE table_name_41 (byes VARCHAR, losses VARCHAR, wins VARCHAR)", "sql": "SELECT COUNT(byes) FROM table_name_41 WHERE losses = 13 AND wins < 5;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 69, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the Set 1 with set 2 at 21:17?", "schema": "CREATE TABLE table_name_93 (set_1 VARCHAR, set_2 VARCHAR)", "sql": "SELECT set_1 FROM table_name_93 WHERE set_2 = '21:17';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 54, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: When the team is ypiranga-sp what is the number of won games?", "schema": "CREATE TABLE table_15405904_1 (won INTEGER, team VARCHAR)", "sql": "SELECT MIN(won) FROM table_15405904_1 WHERE team = 'Ypiranga-SP';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 65, "num_statements": 1} {"question": "pgTAP test for Check (assertion 3).", "schema": null, "sql": "CREATE TABLE public.sometab(\n id INT NOT NULL PRIMARY KEY,\n name TEXT DEFAULT '' CHECK ( name IN ('foo', 'bar', 'baz') ),\n numb NUMERIC(10, 2),\n myint NUMERIC(8),\n CHECK (numb > 1.0 AND myint < 10)\n);", "explanation": "SQL assertion from pgTAP test suite for Check.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "ddl_table", "is_postgresql_specific": false, "sql_length": 220, "num_statements": 1} {"question": "What was the minimum response time for fire incidents in the Downtown district in 2021?", "schema": "CREATE TABLE districts (id INT, name TEXT); INSERT INTO districts (id, name) VALUES (1, 'Downtown'), (2, 'Uptown'), (3, 'Midtown'); CREATE TABLE fire_incidents (id INT, district_id INT, response_time INT, incident_date DATE); INSERT INTO fire_incidents (id, district_id, response_time, incident_date) VALUES (1, 1, 5, '2021-01-01'), (2, 1, 4, '2021-02-15'), (3, 1, 6, '2021-03-10');", "sql": "SELECT MIN(response_time) FROM fire_incidents WHERE district_id = 1 AND YEAR(incident_date) = 2021;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 99, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What number pick was the player with the hometown school of concordia college?", "schema": "CREATE TABLE table_name_96 (pick VARCHAR, hometown_school VARCHAR)", "sql": "SELECT pick FROM table_name_96 WHERE hometown_school = 'concordia college';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 75, "num_statements": 1} {"question": "Who is the most prolific artist in the database?", "schema": "CREATE TABLE Artworks (ArtworkID INT, Title VARCHAR(50), Gallery VARCHAR(50), ArtistID INT); INSERT INTO Artworks (ArtworkID, Title, Gallery, ArtistID) VALUES (1, 'Starry Night', 'ImpressionistGallery', 1); INSERT INTO Artworks (ArtworkID, Title, Gallery, ArtistID) VALUES (2, 'Sunflowers', 'ImpressionistGallery', 1); INSERT INTO Artworks (ArtworkID, Title, Gallery, ArtistID) VALUES (3, 'Untitled', 'ContemporaryArt', 2); INSERT INTO Artworks (ArtworkID, Title, Gallery, ArtistID) VALUES (4, 'Untitled2', 'ContemporaryArt', 2); INSERT INTO Artworks (ArtworkID, Title, Gallery, ArtistID) VALUES (5, 'Untitled3', 'ContemporaryArt', 3);", "sql": "SELECT ArtistID, COUNT(*) as Count FROM Artworks GROUP BY ArtistID ORDER BY Count DESC LIMIT 1;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 95, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the social democrat released on October 10, 1999?", "schema": "CREATE TABLE table_name_94 (social_democratic VARCHAR, date_released VARCHAR)", "sql": "SELECT social_democratic FROM table_name_94 WHERE date_released = 'october 10, 1999';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 85, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'sqljson_jsontable' (example 17).", "schema": null, "sql": "CREATE TEMP TABLE json_table_test (js) AS\n\t(VALUES\n\t\t('1'),\n\t\t('[]'),\n\t\t('{}'),\n\t\t('[1, 1.23, \"2\", \"aaaaaaa\", \"foo\", null, false, true, {\"aaa\": 123}, \"[1,2]\", \"\\\"str\\\"\"]')\n\t);", "explanation": "DDL from PostgreSQL core regression test for Sqljson Jsontable.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 175, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'pg_stat_statements' (example 14).", "schema": null, "sql": "SELECT 1 AS \"int\" OFFSET 1 FETCH FIRST 2 ROW ONLY;", "explanation": "Example query from the 'pg_stat_statements' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "query_select", "is_postgresql_specific": false, "sql_length": 50, "num_statements": 1} {"question": "What is the average score of players who joined in the same month as the release of game V?", "schema": "CREATE TABLE game_V (player_id INT, join_date DATE, score INT);", "sql": "SELECT AVG(score) FROM game_V WHERE MONTH(join_date) = MONTH(DATE('2022-04-01'));", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 81, "num_statements": 1} {"question": "Which destination marketing organizations are in the Asia-Pacific region?", "schema": "CREATE TABLE dmo (id INT, name TEXT, region TEXT); INSERT INTO dmo (id, name, region) VALUES (1, 'Tourism Australia', 'Australia'), (2, 'Japan National Tourism Organization', 'Japan'), (3, 'New Zealand Tourism Board', 'New Zealand');", "sql": "SELECT name FROM dmo WHERE region = 'Asia-Pacific';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 51, "num_statements": 1} {"question": "List the names of the volunteers who have participated in disaster response training programs and their respective trainers, including the organization they represent.", "schema": "CREATE TABLE Volunteers (id INT, name TEXT, organization TEXT); INSERT INTO Volunteers (id, name, organization) VALUES (1, 'Alex', 'Red Cross'), (2, 'Jamie', 'Doctors Without Borders'), (3, 'Sophia', 'World Vision'); CREATE TABLE Trainers (id INT, name TEXT, organization TEXT); INSERT INTO Trainers (id, name, organization) VALUES (4, 'Mark', 'Red Cross'), (5, 'Emily', 'Doctors Without Borders'), (6, 'Oliver', 'World Vision'); CREATE TABLE Training_Programs (id INT, trainer_id INT, volunteer_id INT, program TEXT, date DATE); INSERT INTO Training_Programs (id, trainer_id, volunteer_id, program, date) VALUES (1, 4, 1, 'Disaster Response', '2022-02-15'), (2, 5, 2, 'Disaster Response', '2022-02-16'), (3, 6, 3, 'Disaster Response', '2022-02-17');", "sql": "SELECT V.name as volunteer_name, T.name as trainer_name, T.organization as trainer_organization FROM Volunteers V INNER JOIN Training_Programs TP ON V.id = TP.volunteer_id INNER JOIN Trainers T ON TP.trainer_id = T.id WHERE TP.program = 'Disaster Response';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 257, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'rangetypes' (example 172).", "schema": null, "sql": "insert into test_range_gist select int4range(g, g+10) from generate_series(1,2000) g;", "explanation": "DML from PostgreSQL core regression test for Rangetypes.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 85, "num_statements": 1} {"question": "What is the average number of defense diplomacy events conducted by India from 2015 to 2020?", "schema": "CREATE TABLE defense_diplomacy (country VARCHAR(50), year INT, event_count INT); INSERT INTO defense_diplomacy (country, year, event_count) VALUES ('India', 2015, 5), ('India', 2016, 6), ('India', 2017, 7), ('India', 2018, 8), ('India', 2019, 9), ('India', 2020, 10);", "sql": "SELECT AVG(event_count) FROM defense_diplomacy WHERE country = 'India' AND year BETWEEN 2015 AND 2020;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 102, "num_statements": 1} {"question": "What is the total revenue from ticket sales for each team in the 'teams' and 'ticket_sales' tables?", "schema": "CREATE TABLE teams (name VARCHAR(30), city VARCHAR(30)); CREATE TABLE ticket_sales (team VARCHAR(30), revenue INT); INSERT INTO teams (name, city) VALUES ('Knicks', 'New York'), ('Lakers', 'Los Angeles'); INSERT INTO ticket_sales (team, revenue) VALUES ('Knicks', 1000000), ('Lakers', 1200000);", "sql": "SELECT teams.name, SUM(ticket_sales.revenue) FROM teams INNER JOIN ticket_sales ON teams.name = ticket_sales.team GROUP BY teams.name;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 134, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who was the nominee for 2005?", "schema": "CREATE TABLE table_name_83 (nominee_s_ VARCHAR, year VARCHAR)", "sql": "SELECT nominee_s_ FROM table_name_83 WHERE year = 2005;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 55, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: When did the Electricity Grid 28.12.1972 officially shutdown?", "schema": "CREATE TABLE table_name_26 (shutdown VARCHAR, electricity_grid VARCHAR)", "sql": "SELECT shutdown FROM table_name_26 WHERE electricity_grid = '28.12.1972';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 73, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Where was the Deep - 30 Impact event held?", "schema": "CREATE TABLE table_name_55 (location VARCHAR, event VARCHAR)", "sql": "SELECT location FROM table_name_55 WHERE event = 'deep - 30 impact';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 68, "num_statements": 1} {"question": "What are the unique combinations of security incident types and their corresponding statuses in the incident_responses table, excluding any records with 'In Progress' status?", "schema": "CREATE TABLE incident_responses (incident VARCHAR(50), status VARCHAR(15)); INSERT INTO incident_responses (incident, status) VALUES ('Incident 1', 'Resolved'), ('Incident 2', 'In Progress'), ('Incident 3', 'Resolved'), ('Incident 4', 'In Progress'), ('Incident 5', 'Assessed');", "sql": "SELECT incident, status FROM incident_responses WHERE status != 'In Progress' GROUP BY incident, status;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 104, "num_statements": 1} {"question": "What is the maximum discovery date of exoplanets found by ESA missions?", "schema": "CREATE TABLE missions (id INT, name VARCHAR(50), agency VARCHAR(50), start_date DATE, end_date DATE); INSERT INTO missions (id, name, agency, start_date, end_date) VALUES (1, 'Gaia', 'ESA', '2013-12-19', 'ONGOING'); INSERT INTO missions (id, name, agency, start_date, end_date) VALUES (2, 'CHEOPS', 'ESA', '2019-12-18', 'ONGOING'); CREATE TABLE exoplanets (id INT, name VARCHAR(50), mission VARCHAR(50), discovery_date DATE); INSERT INTO exoplanets (id, name, mission, discovery_date) VALUES (1, 'HD 219134 b', 'CHEOPS', '2020-05-20'); INSERT INTO exoplanets (id, name, mission, discovery_date) VALUES (2, 'TOI-1235 b', 'CHEOPS', '2021-02-26');", "sql": "SELECT MAX(discovery_date) FROM exoplanets INNER JOIN missions ON exoplanets.mission = missions.name WHERE missions.agency = 'ESA';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 131, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'ltree' (example 245).", "schema": null, "sql": "SELECT '{ltree.asd, tree.awdfg}'::ltree[] ?@ 'tree & aWdfg@'::ltxtquery;", "explanation": "Example query from the 'ltree' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 72, "num_statements": 1} {"question": "Write the SQL definition from PostgreSQL contrib extension 'contrib' (item 208).", "schema": null, "sql": "CREATE OPERATOR < (\n\tPROCEDURE = isnlt,\n\tLEFTARG = ean13,\n\tRIGHTARG = isbn13,\n\tCOMMUTATOR = >,\n\tNEGATOR = >=,\n\tRESTRICT = scalarltsel,\n\tJOIN = scalarltjoinsel);", "explanation": "SQL definition from the 'contrib' PostgreSQL contrib extension.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "ddl_advanced", "is_postgresql_specific": true, "sql_length": 160, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which name had an order number of 1?", "schema": "CREATE TABLE table_name_4 (name VARCHAR, order VARCHAR)", "sql": "SELECT name FROM table_name_4 WHERE order = 1;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 46, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the Population of the Place with an Area (km 2) larger than 498.77?", "schema": "CREATE TABLE table_name_51 (population VARCHAR, area__km_2__ INTEGER)", "sql": "SELECT COUNT(population) FROM table_name_51 WHERE area__km_2__ > 498.77;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 72, "num_statements": 1} {"question": "Who is the youngest astronaut to have participated in a space mission?", "schema": "CREATE TABLE Astronauts (name VARCHAR(30), age INT, mission_name VARCHAR(30)); INSERT INTO Astronauts (name, age, mission_name) VALUES ('Gus Grissom', 36, 'Mercury-Redstone 4'), ('John Glenn', 40, 'Friendship 7');", "sql": "SELECT name, age FROM Astronauts ORDER BY age LIMIT 1;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 54, "num_statements": 1} {"question": "What is the total amount of financial aid provided to refugees in Jordan and Turkey, grouped by organization?", "schema": "CREATE TABLE financial_aid (id INT, organization VARCHAR(255), country VARCHAR(255), amount DECIMAL(10, 2)); INSERT INTO financial_aid (id, organization, country, amount) VALUES ('1', 'UNHCR', 'Jordan', '500000'), ('2', 'WFP', 'Jordan', '600000'), ('3', 'UNICEF', 'Jordan', '400000'), ('4', 'Red Cross', 'Turkey', '700000'), ('5', 'Save the Children', 'Turkey', '800000'), ('6', 'World Vision', 'Turkey', '900000');", "sql": "SELECT organization, SUM(amount) as total_aid FROM financial_aid WHERE country IN ('Jordan', 'Turkey') GROUP BY organization;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 125, "num_statements": 1} {"question": "What is the number of accommodations provided per disability type, ordered by the most accommodated?", "schema": "CREATE TABLE AccommodationsByDisability (AccommodationID INT, AccommodationName VARCHAR(50), DisabilityType VARCHAR(50), Number INT); INSERT INTO AccommodationsByDisability (AccommodationID, AccommodationName, DisabilityType, Number) VALUES (1, 'Sign Language Interpretation', 'Hearing Loss', 500), (2, 'Wheelchair Access', 'Physical Disability', 700), (3, 'Braille Materials', 'Visual Impairment', 350), (4, 'Adaptive Equipment', 'Physical Disability', 600), (5, 'Assistive Technology', 'Intellectual Disability', 400), (6, 'Sensory Rooms', 'Autism Spectrum Disorder', 300);", "sql": "SELECT DisabilityType, SUM(Number) as TotalAccommodations, ROW_NUMBER() OVER (ORDER BY SUM(Number) DESC) as Rank FROM AccommodationsByDisability GROUP BY DisabilityType;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_window_function", "is_postgresql_specific": true, "sql_length": 169, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: When party did the incumbent in the Arkansas 5 district belong to?", "schema": "CREATE TABLE table_1341897_6 (party VARCHAR, district VARCHAR)", "sql": "SELECT party FROM table_1341897_6 WHERE district = 'Arkansas 5';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 64, "num_statements": 1} {"question": "What are the names and depths of the deepest ocean trenches in the Pacific Ocean?", "schema": "CREATE TABLE DeepestTrenches (id INT, name VARCHAR(255), depth FLOAT); INSERT INTO DeepestTrenches (id, name, depth) VALUES (1, 'Marianas Trench', 10994); INSERT INTO DeepestTrenches (id, name, depth) VALUES (2, 'Tonga Trench', 10882);", "sql": "SELECT name, depth FROM DeepestTrenches WHERE depth = (SELECT MAX(depth) FROM DeepestTrenches);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 95, "num_statements": 1} {"question": "What is the total number of military bases in the United States and Canada, and the number of bases for each country?", "schema": "CREATE TABLE military_bases (id INT, name TEXT, location TEXT, country TEXT);INSERT INTO military_bases (id, name, location, country) VALUES (1, 'Fort Bragg', 'North Carolina', 'United States'), (2, 'CFB Trenton', 'Ontario', 'Canada');", "sql": "SELECT country, COUNT(*) as total_bases FROM military_bases GROUP BY country;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 77, "num_statements": 1} {"question": "What are the total number of music streams and ticket sales for artists from Latin America?", "schema": "CREATE TABLE music_streams(artist_id INT, region VARCHAR(50), streams INT); CREATE TABLE ticket_sales(artist_id INT, region VARCHAR(50), sales INT);", "sql": "SELECT SUM(streams) AS total_streams, SUM(sales) AS total_sales FROM music_streams JOIN ticket_sales ON music_streams.artist_id = ticket_sales.artist_id WHERE region = 'Latin America';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 184, "num_statements": 1} {"question": "Insert records for 3 students in the student_mental_health table", "schema": "CREATE TABLE student_mental_health (id INT PRIMARY KEY, student_id INT, mental_health_score INT, assessment_date DATE);", "sql": "INSERT INTO student_mental_health (id, student_id, mental_health_score, assessment_date) VALUES (1, 101, 60, '2021-01-15'), (2, 102, 70, '2021-02-12'), (3, 103, 80, '2021-03-18');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 180, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What are the student ids of students who don't have any allergies?", "schema": "CREATE TABLE Has_allergy (StuID VARCHAR); CREATE TABLE Student (StuID VARCHAR)", "sql": "SELECT StuID FROM Student EXCEPT SELECT StuID FROM Has_allergy;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 63, "num_statements": 1} {"question": "Which menu items have a lower preference score than the average preference score for vegetarian dishes?", "schema": "CREATE TABLE Menu (item_id INT, name VARCHAR(50), is_vegetarian BOOLEAN, preference_score INT); INSERT INTO Menu (item_id, name, is_vegetarian, preference_score) VALUES (1, 'Garden Salad', true, 80), (2, 'Cheese Pizza', false, 70), (3, 'Vegetable Lasagna', true, 75);", "sql": "SELECT item_id, name FROM Menu WHERE is_vegetarian = true AND preference_score < (SELECT AVG(preference_score) FROM Menu WHERE is_vegetarian = true);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 149, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: When is an Opponent of evie dominikovic?", "schema": "CREATE TABLE table_name_72 (date VARCHAR, opponent VARCHAR)", "sql": "SELECT date FROM table_name_72 WHERE opponent = 'evie dominikovic';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 67, "num_statements": 1} {"question": "Add new record to international_alliances table, including 'NATO' as alliance_name, 'USA' as member_country, 'Article 5' as type", "schema": "CREATE TABLE international_alliances (id INT PRIMARY KEY, alliance_name VARCHAR(100), member_country VARCHAR(50), type VARCHAR(50));", "sql": "INSERT INTO international_alliances (alliance_name, member_country, type) VALUES ('NATO', 'USA', 'Article 5');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 110, "num_statements": 1} {"question": "What is the maximum depth of all uranium mines in 'Country X'?", "schema": "CREATE TABLE uranium_mines (id INT, name TEXT, location TEXT, depth FLOAT); INSERT INTO uranium_mines (id, name, location, depth) VALUES (1, 'Mine F', 'Country X', 600.1); INSERT INTO uranium_mines (id, name, location, depth) VALUES (2, 'Mine G', 'Country X', 700.2); INSERT INTO uranium_mines (id, name, location, depth) VALUES (3, 'Mine H', 'Country Y', 500.3);", "sql": "SELECT MAX(depth) FROM uranium_mines WHERE location = 'Country X';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: When the under-11 was Aly Abou El Einen, who was the under-13?", "schema": "CREATE TABLE table_26368963_1 (under_13 VARCHAR, under_11 VARCHAR)", "sql": "SELECT under_13 FROM table_26368963_1 WHERE under_11 = 'Aly Abou El Einen';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 75, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: WHAT IS THE HOMETOWN OF TONY STEWARD?", "schema": "CREATE TABLE table_11677691_2 (hometown VARCHAR, player VARCHAR)", "sql": "SELECT hometown FROM table_11677691_2 WHERE player = 'Tony Steward';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 68, "num_statements": 1} {"question": "What is the total biomass of fish in the Atlantic ocean by sustainable seafood category?", "schema": "CREATE TABLE fish_data (id INT, species TEXT, ocean TEXT, biomass FLOAT, sustainable_category TEXT); INSERT INTO fish_data (id, species, ocean, biomass, sustainable_category) VALUES (1, 'Species A', 'Atlantic', 1200, 'Sustainable'), (2, 'Species B', 'Atlantic', 1500, 'Unsustainable'), (3, 'Species C', 'Atlantic', 1800, 'Sustainable');", "sql": "SELECT sustainable_category, SUM(biomass) FROM fish_data WHERE ocean = 'Atlantic' AND sustainable_category IS NOT NULL GROUP BY sustainable_category;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 149, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'truncate' (example 1).", "schema": null, "sql": "CREATE TABLE truncate_a (col1 integer primary key);", "explanation": "DDL from PostgreSQL core regression test for Truncate.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_table", "is_postgresql_specific": false, "sql_length": 51, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the sum of attendance for the games played at Los Angeles Rams?", "schema": "CREATE TABLE table_name_39 (attendance INTEGER, opponent VARCHAR)", "sql": "SELECT SUM(attendance) FROM table_name_39 WHERE opponent = 'at los angeles rams';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 81, "num_statements": 1} {"question": "PostgreSQL regression test 'jsonb_jsonpath': Write the SELECT query (example 7).", "schema": null, "sql": "select jsonb '{\"a\": {\"a\": 12}}' @? '$.a.a';", "explanation": "Regression test for Jsonb Jsonpath in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select jsonb '{\"a\": {\"a\": 12}}' @? '$.a.a') AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": true, "sql_length": 43, "num_statements": 1} {"question": "PostgreSQL regression test 'text': Write the SELECT query (example 63).", "schema": null, "sql": "select format('>>%1$10s<<', 'Hello');", "explanation": "Regression test for Text in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select format('>>%1$10s<<', 'Hello')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 37, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the Theme of the coin with an Issue Price of $89.95?", "schema": "CREATE TABLE table_name_95 (theme VARCHAR, issue_price VARCHAR)", "sql": "SELECT theme FROM table_name_95 WHERE issue_price = '$89.95';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 61, "num_statements": 1} {"question": "PostgreSQL regression test 'rules': Write the SELECT query (example 597).", "schema": null, "sql": "SELECT * FROM ruletest1;", "explanation": "Regression test for Rules in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT * FROM ruletest1) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 24, "num_statements": 1} {"question": "What is the number of unique severities in the 'vulnerabilities' table?", "schema": "CREATE TABLE schema1.vulnerabilities (id INT, name VARCHAR(255), severity VARCHAR(50), description TEXT, date_discovered DATE, last_observed DATE); INSERT INTO schema1.vulnerabilities (id, name, severity, description, date_discovered, last_observed) VALUES (1, 'SQL Injection', 'Critical', 'Allows unauthorized access', '2021-01-01', '2021-02-01');", "sql": "SELECT COUNT(DISTINCT severity) FROM schema1.vulnerabilities;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 61, "num_statements": 1} {"question": "Which hotel in LATAM has the highest virtual tour engagement?", "schema": "CREATE TABLE latam_virtual_tours (hotel_id INT, hotel_name VARCHAR(255), views INT);", "sql": "SELECT hotel_id, hotel_name, MAX(views) FROM latam_virtual_tours;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 65, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'truncate' (example 101).", "schema": null, "sql": "INSERT INTO truncate_a DEFAULT VALUES;", "explanation": "DML from PostgreSQL core regression test for Truncate.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 38, "num_statements": 1} {"question": "PostgreSQL regression test 'constraints': Write the SELECT query (example 123).", "schema": null, "sql": "SELECT * FROM PRIMARY_TBL;", "explanation": "Regression test for Constraints in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT * FROM PRIMARY_TBL) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 26, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the score of the match with a 3-2 result?", "schema": "CREATE TABLE table_name_61 (score VARCHAR, result VARCHAR)", "sql": "SELECT score FROM table_name_61 WHERE result = '3-2';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 53, "num_statements": 1} {"question": "Drop the view 'chemical_inventory'", "schema": "CREATE VIEW chemical_inventory AS SELECT * FROM chemical_data;", "sql": "DROP VIEW chemical_inventory;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "other", "is_postgresql_specific": false, "sql_length": 29, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What country is ranked larger than 4?", "schema": "CREATE TABLE table_name_72 (country VARCHAR, rank INTEGER)", "sql": "SELECT country FROM table_name_72 WHERE rank > 4;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 49, "num_statements": 1} {"question": "List the number of smart contracts created each month in 2021, for a specific regulatory framework?", "schema": "CREATE TABLE smart_contracts (contract_id INT, creation_date DATE, regulatory_framework VARCHAR(255)); INSERT INTO smart_contracts (contract_id, creation_date, regulatory_framework) VALUES (1, '2021-01-01', 'SEC'), (2, '2021-01-15', 'EU'), (3, '2021-02-03', 'SEC'), (4, '2021-03-20', 'EU'), (5, '2021-04-01', 'SEC');", "sql": "SELECT MONTH(creation_date) as month, COUNT(*) as smart_contracts_created FROM smart_contracts WHERE regulatory_framework = 'SEC' AND YEAR(creation_date) = 2021 GROUP BY month;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 176, "num_statements": 1} {"question": "What is the minimum age of visitors who attended exhibitions in London?", "schema": "CREATE TABLE Exhibitions (exhibition_id INT, location VARCHAR(255));CREATE TABLE Visitors (visitor_id INT, exhibition_id INT, age INT); INSERT INTO Exhibitions (exhibition_id, location) VALUES (1, 'London'), (2, 'Paris'), (3, 'London'); INSERT INTO Visitors (visitor_id, exhibition_id, age) VALUES (1, 1, 30), (2, 1, 45), (3, 2, 25), (4, 3, 18), (5, 3, 19);", "sql": "SELECT MIN(age) FROM Visitors JOIN Exhibitions ON Visitors.exhibition_id = Exhibitions.exhibition_id WHERE Exhibitions.location = 'London';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 139, "num_statements": 1} {"question": "PostgreSQL regression test 'horology': Write the SELECT query (example 334).", "schema": null, "sql": "SELECT to_date('2011 12 18', 'YYYYxMMxDD');", "explanation": "Regression test for Horology in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT to_date('2011 12 18', 'YYYYxMMxDD')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 43, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many ranks have $1,084,439,099 as the worldwide gross?", "schema": "CREATE TABLE table_name_91 (rank INTEGER, worldwide_gross VARCHAR)", "sql": "SELECT SUM(rank) FROM table_name_91 WHERE worldwide_gross = '$1,084,439,099';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 77, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'stats_rewrite' (example 40).", "schema": null, "sql": "INSERT INTO test_2pc_multi VALUES (5);", "explanation": "DML from PostgreSQL core regression test for Stats Rewrite.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 38, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the top grid that laps less than 66 and a retried engine?", "schema": "CREATE TABLE table_name_28 (grid INTEGER, time_retired VARCHAR, laps VARCHAR)", "sql": "SELECT MAX(grid) FROM table_name_28 WHERE time_retired = 'engine' AND laps < 66;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 80, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many combination classifications have the winner as Erik Zabel and a points classification as Alessandro Petacchi", "schema": "CREATE TABLE table_15059783_1 (combination_classification VARCHAR, points_classification VARCHAR, winner VARCHAR)", "sql": "SELECT COUNT(combination_classification) FROM table_15059783_1 WHERE points_classification = 'Alessandro Petacchi' AND winner = 'Erik Zabel';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 141, "num_statements": 1} {"question": "PL/pgSQL test: Plpython Composite (example 17).", "schema": null, "sql": "SELECT * FROM multiout_record_as('dict', null, null, true);", "explanation": "PL/pgSQL example from PostgreSQL source test for Plpython Composite.", "validation_query": null, "source": "plpgsql_source", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 59, "num_statements": 1} {"question": "What is the distribution of visitor demographics by time of day?", "schema": "CREATE TABLE visitor_demographics (id INT, age INT, gender TEXT, visit_time TEXT); INSERT INTO visitor_demographics (id, age, gender, visit_time) VALUES (1, 23, 'Female', 'Morning'), (2, 35, 'Male', 'Afternoon');", "sql": "SELECT visit_time, age, gender, COUNT(*) FROM visitor_demographics GROUP BY visit_time, age, gender;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 100, "num_statements": 1} {"question": "Show Underwriting data for policies with an 'Auto' type, a premium less than or equal to $1000, and a PolicyholderID greater than 2.", "schema": "CREATE TABLE Underwriting (PolicyID INT, PolicyholderID INT, PolicyType TEXT, Premium INT); INSERT INTO Underwriting (PolicyID, PolicyholderID, PolicyType, Premium) VALUES (101, 1, 'Auto', 1200), (102, 2, 'Life', 500), (103, 3, 'Auto', 800), (104, 4, 'Life', 600);", "sql": "SELECT * FROM Underwriting WHERE PolicyType = 'Auto' AND Premium <= 1000 AND PolicyholderID > 2;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 96, "num_statements": 1} {"question": "What is the average salary of construction workers per state?", "schema": "CREATE TABLE WorkerSalaries (WorkerID int, Name varchar(50), State varchar(25), Salary decimal(10,2)); INSERT INTO WorkerSalaries (WorkerID, Name, State, Salary) VALUES (1, 'John Doe', 'NY', 50000.00), (2, 'Jane Smith', 'CA', 60000.00), (3, 'Mike Johnson', 'TX', 55000.00);", "sql": "SELECT State, AVG(Salary) AS AvgSalaryPerState FROM WorkerSalaries GROUP BY State;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 82, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which team had a game of 82?", "schema": "CREATE TABLE table_name_60 (team VARCHAR, game VARCHAR)", "sql": "SELECT team FROM table_name_60 WHERE game = 82;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 47, "num_statements": 1} {"question": "What is the average age of visitors who attended dance performances in New York?", "schema": "CREATE TABLE events (event_id INT, event_name VARCHAR(50), location VARCHAR(50)); INSERT INTO events (event_id, event_name, location) VALUES (1, 'Dance Performance', 'New York'); CREATE TABLE attendees (attendee_id INT, event_id INT, age INT); INSERT INTO attendees (attendee_id, event_id, age) VALUES (1, 1, 35), (2, 1, 42), (3, 1, 28), (4, 1, 32);", "sql": "SELECT AVG(age) FROM attendees JOIN events ON attendees.event_id = events.event_id WHERE events.location = 'New York' AND events.event_name = 'Dance Performance';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "plpgsql", "is_postgresql_specific": false, "sql_length": 162, "num_statements": 1} {"question": "What is the average altitude of satellites in the Starlink constellation?", "schema": "CREATE TABLE Satellites (satellite_id INT, name VARCHAR(255), country VARCHAR(255), altitude FLOAT, constellation VARCHAR(255)); INSERT INTO Satellites (satellite_id, name, country, altitude, constellation) VALUES (1, 'SpaceX-1', 'USA', 550, 'Starlink'), (2, 'SpaceX-2', 'USA', 550, 'Starlink'), (3, 'OneWeb-1', 'UK', 1200, 'OneWeb');", "sql": "SELECT AVG(altitude) FROM Satellites WHERE constellation = 'Starlink';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 70, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which Song has Points smaller than 54, and a Draw smaller than 4, and a Place smaller than 10?", "schema": "CREATE TABLE table_name_90 (song VARCHAR, place VARCHAR, points VARCHAR, draw VARCHAR)", "sql": "SELECT song FROM table_name_90 WHERE points < 54 AND draw < 4 AND place < 10;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 77, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the surface for paul goldstein and date of october 2, 2006", "schema": "CREATE TABLE table_name_79 (surface VARCHAR, partnering VARCHAR, date VARCHAR)", "sql": "SELECT surface FROM table_name_79 WHERE partnering = 'paul goldstein' AND date = 'october 2, 2006';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 99, "num_statements": 1} {"question": "What is the total weight of ingredients sourced from the US, for each product category?", "schema": "CREATE TABLE product_ingredients (ingredient_id INT, product_id INT, ingredient VARCHAR(255), source_country VARCHAR(255)); CREATE TABLE products (product_id INT, product VARCHAR(255), category VARCHAR(255)); INSERT INTO product_ingredients (ingredient_id, product_id, ingredient, source_country) VALUES (1, 1, 'Vitamin C', 'US'), (2, 1, 'Shea Butter', 'Ghana'), (3, 2, 'Jojoba Oil', 'US'), (4, 2, 'Aloe Vera', 'Mexico'); INSERT INTO products (product_id, product, category) VALUES (1, 'Face Cream', 'Skin Care'), (2, 'Hair Serum', 'Hair Care');", "sql": "SELECT p.category, SUM(pi.ingredient_id) as total_us_weight FROM product_ingredients pi JOIN products p ON pi.product_id = p.product_id WHERE pi.source_country = 'US' GROUP BY p.category;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 187, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which writer worked for Intrepido LTD for a film on 17/03/04?", "schema": "CREATE TABLE table_name_35 (writer_s_ VARCHAR, recipient VARCHAR, date VARCHAR)", "sql": "SELECT writer_s_ FROM table_name_35 WHERE recipient = 'intrepido ltd' AND date = '17/03/04';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 92, "num_statements": 1} {"question": "What are the total greenhouse gas emissions for each mine, ranked from highest to lowest?", "schema": "CREATE TABLE Mine (MineID int, MineName varchar(50), Location varchar(50)); CREATE TABLE Emission (EmissionID int, MineID int, EmissionType varchar(50), EmissionQuantity int); INSERT INTO Mine VALUES (1, 'ABC Mine', 'Colorado'), (2, 'DEF Mine', 'Wyoming'), (3, 'GHI Mine', 'West Virginia'); INSERT INTO Emission VALUES (1, 1, 'CO2', 1000), (2, 1, 'CH4', 200), (3, 2, 'CO2', 1500), (4, 2, 'CH4', 300), (5, 3, 'CO2', 1200), (6, 3, 'CH4', 400);", "sql": "SELECT MineName, SUM(EmissionQuantity) as TotalEmissionQuantity FROM Emission INNER JOIN Mine ON Emission.MineID = Mine.MineID GROUP BY MineName ORDER BY TotalEmissionQuantity DESC;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 181, "num_statements": 1} {"question": "What is the average size of marine protected areas in the Indian Ocean and Mediterranean Sea?", "schema": "CREATE TABLE marine_protected_areas (id INT, name VARCHAR(255), location VARCHAR(255), size FLOAT); INSERT INTO marine_protected_areas (id, name, location, size) VALUES (1, 'Maldives Atoll Marine Park', 'Indian Ocean', 90000); INSERT INTO marine_protected_areas (id, name, location, size) VALUES (2, 'Palestine National Marine Park', 'Mediterranean Sea', 3500);", "sql": "SELECT AVG(size) FROM marine_protected_areas WHERE location IN ('Indian Ocean', 'Mediterranean Sea');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 101, "num_statements": 1} {"question": "What is the latest smart city technology adoption date in the 'smart_city_technology' table?", "schema": "CREATE TABLE smart_city_technology (tech_id INT, tech_name VARCHAR(100), adoption_date DATE); INSERT INTO smart_city_technology (tech_id, tech_name, adoption_date) VALUES (1, 'Smart Grid', '2020-03-15'), (2, 'Smart Lighting', '2019-08-01');", "sql": "SELECT MAX(adoption_date) FROM smart_city_technology;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 53, "num_statements": 1} {"question": "What's the total number of players who play strategy games on VR platforms?", "schema": "CREATE TABLE Players (PlayerID INT, Name VARCHAR(100), Age INT, FavoriteGenre VARCHAR(50), VRPossible BOOLEAN); INSERT INTO Players (PlayerID, Name, Age, FavoriteGenre, VRPossible) VALUES (1, 'John Doe', 25, 'Action', true), (2, 'Jane Smith', 28, 'Adventure', true), (3, 'James Johnson', 30, 'Simulation', false), (4, 'Emily Davis', 24, 'Strategy', true);", "sql": "SELECT COUNT(*) FROM Players WHERE FavoriteGenre = 'Strategy' AND VRPossible = true;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 84, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many players had 8 goals?", "schema": "CREATE TABLE table_28286776_52 (player VARCHAR, goal_s_ VARCHAR)", "sql": "SELECT COUNT(player) FROM table_28286776_52 WHERE goal_s_ = 8;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 62, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What date has 2:48 as the time?", "schema": "CREATE TABLE table_name_48 (date VARCHAR, time VARCHAR)", "sql": "SELECT date FROM table_name_48 WHERE time = '2:48';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 51, "num_statements": 1} {"question": "What is the total area (in hectares) of farmland in the 'organic_farms' table?", "schema": "CREATE TABLE organic_farms (farmer_id INT, farm_name VARCHAR(50), location VARCHAR(50), area_ha FLOAT); INSERT INTO organic_farms (farmer_id, farm_name, location, area_ha) VALUES (1, 'Farm 1', 'Location 1', 15.6);", "sql": "SELECT SUM(area_ha) FROM organic_farms;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 39, "num_statements": 1} {"question": "List all environmental impact stats for mining operations in Russia and Canada?", "schema": "CREATE TABLE canadian_provinces (id INT, name VARCHAR(50)); CREATE TABLE russian_regions (id INT, name VARCHAR(50)); CREATE TABLE mining_operations (id INT, country_id INT, region VARCHAR(20), annual_co2_emissions INT); INSERT INTO canadian_provinces (id, name) VALUES (1, 'Alberta'), (2, 'British Columbia'); INSERT INTO russian_regions (id, name) VALUES (1, 'Siberia'), (2, 'Urals'); INSERT INTO mining_operations (id, country_id, region, annual_co2_emissions) VALUES (1, 1, 'Canada', 5000), (2, 1, 'Canada', 6000), (3, 2, 'Russia', 7000), (4, 2, 'Russia', 8000);", "sql": "SELECT m.annual_co2_emissions, m.region FROM mining_operations m INNER JOIN (SELECT * FROM canadian_provinces WHERE name = 'Alberta' UNION ALL SELECT * FROM russian_regions WHERE name = 'Siberia') c ON m.country_id = c.id;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 222, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the total number 1980 mil for soviet union", "schema": "CREATE TABLE table_22071705_6 (country VARCHAR)", "sql": "SELECT COUNT(1980 AS __mil_) FROM table_22071705_6 WHERE country = 'Soviet Union';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 82, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the North Melbourne's score when they played as the home team?", "schema": "CREATE TABLE table_name_52 (home_team VARCHAR)", "sql": "SELECT home_team AS score FROM table_name_52 WHERE home_team = 'north melbourne';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 81, "num_statements": 1} {"question": "Delete the 'gene sequencing' project from the 'researchers' table.", "schema": "CREATE TABLE researchers (id INT, name VARCHAR(50), project VARCHAR(50)); INSERT INTO researchers (id, name, project) VALUES (1, 'Alice', 'gene sequencing'), (2, 'Bob', 'biosensor development'), (3, 'Charlie', 'gene sequencing');", "sql": "DELETE FROM researchers WHERE project = 'gene sequencing';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_delete", "is_postgresql_specific": false, "sql_length": 58, "num_statements": 1} {"question": "What is the average age of visitors to dance programs in 2022?", "schema": "CREATE TABLE dance_attendance (id INT, attendee_age INT, program_type VARCHAR(255), visit_year INT);", "sql": "SELECT program_type, AVG(attendee_age) OVER (PARTITION BY program_type) AS avg_age_by_program_type FROM dance_attendance WHERE visit_year = 2022 AND program_type LIKE '%dance%' ORDER BY program_type;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "query_window_function", "is_postgresql_specific": true, "sql_length": 200, "num_statements": 1} {"question": "How many military aircraft are currently under maintenance?", "schema": "CREATE TABLE aircraft (id INT, model VARCHAR(50), maintenance_status VARCHAR(50)); INSERT INTO aircraft (id, model, maintenance_status) VALUES (1, 'F-16', 'under_maintenance'), (2, 'F-35', 'operational'), (3, 'A-10', 'under_maintenance');", "sql": "SELECT COUNT(*) FROM aircraft WHERE maintenance_status = 'under_maintenance';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 77, "num_statements": 1} {"question": "Show a SQL definition from the timescaledb project (util_time, item 14).", "schema": null, "sql": "-- Convert integer UNIX timestamps in microsecond to a timestamp range.\nCREATE OR REPLACE FUNCTION _timescaledb_functions.make_multirange_from_internal_time(\n base tstzrange, low_usec bigint, high_usec bigint\n) RETURNS TSTZMULTIRANGE AS\n$body$\n select multirange(tstzrange(_timescaledb_functions.to_timestamp(low_usec),\n\t\t\t _timescaledb_functions.to_timestamp(high_usec)));", "explanation": "SQL definition from the open-source timescaledb PostgreSQL project.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "plpgsql_function", "is_postgresql_specific": true, "sql_length": 382, "num_statements": 1} {"question": "Find the number of players who adopted VR technology in Canada", "schema": "CREATE TABLE Players (PlayerID INT, PlayerName VARCHAR(50), Country VARCHAR(50)); INSERT INTO Players (PlayerID, PlayerName, Country) VALUES (1, 'John Smith', 'Canada'); INSERT INTO Players (PlayerID, PlayerName, Country) VALUES (2, 'Jane Doe', 'USA'); CREATE TABLE VRAdoption (PlayerID INT, VRAdopted DATE); INSERT INTO VRAdoption (PlayerID, VRAdopted) VALUES (1, '2021-08-01');", "sql": "SELECT COUNT(*) FROM Players p INNER JOIN VRAdoption va ON p.PlayerID = va.PlayerID WHERE p.Country = 'Canada';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 111, "num_statements": 1} {"question": "What is the average cost of medical procedures performed in each hospital in 2020?", "schema": "CREATE TABLE medical_procedures (id INT, name TEXT, hospital TEXT, procedure_date DATE, cost FLOAT); INSERT INTO medical_procedures (id, name, hospital, procedure_date, cost) VALUES (1, 'Medical Procedure 1', 'Hospital A', '2020-01-01', 100.00), (2, 'Medical Procedure 2', 'Hospital A', '2020-02-01', 200.00), (3, 'Medical Procedure 3', 'Hospital B', '2020-03-01', 300.00), (4, 'Medical Procedure 4', 'Hospital C', '2020-04-01', 400.00);", "sql": "SELECT hospital, AVG(cost) as avg_cost FROM medical_procedures WHERE procedure_date >= '2020-01-01' AND procedure_date < '2021-01-01' GROUP BY hospital;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 152, "num_statements": 1} {"question": "Find the number of wells drilled in each country in 2020", "schema": "CREATE TABLE wells (id INT, country VARCHAR(50), drill_date DATE); INSERT INTO wells (id, country, drill_date) VALUES (1, 'USA', '2020-01-01'); INSERT INTO wells (id, country, drill_date) VALUES (2, 'Canada', '2020-02-15');", "sql": "SELECT country, COUNT(*) as num_wells FROM wells WHERE YEAR(drill_date) = 2020 GROUP BY country;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 96, "num_statements": 1} {"question": "PostgreSQL regression test 'timestamptz': Write the SELECT query (example 351).", "schema": null, "sql": "SELECT '2011-03-26 23:59:59 UTC'::timestamptz AT TIME ZONE 'MSK';", "explanation": "Regression test for Timestamptz in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT '2011-03-26 23:59:59 UTC'::timestamptz AT TIME ZONE 'MSK') AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 65, "num_statements": 1} {"question": "What is the average contract amount for negotiations in the defense sector?", "schema": "CREATE TABLE negotiations (id INT PRIMARY KEY, sector VARCHAR(255), contract_amount DECIMAL(10,2), negotiation_date DATE); INSERT INTO negotiations (id, sector, contract_amount, negotiation_date) VALUES (1, 'Defense', 150000000.00, '2022-08-01');", "sql": "SELECT AVG(contract_amount) FROM negotiations WHERE sector = 'Defense';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 71, "num_statements": 1} {"question": "Update all mental health assessment scores below 70 for students in the past month in the 'student_mental_health' table to 70.", "schema": "CREATE TABLE student_mental_health (student_id INT, assessment_date DATE, assessment_score INT);", "sql": "UPDATE student_mental_health SET assessment_score = 70 WHERE assessment_score < 70 AND assessment_date >= DATE(NOW()) - INTERVAL 1 MONTH;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 137, "num_statements": 1} {"question": "Write the SQL definition from PostgreSQL contrib extension 'contrib' (item 46).", "schema": null, "sql": "CREATE FUNCTION gbt_oid_fetch(internal)\nRETURNS internal\nAS 'MODULE_PATHNAME'\nLANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;", "explanation": "SQL definition from the 'contrib' PostgreSQL contrib extension.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 120, "num_statements": 1} {"question": "Show an example of PostgreSQL ALTER TABLE (example 6).", "schema": null, "sql": "ALTER TABLE foo ALTER COLUMN foo_timestamp SET DATA TYPE timestamp with time zone USING timestamp with time zone 'epoch' + foo_timestamp * interval '1 second';", "explanation": "PostgreSQL ALTER TABLE command.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "ddl_alter", "is_postgresql_specific": false, "sql_length": 159, "num_statements": 1} {"question": "Insert a new record into the \"international_visitor_statistics\" table for \"China\" with 2022 visit data", "schema": "CREATE TABLE international_visitor_statistics (id INT PRIMARY KEY, country TEXT, year INT, visitor_count INT);", "sql": "INSERT INTO international_visitor_statistics (id, country, year, visitor_count) VALUES (1, 'China', 2022, 20000000);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 116, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What's the original airdate of the episode directed by Pete Travis?", "schema": "CREATE TABLE table_12919003_3 (original_airdate VARCHAR, director VARCHAR)", "sql": "SELECT original_airdate FROM table_12919003_3 WHERE director = 'Pete Travis';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 77, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'rules' (example 232).", "schema": null, "sql": "create table rtest_view2 (a int4);", "explanation": "DDL from PostgreSQL core regression test for Rules.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_table", "is_postgresql_specific": false, "sql_length": 34, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which team does Pierre Duguay play for?", "schema": "CREATE TABLE table_1213511_6 (nhl_team VARCHAR, player VARCHAR)", "sql": "SELECT nhl_team FROM table_1213511_6 WHERE player = 'Pierre Duguay';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 68, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the lowest average points per race entered of driver kimi räikkönen, who has more than 194 entries?", "schema": "CREATE TABLE table_name_3 (average_points_per_race_entered INTEGER, driver VARCHAR, entries VARCHAR)", "sql": "SELECT MIN(average_points_per_race_entered) FROM table_name_3 WHERE driver = 'kimi räikkönen' AND entries > 194;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 112, "num_statements": 1} {"question": "What is the number of startups founded by individuals from underrepresented racial or ethnic backgrounds in the tech sector?", "schema": "CREATE TABLE startup (id INT, name TEXT, industry TEXT, founding_date DATE, founder_race TEXT); INSERT INTO startup (id, name, industry, founding_date, founder_race) VALUES (1, 'AptDeco', 'E-commerce', '2014-02-14', 'Black'), (2, 'Blavity', 'Media', '2014-07-17', 'Black');", "sql": "SELECT COUNT(*) FROM startup WHERE industry = 'Tech' AND founder_race IN ('Black', 'Hispanic', 'Indigenous', 'Asian', 'Pacific Islander', 'Multiracial');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 153, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the Time/Retired for the car going 42 Laps?", "schema": "CREATE TABLE table_name_2 (time_retired VARCHAR, laps VARCHAR)", "sql": "SELECT time_retired FROM table_name_2 WHERE laps = 42;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 54, "num_statements": 1} {"question": "pgTAP test for Hastap (assertion 86).", "schema": null, "sql": "SELECT * FROM check_test(\n hasnt_composite( 'public', 'sometype', 'desc' ),\n false,\n 'hasnt_composite(sch, tab, desc)',\n 'desc',\n ''\n);", "explanation": "SQL assertion from pgTAP test suite for Hastap.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 150, "num_statements": 1} {"question": "PostgreSQL regression test 'groupingsets': Write the SELECT query (example 99).", "schema": null, "sql": "select ten, grouping(ten) from onek\ngroup by (ten) having grouping(ten) >= 0\norder by 2,1;", "explanation": "Regression test for Groupingsets in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select ten, grouping(ten) from onek\ngroup by (ten) having grouping(ten) >= 0\norder by 2,1) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 90, "num_statements": 1} {"question": "How many employees were hired in 2021?", "schema": "CREATE TABLE employees (id INT, name VARCHAR(50), hire_date DATE); INSERT INTO employees (id, name, hire_date) VALUES (1, 'John Doe', '2021-02-14'), (2, 'Jane Smith', '2021-05-02');", "sql": "SELECT COUNT(*) FROM employees WHERE YEAR(hire_date) = 2021;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 60, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which Matches have a Rank of 2?", "schema": "CREATE TABLE table_name_94 (matches VARCHAR, rank VARCHAR)", "sql": "SELECT COUNT(matches) FROM table_name_94 WHERE rank = 2;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 56, "num_statements": 1} {"question": "How many yellow cards were given to players in the last 30 days?", "schema": "CREATE TABLE players (player_id INT, player_name VARCHAR(255), team_id INT, yellow_cards INT, red_cards INT);", "sql": "SELECT SUM(yellow_cards) as total_yellow_cards FROM players WHERE yellow_cards > 0 AND player_id IN (SELECT player_id FROM players WHERE match_date >= DATEADD(day, -30, GETDATE()));", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 181, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'aggregates' (example 206).", "schema": null, "sql": "create temp table t1c () inherits (t1);", "explanation": "DDL from PostgreSQL core regression test for Aggregates.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "other", "is_postgresql_specific": true, "sql_length": 39, "num_statements": 1} {"question": "What is the longest song in the metal genre?", "schema": "CREATE TABLE songs (song_id INT, song_name VARCHAR(100), genre VARCHAR(50), length_sec INT); INSERT INTO songs (song_id, song_name, genre, length_sec) VALUES (1, 'Battery', 'metal', 506), (2, 'Master of Puppets', 'metal', 857), (3, 'Epic', 'rock', 909), (4, 'Stairway to Heaven', 'rock', 792), (5, 'In-A-Gadda-Da-Vida', 'rock', 1701);", "sql": "SELECT song_name, length_sec FROM songs WHERE genre = 'metal' ORDER BY length_sec DESC LIMIT 1;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 95, "num_statements": 1} {"question": "What is the average response time for emergency incidents in the city of Los Angeles, categorized by incident type?", "schema": "CREATE TABLE emergency_responses (id INT, incident_id INT, response_time INT); CREATE TABLE emergency_incidents (id INT, incident_type VARCHAR(255), report_date DATE); INSERT INTO emergency_incidents (id, incident_type, report_date) VALUES (1, 'Medical Emergency', '2022-01-01'), (2, 'Fire', '2022-01-02'); INSERT INTO emergency_responses (id, incident_id, response_time) VALUES (1, 1, 10), (2, 1, 12), (3, 2, 20);", "sql": "SELECT incident_type, AVG(response_time) FROM emergency_responses JOIN emergency_incidents ON emergency_responses.incident_id = emergency_incidents.id GROUP BY incident_type;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 174, "num_statements": 1} {"question": "Find the number of safety tests passed, failed, and total for each vehicle model.", "schema": "CREATE TABLE safety_tests (id INT, vehicle_model VARCHAR(20), test_result VARCHAR(10)); INSERT INTO safety_tests (id, vehicle_model, test_result) VALUES (1, 'Model X', 'Pass'), (2, 'Model X', 'Pass'), (3, 'Model Y', 'Fail');", "sql": "SELECT vehicle_model, COUNT(*) FILTER (WHERE test_result = 'Pass') AS passes, COUNT(*) FILTER (WHERE test_result = 'Fail') AS fails, COUNT(*) AS total FROM safety_tests GROUP BY vehicle_model;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": true, "sql_length": 192, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many battles resulted in Battle of the Yellow Sea?", "schema": "CREATE TABLE table_26977890_1 (result VARCHAR, battles VARCHAR)", "sql": "SELECT COUNT(result) FROM table_26977890_1 WHERE battles = 'Battle of the Yellow Sea';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 86, "num_statements": 1} {"question": "What is the total number of artists and the total number of artworks in the database?", "schema": "CREATE TABLE artists (id INT, name VARCHAR(255), year_of_birth INT); CREATE TABLE artworks (id INT, artist_id INT, title VARCHAR(255), year_of_creation INT);", "sql": "SELECT (SELECT COUNT(*) FROM artists) AS total_artists, (SELECT COUNT(*) FROM artworks) AS total_artworks;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 106, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What European Cup is in Israeli Premier League?", "schema": "CREATE TABLE table_name_24 (european_cup VARCHAR, national_league VARCHAR)", "sql": "SELECT european_cup FROM table_name_24 WHERE national_league = 'israeli premier league';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 88, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which countries won by 9 strokes?", "schema": "CREATE TABLE table_1520559_1 (country VARCHAR, margin_of_victory VARCHAR)", "sql": "SELECT country FROM table_1520559_1 WHERE margin_of_victory = '9 strokes';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 74, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What date was the game in Minneapolis?", "schema": "CREATE TABLE table_name_21 (date VARCHAR, city VARCHAR)", "sql": "SELECT date FROM table_name_21 WHERE city = 'minneapolis';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 58, "num_statements": 1} {"question": "What is the number of new subscribers per day, by country, for the last 60 days?", "schema": "CREATE TABLE subscribers (subscriber_id INT, country VARCHAR(255), subscribe_date DATE); CREATE VIEW daily_subscribers AS SELECT country, DATE_TRUNC('day', subscribe_date) as date, COUNT(DISTINCT subscriber_id) as new_subscribers FROM subscribers WHERE subscribe_date >= DATEADD(day, -60, CURRENT_DATE) GROUP BY country, date;", "sql": "SELECT * FROM daily_subscribers;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 32, "num_statements": 1} {"question": "List the artists in the 'artists' table, grouped by their country of origin.", "schema": "CREATE TABLE artists (artist_id INT, name VARCHAR(50), country VARCHAR(50), genre VARCHAR(50), birth_year INT);", "sql": "SELECT country, COUNT(*) as artist_count FROM artists GROUP BY country;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 71, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the winning score with a margin of victory of 4 strokes for the NEC Invitational tournament?", "schema": "CREATE TABLE table_name_93 (winning_score VARCHAR, margin_of_victory VARCHAR, tournament VARCHAR)", "sql": "SELECT winning_score FROM table_name_93 WHERE margin_of_victory = '4 strokes' AND tournament = 'nec invitational';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 114, "num_statements": 1} {"question": "pgTAP test for Pgtap--Unpackaged--0.91.0 (assertion 146).", "schema": null, "sql": "ALTER EXTENSION pgtap ADD FUNCTION col_default_is ( NAME, NAME, anyelement, TEXT );", "explanation": "SQL assertion from pgTAP test suite for Pgtap--Unpackaged--0.91.0.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 83, "num_statements": 1} {"question": "What is the average calorie intake per meal for vegetarian customers in New York?", "schema": "CREATE TABLE Customers (CustomerID INT, Name VARCHAR(50), DietaryPreference VARCHAR(20), City VARCHAR(20)); INSERT INTO Customers VALUES (1, 'John Doe', 'Vegetarian', 'New York'); INSERT INTO Customers VALUES (2, 'Jane Smith', 'Non-Vegetarian', 'Los Angeles'); CREATE TABLE Meals (MealID INT, CustomerID INT, Calories INT); INSERT INTO Meals VALUES (1, 1, 600); INSERT INTO Meals VALUES (2, 1, 400); INSERT INTO Meals VALUES (3, 2, 800);", "sql": "SELECT AVG(Meals.Calories) FROM Customers INNER JOIN Meals ON Customers.CustomerID = Meals.CustomerID WHERE Customers.DietaryPreference = 'Vegetarian' AND Customers.City = 'New York';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 183, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many games did team iurbentia bilbao have with rebounds higher than 212?", "schema": "CREATE TABLE table_name_27 (games INTEGER, team VARCHAR, rebounds VARCHAR)", "sql": "SELECT SUM(games) FROM table_name_27 WHERE team = 'iurbentia bilbao' AND rebounds > 212;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 88, "num_statements": 1} {"question": "Show the average number of hospital beds in rural hospitals with at least 100 beds, grouped by hospital location state.", "schema": "CREATE TABLE hospitals (hospital_id INT, hospital_name TEXT, beds INT, rural BOOLEAN, state_id INT); INSERT INTO hospitals (hospital_id, hospital_name, beds, rural, state_id) VALUES (1, 'Hospital A', 100, true, 1); CREATE TABLE states (state_id INT, state TEXT); INSERT INTO states (state_id, state) VALUES (1, 'Alabama'), (2, 'Alaska');", "sql": "SELECT states.state, AVG(hospitals.beds) avg_beds FROM hospitals JOIN states ON hospitals.state_id = states.state_id WHERE hospitals.rural = true AND hospitals.beds >= 100 GROUP BY states.state;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 194, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: When did Oliver Jarvis have pole position?", "schema": "CREATE TABLE table_21321935_2 (date VARCHAR, pole_position VARCHAR)", "sql": "SELECT date FROM table_21321935_2 WHERE pole_position = 'Oliver Jarvis';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 72, "num_statements": 1} {"question": "Which restaurants had a daily revenue above $500 on Valentine's Day 2022?", "schema": "CREATE TABLE revenue (restaurant_name TEXT, daily_revenue NUMERIC, date DATE); INSERT INTO revenue (restaurant_name, daily_revenue, date) VALUES ('ABC Bistro', 600, '2022-02-14'), ('DEF Diner', 400, '2022-02-14'), ('GHI Grill', 300, '2022-02-14'), ('JKL Bistro', 550, '2022-02-14');", "sql": "SELECT restaurant_name FROM revenue WHERE daily_revenue > 500 AND date = '2022-02-14';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 86, "num_statements": 1} {"question": "What is the total cost of humanitarian assistance provided by the EU to South American countries since 2016?", "schema": "CREATE TABLE eu_humanitarian_assistance (donor VARCHAR(255), recipient VARCHAR(255), cost DECIMAL(10, 2), assistance_date DATE);", "sql": "SELECT SUM(cost) FROM eu_humanitarian_assistance WHERE donor = 'EU' AND recipient LIKE 'South America%' AND assistance_date >= '2016-01-01';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 140, "num_statements": 1} {"question": "PostgreSQL regression test 'arrays': Write the SELECT query (example 340).", "schema": null, "sql": "select string_to_array('1|2|3', '');", "explanation": "Regression test for Arrays in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select string_to_array('1|2|3', '')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 36, "num_statements": 1} {"question": "What is the minimum budget dedicated to technology for social good by organizations in the Latin America region?", "schema": "CREATE TABLE Organizations (id INT, name TEXT, region TEXT, budget_social_good FLOAT); INSERT INTO Organizations (id, name, region, budget_social_good) VALUES (1, 'Tech4Good', 'Latin America', 700000), (2, 'SocialImpactLab', 'Latin America', 500000), (3, 'GreenInnovations', 'North America', 900000), (4, 'CommunityTech', 'Europe', 800000), (5, 'TechForAll', 'Latin America', 600000);", "sql": "SELECT MIN(budget_social_good) FROM Organizations WHERE region = 'Latin America';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 81, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the team for arif asadov", "schema": "CREATE TABLE table_22848931_3 (team VARCHAR, replaced_by VARCHAR)", "sql": "SELECT team FROM table_22848931_3 WHERE replaced_by = 'Arif Asadov';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 68, "num_statements": 1} {"question": "What is the total funding received by companies founded in 2017, ordered by the amount of funding?", "schema": "CREATE TABLE Funding (company_id INT, funding_year INT, amount INT); INSERT INTO Funding (company_id, funding_year, amount) VALUES (1, 2015, 3000000); INSERT INTO Funding (company_id, funding_year, amount) VALUES (2, 2017, 5000000); INSERT INTO Funding (company_id, funding_year, amount) VALUES (3, 2017, 7000000);", "sql": "SELECT company_id, SUM(amount) as total_funding FROM Funding WHERE funding_year = 2017 GROUP BY company_id ORDER BY total_funding DESC;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 135, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who played the Rams on October 2, 2005?", "schema": "CREATE TABLE table_name_22 (opponent VARCHAR, date VARCHAR)", "sql": "SELECT opponent FROM table_name_22 WHERE date = 'october 2, 2005';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "pgTAP test for Functap (assertion 379).", "schema": null, "sql": "SELECT * FROM check_test(\n volatility_is( 'oww', 'immutable', 'whatever' ),\n true,\n 'function_volatility(func, immutable, desc)',\n 'whatever',\n ''\n);", "explanation": "SQL assertion from pgTAP test suite for Functap.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 164, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the most recent year founded that has a nickname of bruins?", "schema": "CREATE TABLE table_name_7 (founded INTEGER, nickname VARCHAR)", "sql": "SELECT MAX(founded) FROM table_name_7 WHERE nickname = 'bruins';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 64, "num_statements": 1} {"question": "get the average age of athletes in the basketball team", "schema": "CREATE TABLE athletes (id INT PRIMARY KEY, name VARCHAR(100), age INT, sport VARCHAR(50), team VARCHAR(50));", "sql": "SELECT AVG(age) FROM athletes WHERE sport = 'Basketball' AND team = 'Chicago Bulls';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 84, "num_statements": 1} {"question": "What is the maximum speed reached by an electric vehicle in a shared fleet in San Francisco?", "schema": "CREATE TABLE shared_ev (vehicle_id INT, trip_id INT, trip_start_time TIMESTAMP, trip_end_time TIMESTAMP, start_latitude DECIMAL(9,6), start_longitude DECIMAL(9,6), end_latitude DECIMAL(9,6), end_longitude DECIMAL(9,6), distance DECIMAL(10,2), max_speed DECIMAL(5,2));", "sql": "SELECT MAX(max_speed) FROM shared_ev WHERE start_longitude BETWEEN -122.6 AND -121.9 AND start_latitude BETWEEN 37.6 AND 38.1;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 126, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: When the Border Conference was held, who was the tournament winner?", "schema": "CREATE TABLE table_22733636_1 (tournament_winner VARCHAR, conference VARCHAR)", "sql": "SELECT tournament_winner FROM table_22733636_1 WHERE conference = 'Border conference';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 86, "num_statements": 1} {"question": "Find the difference in costs between the first and last project in 'Road_Infrastructure' table.", "schema": "CREATE TABLE Road_Infrastructure (id INT, project_name VARCHAR(50), location VARCHAR(50), cost INT);", "sql": "SELECT (MAX(cost) - MIN(cost)) AS cost_difference FROM (SELECT cost FROM Road_Infrastructure ORDER BY id LIMIT 1 OFFSET 1) AS first_project CROSS JOIN (SELECT cost FROM Road_Infrastructure ORDER BY id DESC LIMIT 1) AS last_project;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 231, "num_statements": 1} {"question": "Delete records in circular_economy table where initiative is 'Composting Program'", "schema": "CREATE TABLE circular_economy (id INT PRIMARY KEY, location VARCHAR(255), initiative VARCHAR(255), start_date DATE, end_date DATE);", "sql": "DELETE FROM circular_economy WHERE initiative = 'Composting Program';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_delete", "is_postgresql_specific": false, "sql_length": 69, "num_statements": 1} {"question": "List all farms in the 'farms' table and their corresponding region.", "schema": "CREATE TABLE farms (id INT, name TEXT, region TEXT); INSERT INTO farms (id, name, region) VALUES (1, 'Farm A', 'Asia-Pacific'); INSERT INTO farms (id, name, region) VALUES (2, 'Farm B', 'Europe'); INSERT INTO farms (id, name, region) VALUES (3, 'Farm C', 'Asia-Pacific'); INSERT INTO farms (id, name, region) VALUES (4, 'Farm D', 'Europe');", "sql": "SELECT name, region FROM farms;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 31, "num_statements": 1} {"question": "pgTAP test for Performs Within (assertion 4).", "schema": null, "sql": "SELECT * FROM check_test(\n performs_within( 'SELECT TRUE', 99.99, 99.99 ),\n true,\n 'simple select numeric',\n 'Should run within 99.99 +/- 99.99 ms',\n ''\n);", "explanation": "SQL assertion from pgTAP test suite for Performs Within.", "validation_query": null, "source": "pgtap_tests", "difficulty": "advanced", "category": "plpgsql", "is_postgresql_specific": false, "sql_length": 170, "num_statements": 1} {"question": "What is the minimum donation amount made by donors from each country in Q3 of 2021?", "schema": "CREATE TABLE Donors (id INT, name TEXT, country TEXT, donation FLOAT, quarter TEXT, year INT); INSERT INTO Donors (id, name, country, donation, quarter, year) VALUES (1, 'Charlie', 'USA', 120.0, 'Q3', 2021), (2, 'David', 'Mexico', 90.0, 'Q3', 2021), (3, 'Eve', 'Canada', 110.0, 'Q3', 2021), (4, 'Frank', 'USA', 130.0, 'Q3', 2021);", "sql": "SELECT country, MIN(donation) FROM Donors WHERE quarter = 'Q3' AND year = 2021 GROUP BY country;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 96, "num_statements": 1} {"question": "What is the average diversity score of startups that have received funding in the last 3 years, grouped by industry?", "schema": "CREATE TABLE company (id INT, name TEXT, industry TEXT, founding_year INT, diversity_score INT); INSERT INTO company (id, name, industry, founding_year, diversity_score) VALUES (1, 'InnoTech', 'Tech', 2018, 80); INSERT INTO company (id, name, industry, founding_year, diversity_score) VALUES (2, 'GreenEnergy', 'Energy', 2019, 90); INSERT INTO company (id, name, industry, founding_year, diversity_score) VALUES (3, 'EduTech', 'Education', 2020, 85); CREATE TABLE funding (company_id INT, amount INT, funding_date DATE); INSERT INTO funding (company_id, amount, funding_date) VALUES (1, 1500000, '2021-01-01'); INSERT INTO funding (company_id, amount, funding_date) VALUES (2, 1200000, '2020-01-01'); INSERT INTO funding (company_id, amount, funding_date) VALUES (3, 800000, '2019-01-01');", "sql": "SELECT industry, AVG(diversity_score) FROM company INNER JOIN funding ON company.id = funding.company_id WHERE funding.funding_date >= DATEADD(year, -3, GETDATE()) GROUP BY industry;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 182, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'create_table_like' (example 60).", "schema": null, "sql": "INSERT INTO inhg (xx, yy, x) VALUES ('test', 5, 10);", "explanation": "DML from PostgreSQL core regression test for Create Table Like.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 52, "num_statements": 1} {"question": "What is the average cargo handling time for each port?", "schema": "CREATE TABLE cargo_handling (id INT, ship_id INT, port_id INT, handling_time INT); INSERT INTO cargo_handling (id, ship_id, port_id, handling_time) VALUES (1, 1, 1, 50), (2, 2, 2, 75), (3, 3, 1, 35); CREATE TABLE ports (id INT, name VARCHAR(50)); INSERT INTO ports (id, name) VALUES (1, 'Port of New York'), (2, 'Port of Los Angeles'), (3, 'Port of Miami');", "sql": "SELECT ports.name, AVG(cargo_handling.handling_time) FROM cargo_handling JOIN ports ON cargo_handling.port_id = ports.id GROUP BY ports.id, ports.name;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 151, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many players went to depaul?", "schema": "CREATE TABLE table_16494599_2 (player VARCHAR, school_club_team VARCHAR)", "sql": "SELECT COUNT(player) FROM table_16494599_2 WHERE school_club_team = 'DePaul';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 77, "num_statements": 1} {"question": "What is the distribution of employees by department and job level?", "schema": "CREATE TABLE departments (dept_id INT, dept_name TEXT); INSERT INTO departments (dept_id, dept_name) VALUES (1, 'HR'), (2, 'IT'), (3, 'Sales'); CREATE TABLE employees (employee_id INT, name TEXT, salary INT, dept_id INT, job_level TEXT); INSERT INTO employees (employee_id, name, salary, dept_id, job_level) VALUES (1, 'Alice', 50000, 1, 'Manager'), (2, 'Bob', 60000, 2, 'Senior'), (3, 'Charlie', 55000, 1, 'Junior'), (4, 'Dave', 65000, 2, 'Senior'), (5, 'Eve', 52000, 1, 'Manager');", "sql": "SELECT dept_name, job_level, COUNT(*) AS num_employees FROM employees JOIN departments ON employees.dept_id = departments.dept_id GROUP BY dept_name, job_level;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 160, "num_statements": 1} {"question": "What is the minimum ocean acidification level ever recorded?", "schema": "CREATE TABLE ocean_acidification (id INT, avg_level FLOAT); INSERT INTO ocean_acidification (id, avg_level) VALUES (1, 7.5); INSERT INTO ocean_acidification (id, avg_level) VALUES (2, 8.0);", "sql": "SELECT MIN(avg_level) FROM ocean_acidification;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 47, "num_statements": 1} {"question": "Delete the TalentAcquisition table", "schema": "CREATE TABLE TalentAcquisition (ApplicantID INT PRIMARY KEY, JobTitle VARCHAR(30), Department VARCHAR(20), ApplicationDate DATE);", "sql": "DROP TABLE TalentAcquisition;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 29, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the Laid down for the Hyperion ship?", "schema": "CREATE TABLE table_name_55 (laid_down VARCHAR, ship VARCHAR)", "sql": "SELECT laid_down FROM table_name_55 WHERE ship = 'hyperion';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 60, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the place in Germany with a score of 69-69=138?", "schema": "CREATE TABLE table_name_24 (place VARCHAR, country VARCHAR, score VARCHAR)", "sql": "SELECT place FROM table_name_24 WHERE score = 69 - 69 = 138 AND country = 'germany';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 84, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'stats_ext' (example 735).", "schema": null, "sql": "CREATE FUNCTION op_leak(record, record) RETURNS bool\n AS 'BEGIN RAISE NOTICE ''op_leak => %, %'', $1, $2; RETURN $1 < $2; END'\n LANGUAGE plpgsql;", "explanation": "DDL from PostgreSQL core regression test for Stats Ext.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "plpgsql_function", "is_postgresql_specific": true, "sql_length": 151, "num_statements": 3} {"question": "How many patients have been treated in mental health facilities in Florida since 2017?", "schema": "CREATE TABLE patients (id INT, treatment_date DATE, state TEXT); INSERT INTO patients (id, treatment_date, state) VALUES (1, '2017-01-01', 'Florida'); INSERT INTO patients (id, treatment_date, state) VALUES (2, '2016-12-31', 'California');", "sql": "SELECT COUNT(*) FROM patients WHERE state = 'Florida' AND treatment_date >= '2017-01-01';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 89, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What visiting team has a record of 9–5–2?", "schema": "CREATE TABLE table_name_93 (visitor VARCHAR, record VARCHAR)", "sql": "SELECT visitor FROM table_name_93 WHERE record = '9–5–2';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 57, "num_statements": 1} {"question": "What is the average cargo handling time for vessels in port 'Seattle'?", "schema": "CREATE TABLE cargo_handling (id INT, vessel_name VARCHAR(50), port VARCHAR(50), handling_time INT); INSERT INTO cargo_handling (id, vessel_name, port, handling_time) VALUES (1, 'Seattle Voyager', 'Seattle', 8), (2, 'Seattle Voyager', 'Seattle', 10);", "sql": "SELECT AVG(handling_time) FROM cargo_handling WHERE port = 'Seattle';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 69, "num_statements": 1} {"question": "Which climate monitoring stations are located in the same country as 'Station A'?", "schema": "CREATE TABLE climate_monitoring_stations (id INT, station_name VARCHAR(255), country VARCHAR(255)); INSERT INTO climate_monitoring_stations (id, station_name, country) VALUES (1, 'Station A', 'canada'), (2, 'Station B', 'greenland'), (3, 'Station C', 'canada'), (4, 'Station D', 'norway');", "sql": "SELECT station_name FROM climate_monitoring_stations WHERE country = (SELECT country FROM climate_monitoring_stations WHERE station_name = 'Station A');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 152, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the last appearance for music city bowl", "schema": "CREATE TABLE table_2517159_1 (last_appearance VARCHAR, name_of_bowl VARCHAR)", "sql": "SELECT last_appearance FROM table_2517159_1 WHERE name_of_bowl = 'Music City Bowl';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 83, "num_statements": 1} {"question": "Which explainable AI techniques are used in 'lime' tool?", "schema": "CREATE TABLE explainable_ai (tool VARCHAR(255), technique VARCHAR(255)); INSERT INTO explainable_ai (tool, technique) VALUES ('LIME', 'Local Surrogate Models'), ('SHAP', 'Additive Feature Attribution');", "sql": "SELECT technique FROM explainable_ai WHERE tool = 'LIME';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 57, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What purses had a winners share equal to $428650?", "schema": "CREATE TABLE table_1520559_1 (purse___$__ VARCHAR, winners_share__$_ VARCHAR)", "sql": "SELECT purse___$__ FROM table_1520559_1 WHERE winners_share__$_ = 428650;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 73, "num_statements": 1} {"question": "pgTAP test for Coltap (assertion 72).", "schema": null, "sql": "/****************************************************************************/\n-- Test col_default_is().\n\nSELECT * FROM check_test(\n col_default_is( 'public', 'sometab', 'name', ''::text, 'name should default to empty string' ),\n true,\n 'col_default_is( sch, tab, col, def, desc )',\n 'name should default to empty string',\n ''\n);", "explanation": "SQL assertion from pgTAP test suite for Coltap.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 344, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'partition_merge' (example 62).", "schema": null, "sql": "CREATE TABLE sales_dec2022 PARTITION OF sales_date FOR VALUES FROM (2021, 12, 1) TO (2022, 1, 1);", "explanation": "DDL from PostgreSQL core regression test for Partition Merge.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_table", "is_postgresql_specific": false, "sql_length": 97, "num_statements": 1} {"question": "What is the total production of Lanthanum in 2018 and 2019?", "schema": "CREATE TABLE production (year INT, element VARCHAR(10), quantity INT); INSERT INTO production (year, element, quantity) VALUES (2017, 'Lanthanum', 1500), (2018, 'Lanthanum', 1600), (2019, 'Lanthanum', 1700), (2020, 'Lanthanum', 1800);", "sql": "SELECT SUM(quantity) FROM production WHERE element = 'Lanthanum' AND year IN (2018, 2019);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 90, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'dict_int' (example 20).", "schema": null, "sql": "select ts_lexize('intdict', '196850350328');", "explanation": "Example query from the 'dict_int' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 44, "num_statements": 1} {"question": "What is the minimum adoption revenue of virtual reality hardware in Middle East?", "schema": "CREATE TABLE VRHardwareMiddleEast (HardwareID INT, HardwareName VARCHAR(100), AdoptionRevenue DECIMAL(10,2), Country VARCHAR(50)); INSERT INTO VRHardwareMiddleEast (HardwareID, HardwareName, AdoptionRevenue, Country) VALUES (1, 'VR Headset A', 700.00, 'Saudi Arabia'), (2, 'VR Headset B', 800.00, 'United Arab Emirates'), (3, 'VR Headset C', 900.00, 'Turkey');", "sql": "SELECT MIN(AdoptionRevenue) FROM VRHardwareMiddleEast WHERE Country = 'Middle East';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 84, "num_statements": 1} {"question": "What is the total number of readers in each country?", "schema": "CREATE TABLE readers (id INT, name VARCHAR(50), age INT, gender VARCHAR(10), country VARCHAR(50));", "sql": "SELECT country, COUNT(*) as count FROM readers GROUP BY country;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 64, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What are all the fault descriptions and the fault status of all the faults recoreded in the logs?", "schema": "CREATE TABLE Fault_Log (fault_description VARCHAR, fault_log_entry_id VARCHAR); CREATE TABLE Fault_Log_Parts (fault_status VARCHAR, fault_log_entry_id VARCHAR)", "sql": "SELECT T1.fault_description, T2.fault_status FROM Fault_Log AS T1 JOIN Fault_Log_Parts AS T2 ON T1.fault_log_entry_id = T2.fault_log_entry_id;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 142, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What team has tony parker (10) as the high assists, kurt thomas (12) as the high rebounds?", "schema": "CREATE TABLE table_name_57 (team VARCHAR, high_assists VARCHAR, high_rebounds VARCHAR)", "sql": "SELECT team FROM table_name_57 WHERE high_assists = 'tony parker (10)' AND high_rebounds = 'kurt thomas (12)';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 110, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the record for the game on April 9, 2008?", "schema": "CREATE TABLE table_name_9 (record VARCHAR, date VARCHAR)", "sql": "SELECT record FROM table_name_9 WHERE date = 'april 9, 2008';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 61, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is Japan's Area km²?", "schema": "CREATE TABLE table_name_45 (area_km² VARCHAR, country VARCHAR)", "sql": "SELECT area_km² FROM table_name_45 WHERE country = 'japan';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 59, "num_statements": 1} {"question": "List all donations made to the 'Education' program in the 'NonprofitDB' database, along with the donor's name and address.", "schema": "CREATE TABLE Donation (ID INT, Amount DECIMAL(10, 2), DonorID INT, ProgramID INT); INSERT INTO Donation (ID, Amount, DonorID, ProgramID) VALUES (1, 500.00, 1, 1), (2, 1000.00, 2, 1), (3, 250.00, 3, 1), (4, 750.00, 4, 2); CREATE TABLE Donor (ID INT, Name VARCHAR(255), Address VARCHAR(255)); INSERT INTO Donor (ID, Name, Address) VALUES (1, 'John Doe', '123 Main St'), (2, 'Jane Smith', '456 Elm St'), (3, 'Alice Johnson', '789 Oak St'), (4, 'Bob Brown', '321 Pine St');", "sql": "SELECT d.Amount, d.DonorID, donor.Name, donor.Address FROM Donation d JOIN Donor donor ON d.DonorID = donor.ID WHERE d.ProgramID = 1;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 133, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the total roll with a decile less than 7, and an authority of state, in the Macraes Flat area?", "schema": "CREATE TABLE table_name_98 (roll VARCHAR, area VARCHAR, decile VARCHAR, authority VARCHAR)", "sql": "SELECT COUNT(roll) FROM table_name_98 WHERE decile = 7 AND authority = 'state' AND area = 'macraes flat';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 105, "num_statements": 1} {"question": "Update the org_location of 'Habitat for Humanity' to 'San Francisco' in the 'Organizations' table", "schema": "CREATE TABLE Organizations (org_id INT, org_name TEXT, org_location TEXT);", "sql": "UPDATE Organizations SET org_location = 'San Francisco' WHERE org_name = 'Habitat for Humanity';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 96, "num_statements": 1} {"question": "List all policies that are related to mobility impairments and the corresponding policy advocates.", "schema": "CREATE TABLE Policies (PolicyID INT, PolicyName VARCHAR(50), PolicyType VARCHAR(50)); INSERT INTO Policies VALUES (1, 'Ramp Accessibility', 'Infrastructure'); CREATE TABLE PolicyAdvocates (AdvocateID INT, AdvocateName VARCHAR(50), PolicyID INT); INSERT INTO PolicyAdvocates VALUES (1, 'Jane Doe', 1); CREATE TABLE PolicyDetails (PolicyID INT, DisabilityType VARCHAR(50)); INSERT INTO PolicyDetails VALUES (1, 'Mobility Impairment');", "sql": "SELECT p.PolicyName, pa.AdvocateName FROM Policies p INNER JOIN PolicyDetails pd ON p.PolicyID = pd.PolicyID INNER JOIN PolicyAdvocates pa ON p.PolicyID = pa.PolicyID WHERE pd.DisabilityType = 'Mobility Impairment';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 215, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who did the team play against when a record of 3-1, #16 was achieved?", "schema": "CREATE TABLE table_21063459_1 (opponent VARCHAR, record VARCHAR)", "sql": "SELECT opponent FROM table_21063459_1 WHERE record = '3-1, #16';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 64, "num_statements": 1} {"question": "What are the names of the biosensor technology development projects in Canada?", "schema": "CREATE SCHEMA if not exists biosensors;CREATE TABLE if not exists biosensors.projects (id INT PRIMARY KEY, name VARCHAR(100), country VARCHAR(50)); INSERT INTO biosensors.projects (id, name, country) VALUES (1, 'ProjectX', 'Germany'), (2, 'ProjectY', 'Canada'), (3, 'ProjectZ', 'USA');", "sql": "SELECT name FROM biosensors.projects WHERE country = 'Canada';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 62, "num_statements": 1} {"question": "List all defense diplomacy events that have taken place in the Asia-Pacific region in the last 5 years, along with the number of participating countries.", "schema": "CREATE TABLE DefenseDiplomacy (ID INT, EventName TEXT, EventDate DATE, ParticipatingCountries TEXT); INSERT INTO DefenseDiplomacy VALUES (1, 'Event 1', '2017-01-01', 'China, Japan, South Korea'); CREATE VIEW AsiaPacific AS SELECT Country FROM DefenseDiplomacy WHERE Country IN ('China', 'Japan', 'South Korea', 'Australia', 'India');", "sql": "SELECT EventName, ParticipatingCountries, COUNT(DISTINCT SUBSTRING_INDEX(ParticipatingCountries, ',', n)) as NumberOfCountries FROM DefenseDiplomacy d CROSS JOIN (SELECT numbers.N FROM (SELECT 1 as N UNION ALL SELECT 2 UNION ALL SELECT 3) numbers) n JOIN AsiaPacific ap ON d.Country = ap.Country WHERE EventDate BETWEEN DATEADD(year, -5, GETDATE()) AND GETDATE() GROUP BY EventName, ParticipatingCountries;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 406, "num_statements": 1} {"question": "What is the percentage of users who liked articles about 'sports' and also liked articles about 'entertainment'?", "schema": "CREATE TABLE users (id INT, name TEXT, likes INT); CREATE TABLE user_likes (user_id INT, article_id INT); CREATE TABLE articles (id INT, title TEXT, category TEXT);", "sql": "SELECT (COUNT(*) / (SELECT COUNT(*) FROM users)) * 100.0 AS percentage FROM user_likes JOIN users ON user_likes.user_id = users.id JOIN articles ON user_likes.article_id = articles.id WHERE articles.category = 'sports' INTERSECT SELECT user_likes.user_id FROM user_likes JOIN users ON user_likes.user_id = users.id JOIN articles ON user_likes.article_id = articles.id WHERE articles.category = 'entertainment';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 410, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What percentages of social democratic correspond to a 5.5% left bloc?", "schema": "CREATE TABLE table_1463383_1 (social_democratic VARCHAR, left_bloc VARCHAR)", "sql": "SELECT social_democratic FROM table_1463383_1 WHERE left_bloc = '5.5%';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 71, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: find the program owners that have some programs in both morning and night time.", "schema": "CREATE TABLE broadcast (program_id VARCHAR, Time_of_day VARCHAR); CREATE TABLE program (owner VARCHAR, program_id VARCHAR)", "sql": "SELECT t1.owner FROM program AS t1 JOIN broadcast AS t2 ON t1.program_id = t2.program_id WHERE t2.Time_of_day = 'Morning' INTERSECT SELECT t1.owner FROM program AS t1 JOIN broadcast AS t2 ON t1.program_id = t2.program_id WHERE t2.Time_of_day = 'Night';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 252, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: what is the biggest series episode number whose production code is 2t7211?", "schema": "CREATE TABLE table_21979779_1 (no INTEGER, production_code VARCHAR)", "sql": "SELECT MAX(no) FROM table_21979779_1 WHERE production_code = '2T7211';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 70, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many problems does the product with the most problems have? List the number of the problems and product name.", "schema": "CREATE TABLE problems (product_id VARCHAR); CREATE TABLE product (product_name VARCHAR, product_id VARCHAR)", "sql": "SELECT COUNT(*), T1.product_name FROM product AS T1 JOIN problems AS T2 ON T1.product_id = T2.product_id GROUP BY T1.product_name ORDER BY COUNT(*) DESC LIMIT 1;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 161, "num_statements": 1} {"question": "List all the forest types with their respective wildlife habitat areas.", "schema": "CREATE TABLE forest_types (id INT, type VARCHAR(255)); CREATE TABLE wildlife_habitat (id INT, type VARCHAR(255), area INT); INSERT INTO forest_types (id, type) VALUES (1, 'Coniferous'), (2, 'Deciduous'), (3, 'Mangrove'); INSERT INTO wildlife_habitat (id, type, area) VALUES (1, 'Coniferous', 500), (2, 'Deciduous', 700), (3, 'Mangrove', 600);", "sql": "SELECT forest_types.type, wildlife_habitat.area FROM forest_types INNER JOIN wildlife_habitat ON forest_types.id = wildlife_habitat.id;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 135, "num_statements": 1} {"question": "Delete all records in the \"RuralInfrastructure\" table where type is 'dam' and the construction_year is before 1980", "schema": "CREATE TABLE RuralInfrastructure (id INT PRIMARY KEY, type VARCHAR(255), construction_year INT);", "sql": "DELETE FROM RuralInfrastructure WHERE type = 'dam' AND construction_year < 1980;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_delete", "is_postgresql_specific": false, "sql_length": 80, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which province has the partido Socialista del Pueblo Extremeño party?", "schema": "CREATE TABLE table_name_31 (province VARCHAR, party VARCHAR)", "sql": "SELECT province FROM table_name_31 WHERE party = 'partido socialista del pueblo extremeño';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 91, "num_statements": 1} {"question": "What is the total number of artifacts and the total number of unique materials represented at each excavation site?", "schema": "CREATE TABLE ExcavationSites (SiteID int, SiteName varchar(50), Location varchar(50)); CREATE TABLE Artifacts (ArtifactID int, SiteID int, Material varchar(20), Description varchar(100));", "sql": "SELECT ExcavationSites.SiteName, COUNT(DISTINCT Artifacts.ArtifactID) AS NumberOfArtifacts, COUNT(DISTINCT Artifacts.Material) AS NumberOfMaterials FROM ExcavationSites INNER JOIN Artifacts ON ExcavationSites.SiteID = Artifacts.SiteID GROUP BY ExcavationSites.SiteName;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 269, "num_statements": 1} {"question": "What is the maximum number of threat indicators for the healthcare sector in the United States?", "schema": "CREATE TABLE threat_indicators (id INT, sector TEXT, country TEXT, confidence INT); INSERT INTO threat_indicators (id, sector, country, confidence) VALUES (1, 'Healthcare', 'USA', 85); INSERT INTO threat_indicators (id, sector, country, confidence) VALUES (2, 'Healthcare', 'USA', 70); INSERT INTO threat_indicators (id, sector, country, confidence) VALUES (3, 'Healthcare', 'Canada', 88);", "sql": "SELECT MAX(confidence) FROM threat_indicators WHERE sector = 'Healthcare' AND country = 'USA';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 94, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: In what competition was the score reported as 12.8 (80) - 8.7 (55)?", "schema": "CREATE TABLE table_name_30 (competition VARCHAR, score VARCHAR)", "sql": "SELECT competition FROM table_name_30 WHERE score = '12.8 (80) - 8.7 (55)';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 75, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the highest number of games drawn, where the games played was less than 7?", "schema": "CREATE TABLE table_name_86 (drawn INTEGER, games INTEGER)", "sql": "SELECT MAX(drawn) FROM table_name_86 WHERE games < 7;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 53, "num_statements": 1} {"question": "What is the number of drugs approved for use in 'RegionH' between Q1 and Q3 of 2020?", "schema": "CREATE TABLE drug_approval(drug_name TEXT, region TEXT, approval_quarter INT); INSERT INTO drug_approval (drug_name, region, approval_quarter) VALUES ('DrugA', 'RegionX', 1), ('DrugB', 'RegionY', 2), ('DrugD', 'RegionH', 1), ('DrugC', 'RegionZ', 4), ('DrugE', 'RegionH', 3), ('DrugF', 'RegionH', 2);", "sql": "SELECT COUNT(*) FROM drug_approval WHERE region = 'RegionH' AND approval_quarter BETWEEN 1 AND 3;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 97, "num_statements": 1} {"question": "What is the maximum number of streams for R&B songs released in 2021?", "schema": "CREATE TABLE songs (song_id INT, genre VARCHAR(20), release_year INT, streams INT); INSERT INTO songs (song_id, genre, release_year, streams) VALUES (1, 'R&B', 2021, 7000); INSERT INTO songs (song_id, genre, release_year, streams) VALUES (2, 'R&B', 2021, 8000); INSERT INTO songs (song_id, genre, release_year, streams) VALUES (3, 'R&B', 2021, 9000);", "sql": "SELECT MAX(streams) FROM songs WHERE genre = 'R&B' AND release_year = 2021;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 75, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who was the writer of the episode with a production code of 5aky04?", "schema": "CREATE TABLE table_26826304_1 (written_by VARCHAR, production_code VARCHAR)", "sql": "SELECT written_by FROM table_26826304_1 WHERE production_code = '5AKY04';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 73, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'subselect' (example 183).", "schema": null, "sql": "create temp table inner_text (c1 text, c2 text);", "explanation": "DDL from PostgreSQL core regression test for Subselect.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 48, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who directed the episode which originally aired February 4, 2005?", "schema": "CREATE TABLE table_228973_11 (directed_by VARCHAR, original_air_date VARCHAR)", "sql": "SELECT directed_by FROM table_228973_11 WHERE original_air_date = 'February 4, 2005';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 85, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What's the Nordbayern with a Württemberg of Union Böckingen in the year before 1932?", "schema": "CREATE TABLE table_name_92 (nordbayern VARCHAR, year VARCHAR, württemberg VARCHAR)", "sql": "SELECT nordbayern FROM table_name_92 WHERE year < 1932 AND württemberg = 'union böckingen';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 91, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which season has Caps lower than 36 for the Club of NAC Breda?", "schema": "CREATE TABLE table_name_35 (season VARCHAR, caps VARCHAR, club VARCHAR)", "sql": "SELECT season FROM table_name_35 WHERE caps < 36 AND club = 'nac breda';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 72, "num_statements": 1} {"question": "What is the total cargo capacity for all vessels in the 'vessels' table that have an even ID?", "schema": "CREATE TABLE vessels (vessel_id INT, vessel_name VARCHAR(50), registry VARCHAR(50), capacity INT); INSERT INTO vessels (vessel_id, vessel_name, registry, capacity) VALUES (1, 'CSCL Globe', 'China', 197500), (2, 'OOCL Hong Kong', 'Hong Kong', 210000), (3, 'MSC Maya', 'Panama', 192240);", "sql": "SELECT SUM(capacity) FROM vessels WHERE MOD(vessel_id, 2) = 0;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 62, "num_statements": 1} {"question": "PostgreSQL regression test 'jsonb_jsonpath': Write the SELECT query (example 189).", "schema": null, "sql": "select jsonb_path_query('[1,\"2\",3]', '+$');", "explanation": "Regression test for Jsonb Jsonpath in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select jsonb_path_query('[1,\"2\",3]', '+$')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "advanced", "category": "query_select", "is_postgresql_specific": false, "sql_length": 43, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What score has houston texans as the opponent?", "schema": "CREATE TABLE table_name_62 (score VARCHAR, opponent VARCHAR)", "sql": "SELECT score FROM table_name_62 WHERE opponent = 'houston texans';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'rowsecurity' (example 299).", "schema": null, "sql": "INSERT INTO s2 (SELECT x, public.fipshash(x::text) FROM generate_series(-6,6) x);", "explanation": "DML from PostgreSQL core regression test for Rowsecurity.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 81, "num_statements": 1} {"question": "What is the average market price of Neodymium produced in the US in 2020?", "schema": "CREATE TABLE Neodymium_Production (id INT, year INT, country VARCHAR(255), quantity FLOAT, market_price FLOAT);", "sql": "SELECT AVG(market_price) FROM Neodymium_Production WHERE year = 2020 AND country = 'USA';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 89, "num_statements": 1} {"question": "What is the total number of military vehicles produced by companies based in the Asia-Pacific region in the 'military_vehicles' table?", "schema": "CREATE TABLE military_vehicles (company VARCHAR(50), region VARCHAR(50), production_year INT, quantity INT); INSERT INTO military_vehicles (company, region, production_year, quantity) VALUES ('Company A', 'Asia-Pacific', 2010, 500), ('Company B', 'Asia-Pacific', 2015, 700), ('Company C', 'Europe', 2012, 600), ('Company D', 'Americas', 2018, 800);", "sql": "SELECT SUM(quantity) FROM military_vehicles WHERE region = 'Asia-Pacific';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 74, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What office was law preservation ticket holder William E. Barron running for?", "schema": "CREATE TABLE table_name_51 (office VARCHAR, law_preservation_ticket VARCHAR)", "sql": "SELECT office FROM table_name_51 WHERE law_preservation_ticket = 'william e. barron';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 85, "num_statements": 1} {"question": "Insert data into 'agricultural_projects' table", "schema": "CREATE TABLE agricultural_projects (id INT PRIMARY KEY, name VARCHAR(100), location VARCHAR(50), funding_source VARCHAR(50), start_date DATE, end_date DATE);", "sql": "INSERT INTO agricultural_projects (id, name, location, funding_source, start_date, end_date) VALUES (1, 'Solar Powered Irrigation', 'Rural Kenya', 'World Bank', '2022-01-01', '2023-12-31');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 189, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the lowest area for cannonvale that speaks afrikaans?", "schema": "CREATE TABLE table_name_31 (area__km_2__ INTEGER, most_spoken_language VARCHAR, place VARCHAR)", "sql": "SELECT MIN(area__km_2__) FROM table_name_31 WHERE most_spoken_language = 'afrikaans' AND place = 'cannonvale';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 110, "num_statements": 1} {"question": "Delete all records in the species table where the scientific_name is 'Quercus robur'", "schema": "CREATE TABLE species (id INT PRIMARY KEY, common_name TEXT, scientific_name TEXT, region TEXT); INSERT INTO species (id, common_name, scientific_name, region) VALUES (1, 'English Oak', 'Quercus robur', 'Europe');", "sql": "DELETE FROM species WHERE scientific_name = 'Quercus robur';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_delete", "is_postgresql_specific": false, "sql_length": 60, "num_statements": 1} {"question": "Find the earliest date of articles in 'Technology'", "schema": "CREATE TABLE articles (id INT, title VARCHAR(100), topic VARCHAR(50), date DATE); INSERT INTO articles (id, title, topic, date) VALUES (1, 'Article 1', 'Politics', '2021-01-01'); INSERT INTO articles (id, name, topic, date) VALUES (2, 'Article 2', 'Sports', '2021-01-02'); INSERT INTO articles (id, title, topic, date) VALUES (3, 'Article 3', 'Technology', '2021-01-03');", "sql": "SELECT MIN(date) as earliest_date FROM articles WHERE topic = 'Technology';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 75, "num_statements": 1} {"question": "What is the total number of mental health parity violations that have been reported in each region in the last year?", "schema": "CREATE TABLE mental_health_parity (id INT PRIMARY KEY, incident_date DATE, incident_description TEXT, location TEXT, resolved BOOLEAN); CREATE VIEW regions AS SELECT location AS region FROM mental_health_parity GROUP BY location;", "sql": "SELECT region, COUNT(*) FROM mental_health_parity WHERE incident_date >= DATE_SUB(CURRENT_DATE, INTERVAL 1 YEAR) GROUP BY region;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 129, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: When the second intermediate period of egypt is the ubaid period in mesopotamia how many early calcolithics are there?", "schema": "CREATE TABLE table_23537091_1 (early_chalcolithic VARCHAR, ubaid_period_in_mesopotamia VARCHAR)", "sql": "SELECT COUNT(early_chalcolithic) FROM table_23537091_1 WHERE ubaid_period_in_mesopotamia = 'Second Intermediate Period of Egypt';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 129, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the earliest year with an entry from Rotary Watches Stanley BRM and a BRM P207 with more than 0 points?", "schema": "CREATE TABLE table_name_99 (year INTEGER, points VARCHAR, entrant VARCHAR, chassis VARCHAR)", "sql": "SELECT MIN(year) FROM table_name_99 WHERE entrant = 'rotary watches stanley brm' AND chassis = 'brm p207' AND points > 0;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 121, "num_statements": 1} {"question": "How many complaints were received for each service type in the last month?", "schema": "CREATE TABLE service_complaints (complaint_id INT, complaint_type VARCHAR(50), complaint_date DATE); INSERT INTO service_complaints (complaint_id, complaint_type, complaint_date) VALUES (1, 'Mobile Data', '2022-03-01'), (2, 'Broadband Internet', '2022-03-15'), (3, 'VoIP', '2022-04-01');", "sql": "SELECT COUNT(*) as total_complaints, complaint_type FROM service_complaints WHERE complaint_date >= DATE_TRUNC('month', NOW()) - INTERVAL '1 month' GROUP BY complaint_type;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 172, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: When 5 is the l what is the lowest amount of stolen ends?", "schema": "CREATE TABLE table_29565541_2 (stolen_ends INTEGER, l VARCHAR)", "sql": "SELECT MIN(stolen_ends) FROM table_29565541_2 WHERE l = 5;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 58, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the try bonus for tries against is 70", "schema": "CREATE TABLE table_13741576_4 (try_bonus VARCHAR, tries_against VARCHAR)", "sql": "SELECT try_bonus FROM table_13741576_4 WHERE tries_against = '70';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "What is the maximum budget allocated to any digital divide initiative?", "schema": "CREATE TABLE div_initiatives (name TEXT, budget INTEGER); INSERT INTO div_initiatives (name, budget) VALUES ('DivBridge', 800000), ('CloseGap', 900000), ('ConnectWorld', 700000);", "sql": "SELECT MAX(budget) FROM div_initiatives;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 40, "num_statements": 1} {"question": "What is the average temperature recorded for each spacecraft during their respective missions?", "schema": "CREATE TABLE spacecraft_temperatures (spacecraft_name TEXT, mission_name TEXT, temperature FLOAT); INSERT INTO spacecraft_temperatures (spacecraft_name, mission_name, temperature) VALUES ('Voyager 1', 'Voyager 1 Mission', 80.5), ('Voyager 2', 'Voyager 2 Mission', 78.2), ('Cassini', 'Cassini Mission', 130.6);", "sql": "SELECT spacecraft_name, AVG(temperature) as avg_temperature FROM spacecraft_temperatures GROUP BY spacecraft_name;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 114, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the Main use of emley moor tower (mk.3)?", "schema": "CREATE TABLE table_name_54 (main_use VARCHAR, name VARCHAR)", "sql": "SELECT main_use FROM table_name_54 WHERE name = 'emley moor tower (mk.3)';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 74, "num_statements": 1} {"question": "What is the most common art medium used by local artists?", "schema": "CREATE TABLE artists (id INT, name TEXT, city TEXT, country TEXT);CREATE TABLE art_pieces (id INT, title TEXT, medium TEXT, artist_id INT);", "sql": "SELECT a.city, ap.medium, COUNT(ap.id) as num_pieces FROM artists a JOIN art_pieces ap ON a.id = ap.artist_id GROUP BY a.city, ap.medium ORDER BY num_pieces DESC LIMIT 1;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 170, "num_statements": 1} {"question": "How many heritage sites in the Americas have more than 100,000 annual visitors?", "schema": "CREATE TABLE HeritageSites (id INT, name TEXT, region TEXT, annual_visitors INT); INSERT INTO HeritageSites (id, name, region, annual_visitors) VALUES (1, 'Machu Picchu', 'Americas', 120000), (2, 'Eiffel Tower', 'Europe', 7000000), (3, 'Chichen Itza', 'Americas', 1500000);", "sql": "SELECT COUNT(*) FROM HeritageSites WHERE region = 'Americas' AND annual_visitors > 100000;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 90, "num_statements": 1} {"question": "PostgreSQL regression test 'tsdicts': Write the SELECT query (example 83).", "schema": null, "sql": "SELECT dictinitoption FROM pg_ts_dict WHERE dictname = 'synonym';", "explanation": "Regression test for Tsdicts in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT dictinitoption FROM pg_ts_dict WHERE dictname = 'synonym') AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 65, "num_statements": 1} {"question": "Find the change in average property price per square foot from 2021 to 2022 in each neighborhood.", "schema": "CREATE TABLE neighborhoods (name VARCHAR(50), id INT, PRIMARY KEY (id)); INSERT INTO neighborhoods (name, id) VALUES ('Brewerytown', 1), ('Fairmount', 2); CREATE TABLE properties (id INT, neighborhood_id INT, price FLOAT, livable_square_feet INT, year INT, PRIMARY KEY (id), FOREIGN KEY (neighborhood_id) REFERENCES neighborhoods(id));", "sql": "SELECT a.neighborhood_id, AVG(a.price/a.livable_square_feet) - AVG(b.price/b.livable_square_feet) AS price_change_per_sqft FROM properties a JOIN properties b ON a.neighborhood_id = b.neighborhood_id WHERE a.year = 2022 AND b.year = 2021 GROUP BY a.neighborhood_id;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 265, "num_statements": 1} {"question": "What are the names and average delivery times of all cargoes that were transported by the vessel 'MSC Chariot' and unloaded at the port of Oakland?", "schema": "CREATE TABLE vessels(id INT, name VARCHAR(255)); INSERT INTO vessels VALUES (1, 'MSC Chariot'); CREATE TABLE cargo(id INT, name VARCHAR(255), delivery_time INT, vessel_id INT, port_id INT); CREATE TABLE ports(id INT, name VARCHAR(255));", "sql": "SELECT cargo.name, AVG(cargo.delivery_time) FROM cargo INNER JOIN vessels ON cargo.vessel_id = vessels.id INNER JOIN ports ON cargo.port_id = ports.id WHERE vessels.name = 'MSC Chariot' AND ports.name = 'Oakland' GROUP BY cargo.name;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 233, "num_statements": 1} {"question": "What is the average tourist spending in Indonesia?", "schema": "CREATE TABLE Spending (id INT, country TEXT, year INT, spending FLOAT); INSERT INTO Spending (id, country, year, spending) VALUES (1, 'Indonesia', 2018, 1000), (2, 'Indonesia', 2019, 1200), (3, 'Indonesia', 2020, 800);", "sql": "SELECT AVG(spending) FROM Spending WHERE country = 'Indonesia';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 63, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What are the transit connections from Pioneer Square U?", "schema": "CREATE TABLE table_22771048_2 (transit_connections VARCHAR, station VARCHAR)", "sql": "SELECT transit_connections FROM table_22771048_2 WHERE station = 'Pioneer Square U';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 84, "num_statements": 1} {"question": "What is the average energy efficiency rating of buildings in New York and Chicago?", "schema": "CREATE TABLE building_energy (building_id INT, city VARCHAR(50), rating FLOAT); INSERT INTO building_energy (building_id, city, rating) VALUES (1, 'New York', 85.3), (2, 'Chicago', 78.9), (3, 'New York', 92.1), (4, 'Chicago', 88.7), (5, 'New York', 74.5), (6, 'Chicago', 69.2);", "sql": "SELECT AVG(rating) FROM building_energy WHERE city IN ('New York', 'Chicago');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 78, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the maximum memory speed for frequencies between 2.93-3.2ghz?", "schema": "CREATE TABLE table_24018112_1 (max_memory_speed VARCHAR, frequency VARCHAR)", "sql": "SELECT max_memory_speed FROM table_24018112_1 WHERE frequency = '2.93-3.2GHz';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 78, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: WHAT IS THE SUM OF AST AVG WITH RANK 5 AND GAMES BIGGER THAN 108?", "schema": "CREATE TABLE table_name_20 (ast_avg INTEGER, rank VARCHAR, games VARCHAR)", "sql": "SELECT SUM(ast_avg) FROM table_name_20 WHERE rank = 5 AND games > 108;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 70, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What Constellation has a Object type of globular cluster and a NGC number of 5986?", "schema": "CREATE TABLE table_name_17 (constellation VARCHAR, object_type VARCHAR, ngc_number VARCHAR)", "sql": "SELECT constellation FROM table_name_17 WHERE object_type = 'globular cluster' AND ngc_number = 5986;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": true, "sql_length": 101, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the production code for episode 96 in the series?", "schema": "CREATE TABLE table_2818164_5 (production_code INTEGER, no_in_series VARCHAR)", "sql": "SELECT MIN(production_code) FROM table_2818164_5 WHERE no_in_series = '96';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 75, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the Time/Retired for emerson fittipaldi?", "schema": "CREATE TABLE table_name_14 (time_retired VARCHAR, driver VARCHAR)", "sql": "SELECT time_retired FROM table_name_14 WHERE driver = 'emerson fittipaldi';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 75, "num_statements": 1} {"question": "What is the average account balance for clients in Oceania in Q2 2022?", "schema": "CREATE TABLE accounts (client_id INT, balance DECIMAL(10,2), country VARCHAR(50), account_date DATE); INSERT INTO accounts (client_id, balance, country, account_date) VALUES (1, 12000.00, 'Australia', '2022-04-01'), (2, 25000.00, 'New Zealand', '2022-05-15'), (3, 18000.00, 'Papua New Guinea', '2022-06-01');", "sql": "SELECT AVG(balance) as avg_balance FROM accounts WHERE country IN ('Australia', 'New Zealand', 'Papua New Guinea') AND account_date BETWEEN '2022-04-01' AND '2022-06-30';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 170, "num_statements": 1} {"question": "What is the total volume of oil spilled in the Gulf of Mexico and the Arabian Sea?", "schema": "CREATE TABLE oil_spills(region VARCHAR(255), volume FLOAT);INSERT INTO oil_spills(region, volume) VALUES ('Gulf of Mexico', 4000000), ('Arabian Sea', 2000000);", "sql": "SELECT SUM(volume) FROM oil_spills WHERE region IN ('Gulf of Mexico', 'Arabian Sea');", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 85, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name where bethany college is", "schema": "CREATE TABLE table_262476_1 (location VARCHAR, institution VARCHAR)", "sql": "SELECT location FROM table_262476_1 WHERE institution = 'Bethany College';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 74, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the example when the environment is #_x", "schema": "CREATE TABLE table_name_46 (example VARCHAR, environment VARCHAR)", "sql": "SELECT example FROM table_name_46 WHERE environment = '#_x';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 60, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'postgres_fdw' (example 171).", "schema": null, "sql": "SELECT t1.c1, t2.c1 FROM ft4 t1 FULL JOIN ft5 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c1, t2.c1 OFFSET 45 LIMIT 10;", "explanation": "Example query from the 'postgres_fdw' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 109, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the score for fiji", "schema": "CREATE TABLE table_name_99 (score VARCHAR, country VARCHAR)", "sql": "SELECT score FROM table_name_99 WHERE country = 'fiji';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 55, "num_statements": 1} {"question": "Show an example of PostgreSQL INSERT (example 6).", "schema": null, "sql": "INSERT INTO films SELECT * FROM tmp_films WHERE date_prod < '2004-05-07';", "explanation": "PostgreSQL INSERT command.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 73, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which FA Cup apps has league goals of 1 with total goals less than 1?", "schema": "CREATE TABLE table_name_47 (fa_cup_apps INTEGER, league_goals VARCHAR, total_goals VARCHAR)", "sql": "SELECT AVG(fa_cup_apps) FROM table_name_47 WHERE league_goals = 1 AND total_goals < 1;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 86, "num_statements": 1} {"question": "What is the number of vessels inspected in the Caribbean sea?", "schema": "CREATE TABLE vessels (name VARCHAR(255), type VARCHAR(255), flag_state VARCHAR(255)); CREATE TABLE inspections (inspection_id INT, vessel_name VARCHAR(255), inspection_date DATE, region VARCHAR(255)); CREATE TABLE caribbean_sea (name VARCHAR(255), region_type VARCHAR(255)); INSERT INTO vessels (name, type, flag_state) VALUES ('VESSEL1', 'Cargo', 'Italy'), ('VESSEL2', 'Passenger', 'Spain'); INSERT INTO inspections (inspection_id, vessel_name, inspection_date, region) VALUES (1, 'VESSEL1', '2022-01-01', 'Caribbean Sea'), (2, 'VESSEL3', '2022-02-01', 'Caribbean Sea'); INSERT INTO caribbean_sea (name, region_type) VALUES ('VESSEL1', 'Caribbean Sea');", "sql": "SELECT COUNT(*) FROM inspections i INNER JOIN caribbean_sea cs ON i.region = cs.region;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 87, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What week was the opponent the San Diego Chargers?", "schema": "CREATE TABLE table_name_71 (week INTEGER, opponent VARCHAR)", "sql": "SELECT AVG(week) FROM table_name_71 WHERE opponent = 'san diego chargers';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 74, "num_statements": 1} {"question": "Which community has the lowest obesity rate in Canada?", "schema": "CREATE TABLE Community (Name TEXT, Country TEXT, ObesityRate FLOAT); INSERT INTO Community (Name, Country, ObesityRate) VALUES ('Community A', 'Canada', 20.0); INSERT INTO Community (Name, Country, ObesityRate) VALUES ('Community B', 'US', 25.0);", "sql": "SELECT Name, ObesityRate FROM Community WHERE Country = 'Canada' ORDER BY ObesityRate ASC LIMIT 1;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 98, "num_statements": 1} {"question": "PostgreSQL regression test 'partition_info': Write the SELECT query (example 16).", "schema": null, "sql": "SELECT pg_partition_root('ptif_test01');", "explanation": "Regression test for Partition Info in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT pg_partition_root('ptif_test01')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 40, "num_statements": 1} {"question": "PostgreSQL regression test 'transactions': Write the SELECT query (example 105).", "schema": null, "sql": "SELECT * FROM savepoints;", "explanation": "Regression test for Transactions in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT * FROM savepoints) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 25, "num_statements": 1} {"question": "What is the average retail price of organic apples sold in the United States?", "schema": "CREATE TABLE OrganicFruitsPrices (fruit_name TEXT, country TEXT, price NUMERIC); INSERT INTO OrganicFruitsPrices (fruit_name, country, price) VALUES ('Apples', 'United States', 2.95);", "sql": "SELECT AVG(price) FROM OrganicFruitsPrices WHERE fruit_name = 'Apples' AND country = 'United States';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 101, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: When Benall DFL is Goorambat with less than 13 wins, what is the least amount of losses?", "schema": "CREATE TABLE table_name_15 (losses INTEGER, benalla_dfl VARCHAR, wins VARCHAR)", "sql": "SELECT MIN(losses) FROM table_name_15 WHERE benalla_dfl = 'goorambat' AND wins < 13;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 84, "num_statements": 1} {"question": "What is the average energy efficiency rating for buildings constructed before 2000 in the 'energy_efficiency' schema?", "schema": "CREATE TABLE energy_efficiency (id INT, building_id INT, rating FLOAT, construction_year INT); INSERT INTO energy_efficiency (id, building_id, rating, construction_year) VALUES (1, 1, 8.5, 1995), (2, 2, 9.2, 1999), (3, 3, 6.8, 2002);", "sql": "SELECT AVG(rating) FROM energy_efficiency WHERE construction_year < 2000;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 73, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: what is the date when the home team is st kilda?", "schema": "CREATE TABLE table_name_26 (date VARCHAR, home_team VARCHAR)", "sql": "SELECT date FROM table_name_26 WHERE home_team = 'st kilda';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 60, "num_statements": 1} {"question": "Which cities have more than 2000 affordable housing units?", "schema": "CREATE TABLE affordable_housing (id INT, city VARCHAR(50), num_units INT); INSERT INTO affordable_housing (id, city, num_units) VALUES (1, 'Chicago', 3000), (2, 'Houston', 1500), (3, 'Chicago', 2500);", "sql": "SELECT city FROM affordable_housing GROUP BY city HAVING SUM(num_units) > 2000;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 79, "num_statements": 1} {"question": "What is the total number of education volunteers?", "schema": "CREATE TABLE Volunteers (id INT, program VARCHAR(25), volunteer_count INT); INSERT INTO Volunteers (id, program, volunteer_count) VALUES (1, 'Education', 50), (2, 'Health', 75), (3, 'Environment', 100), (4, 'Arts', 80), (5, 'Social Services', 120), (6, 'Education', 30), (7, 'Health', 60), (8, 'Environment', 80), (9, 'Arts', 90), (10, 'Social Services', 110);", "sql": "SELECT SUM(volunteer_count) as total_education_volunteers FROM Volunteers WHERE program = 'Education';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 102, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the attendance during the week 1 match?", "schema": "CREATE TABLE table_name_2 (attendance VARCHAR, week VARCHAR)", "sql": "SELECT attendance FROM table_name_2 WHERE week = 1;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 51, "num_statements": 1} {"question": "What is the total biomass of fish in farms located in the Pacific Ocean and using recirculating aquaculture systems?", "schema": "CREATE TABLE Farm (farm_id INT, location VARCHAR(255), system_type VARCHAR(255), PRIMARY KEY(farm_id)); INSERT INTO Farm (farm_id, location, system_type) VALUES (1, 'Pacific Ocean', 'Recirculating'), (2, 'Atlantic Ocean', 'Flow-through'), (3, 'Mediterranean Sea', 'Recirculating'); CREATE TABLE Fish (fish_id INT, farm_id INT, biomass FLOAT), (4, 1, 500), (5, 1, 600), (6, 3, 700);", "sql": "SELECT SUM(f.biomass) FROM Fish f INNER JOIN Farm ff ON f.farm_id = ff.farm_id WHERE ff.location = 'Pacific Ocean' AND ff.system_type = 'Recirculating';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 152, "num_statements": 1} {"question": "Show an example of PostgreSQL ALTER TABLE (example 17).", "schema": null, "sql": "ALTER TABLE distributors ADD CONSTRAINT distfk FOREIGN KEY (address) REFERENCES addresses (address);", "explanation": "PostgreSQL ALTER TABLE command.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "ddl_alter", "is_postgresql_specific": false, "sql_length": 100, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the total number of rank for 348cc petty manx", "schema": "CREATE TABLE table_name_54 (rank VARCHAR, team VARCHAR)", "sql": "SELECT COUNT(rank) FROM table_name_54 WHERE team = '348cc petty manx';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 70, "num_statements": 1} {"question": "Show a SQL definition from the citus project (foreign_key_to_reference_table, item 9).", "schema": null, "sql": "CREATE VIEW table_fkeys_in_workers AS\nSELECT\n(json_populate_record(NULL::foreign_details,\n json_array_elements_text((run_command_on_workers( $$\n SELECT\n COALESCE(json_agg(row_to_json(d)), '[]'::json)\n FROM\n (\n SELECT\n distinct name,\n relid::regclass::text,\n refd_relid::regclass::text\n FROM\n table_fkey_cols\n WHERE\n \"schema\" = 'fkey_reference_table'\n )\n d $$ )).RESULT::json )::json )).* ;", "explanation": "SQL definition from the open-source citus PostgreSQL project.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "ddl_view", "is_postgresql_specific": true, "sql_length": 482, "num_statements": 1} {"question": "Insert a new record into the 'manufacturing_output' table with the chemical_name 'He' and quantity 1000, if 'He' is not already present in the 'chemicals' table.", "schema": "CREATE TABLE manufacturing_output (id INT, chemical_name VARCHAR(255), quantity INT); CREATE TABLE chemicals (id INT, chemical_name VARCHAR(255), safety_rating INT); INSERT INTO chemicals (id, chemical_name, safety_rating) VALUES (1, 'H2O', 90), (2, 'CO2', 70); INSERT INTO manufacturing_output (id, chemical_name, quantity) VALUES (1, 'H2O', 1000), (2, 'CO2', 2000);", "sql": "INSERT INTO manufacturing_output (chemical_name, quantity) SELECT 'He' as chemical_name, 1000 as quantity FROM chemicals WHERE chemical_name = 'He' LIMIT 1;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 156, "num_statements": 1} {"question": "What is the maximum number of buses per station in the 'south' region?", "schema": "CREATE TABLE station_counts (region VARCHAR(10), num_stations INT, num_buses INT); INSERT INTO station_counts (region, num_stations, num_buses) VALUES ('east', 10, 50), ('west', 12, 60), ('north', 15, 75), ('south', 8, 40);", "sql": "SELECT MAX(num_buses/num_stations) FROM station_counts WHERE region = 'south';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 78, "num_statements": 1} {"question": "what is the total number of electric vehicles in the world?", "schema": "CREATE TABLE electric_vehicles (vehicle_id INT, vehicle_type VARCHAR(255), city VARCHAR(255), state VARCHAR(255));", "sql": "SELECT COUNT(*) FROM electric_vehicles;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 39, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'numeric_big' (example 118).", "schema": null, "sql": "INSERT INTO num_exp_add VALUES (2,4,'-994877520673428596810678826533995.79421257464236160757218576989993781147390382997132644206786872350652200243563770552469933194637146474528320738725486418004701192337175478117026439697031462361180324038544450723753402846519731908503949116978812841497201119103409772457270340059605961197538918709309004130294868847110690336360689446090125918336908930881873778405661757289469281163974774492810850778950071063044769131228124355961427111369335109426492177657001035045332525699055300921341010989742896430768506909949340276549373661076950964959025967328861569387160956730002517417236732463510495205173523163676450203614971844583064927040066684531931069310935516821795449174271052747559395296525950219449541557191520903507653089998307641491381797101485104546410643');", "explanation": "DML from PostgreSQL core regression test for Numeric Big.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 797, "num_statements": 1} {"question": "List all unique train routes in Tokyo with more than 1000 riders per day", "schema": "CREATE TABLE tokyo_train (route_id INT, num_riders INT, route_name VARCHAR(255));", "sql": "SELECT DISTINCT route_id, route_name FROM tokyo_train WHERE num_riders > 1000;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 78, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the original air date for season number 2?", "schema": "CREATE TABLE table_21781578_2 (original_air_date VARCHAR, season_no VARCHAR)", "sql": "SELECT original_air_date FROM table_21781578_2 WHERE season_no = 2;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 67, "num_statements": 1} {"question": "List all cities with their total energy consumption and the number of buildings in the 'RenewableEnergy' schema, grouped by continent.", "schema": "CREATE TABLE RenewableEnergy.CityEnergy (city VARCHAR(50), energy_consumption FLOAT, num_buildings INT, continent VARCHAR(50)); INSERT INTO RenewableEnergy.CityEnergy (city, energy_consumption, num_buildings, continent) VALUES ('Sao Paulo', 123456.7, 2000, 'South America'), ('Mexico City', 150000.2, 3000, 'North America'), ('Rio de Janeiro', 105000.1, 1500, 'South America'), ('Buenos Aires', 140000.5, 2500, 'South America'), ('Lima', 180000.9, 3500, 'South America');", "sql": "SELECT continent, city, SUM(energy_consumption) AS total_energy, num_buildings FROM RenewableEnergy.CityEnergy GROUP BY continent, city;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 136, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the label of release we are the rage with a cd format?", "schema": "CREATE TABLE table_name_28 (label VARCHAR, format VARCHAR, release VARCHAR)", "sql": "SELECT label FROM table_name_28 WHERE format = 'cd' AND release = 'we are the rage';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 84, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many laps did Jo Bonnier driver when the grid number was smaller than 11?", "schema": "CREATE TABLE table_name_6 (laps INTEGER, driver VARCHAR, grid VARCHAR)", "sql": "SELECT SUM(laps) FROM table_name_6 WHERE driver = 'jo bonnier' AND grid < 11;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 77, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'ltree' (example 186).", "schema": null, "sql": "SELECT 'a.b.c.d.e'::ltree ~ '*.!b.*.!c.*';", "explanation": "Example query from the 'ltree' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 42, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who was the co-driver in 1970 for the race of Targa Florio?", "schema": "CREATE TABLE table_name_46 (co_driver VARCHAR, year VARCHAR, race VARCHAR)", "sql": "SELECT co_driver FROM table_name_46 WHERE year = 1970 AND race = 'targa florio';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 80, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'hstore' (example 206).", "schema": null, "sql": "select hstore_to_array('aa=>1, cq=>l, b=>g, fg=>NULL'::hstore);", "explanation": "Example query from the 'hstore' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 63, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'seg' (example 100).", "schema": null, "sql": "SELECT '0 .. 1'::seg &< '0 .. 0.5'::seg AS bool;", "explanation": "Example query from the 'seg' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 48, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What was the attendance for a week larger than 3, and an opponent of philadelphia eagles?", "schema": "CREATE TABLE table_name_58 (attendance VARCHAR, week VARCHAR, opponent VARCHAR)", "sql": "SELECT attendance FROM table_name_58 WHERE week > 3 AND opponent = 'philadelphia eagles';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 89, "num_statements": 1} {"question": "What is the maximum water consumption in a single day for all wastewater treatment plants?", "schema": "CREATE TABLE wastewater_treatment_plants (plant_id INT, daily_consumption FLOAT, consumption_date DATE); INSERT INTO wastewater_treatment_plants (plant_id, daily_consumption, consumption_date) VALUES (1, 1000, '2022-03-01'), (2, 1500, '2022-03-02'), (3, 1200, '2022-03-03');", "sql": "SELECT MAX(daily_consumption) FROM wastewater_treatment_plants;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 63, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the name of the second who has Caroline Reed as third?", "schema": "CREATE TABLE table_name_96 (second VARCHAR, third VARCHAR)", "sql": "SELECT second FROM table_name_96 WHERE third = 'caroline reed';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 63, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What is the total number of drivers who have cars constructed by Mercedes-Benz?", "schema": "CREATE TABLE table_18893428_1 (driver VARCHAR, constructor VARCHAR)", "sql": "SELECT COUNT(driver) FROM table_18893428_1 WHERE constructor = 'Mercedes-Benz';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 79, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: how many time was points 6675?", "schema": "CREATE TABLE table_26218783_7 (points VARCHAR)", "sql": "SELECT COUNT(points) AS won FROM table_26218783_7 WHERE points = 6675;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 70, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which Player has a To par of +9?", "schema": "CREATE TABLE table_name_8 (player VARCHAR, to_par VARCHAR)", "sql": "SELECT player FROM table_name_8 WHERE to_par = '+9';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 52, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the least bronze for silver being less than 0", "schema": "CREATE TABLE table_name_45 (bronze INTEGER, silver INTEGER)", "sql": "SELECT MIN(bronze) FROM table_name_45 WHERE silver < 0;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 55, "num_statements": 1} {"question": "What was the average monthly donation amount in the 'Health' project category for the past 12 months?", "schema": "CREATE TABLE donations (donation_id INT, donation_amount DECIMAL, donation_date DATE, project_category VARCHAR(255)); INSERT INTO donations (donation_id, donation_amount, donation_date, project_category) VALUES (1, 500, '2022-01-05', 'Health'), (2, 300, '2022-01-10', 'Health'), (3, 700, '2022-02-15', 'Environment');", "sql": "SELECT AVG(donation_amount) / 12 as avg_monthly_donation FROM donations WHERE project_category = 'Health' AND donation_date BETWEEN DATE_SUB(CURDATE(), INTERVAL 12 MONTH) AND CURDATE();", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 185, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: who was the winner /2nd for the British Champion Stakes race?", "schema": "CREATE TABLE table_24850630_4 (winner_2nd VARCHAR, race VARCHAR)", "sql": "SELECT winner_2nd FROM table_24850630_4 WHERE race = 'British Champion Stakes';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 79, "num_statements": 1} {"question": "What is the total number of military innovation projects for each department in the 'military_innovation' and 'departments' tables?", "schema": "CREATE TABLE departments (department_id INT, department_name VARCHAR(50)); CREATE TABLE military_innovation (innovation_id INT, innovation_name VARCHAR(50), department_id INT); INSERT INTO departments VALUES (1, 'Research'), (2, 'Development'), (3, 'Procurement'); INSERT INTO military_innovation VALUES (1, 'Stealth Technology', 1), (2, 'Advanced Radar Systems', 1), (3, 'Electric Armored Vehicles', 2), (4, 'Automated Turrets', 2), (5, 'Conventional Rifles', 3), (6, 'Body Armor', 3);", "sql": "SELECT d.department_name, COUNT(mi.innovation_id) as total_projects FROM departments d JOIN military_innovation mi ON d.department_id = mi.department_id GROUP BY d.department_name;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 180, "num_statements": 1} {"question": "Which regions had the highest total donation amounts in Q2 2021?", "schema": "CREATE TABLE regions (id INT, name VARCHAR(255)); INSERT INTO regions (id, name) VALUES (1, 'North'), (2, 'South'), (3, 'East'), (4, 'West'); CREATE TABLE donations (id INT, region_id INT, amount DECIMAL(10,2), donation_date DATE); INSERT INTO donations (id, region_id, amount, donation_date) VALUES (1, 2, 500.00, '2021-04-01'), (2, 4, 800.00, '2021-04-05'), (3, 1, 300.00, '2021-03-27'), (4, 2, 700.00, '2021-05-16'), (5, 3, 600.00, '2021-04-23'), (6, 4, 900.00, '2021-06-01');", "sql": "SELECT region_id, SUM(amount) as total_donations FROM donations WHERE donation_date BETWEEN '2021-04-01' AND '2021-06-30' GROUP BY region_id ORDER BY total_donations DESC;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 171, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'collate' (example 109).", "schema": null, "sql": "CREATE COLLATION coll_dup_chk (LC_CTYPE = \"POSIX\", LC_CTYPE = \"NONSENSE\", LC_COLLATE = \"POSIX\");", "explanation": "DDL from PostgreSQL core regression test for Collate.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 96, "num_statements": 1} {"question": "Update the number of followers for a user from Brazil", "schema": "CREATE TABLE users (id INT, username VARCHAR(255), followers INT, country VARCHAR(255));", "sql": "UPDATE users SET followers = followers + 100 WHERE username = 'user_brazil' AND country = 'Brazil';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 99, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: How many locations have a school that is nicknamed the Panthers?", "schema": "CREATE TABLE table_262560_2 (location VARCHAR, nickname VARCHAR)", "sql": "SELECT COUNT(location) FROM table_262560_2 WHERE nickname = 'Panthers';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 71, "num_statements": 1} {"question": "What is the most popular activity among male members in their 50s?", "schema": "CREATE TABLE members (id INT, age INT, gender VARCHAR(10)); INSERT INTO members (id, age, gender) VALUES (1, 52, 'Male'); CREATE TABLE workouts (id INT, member_id INT, activity VARCHAR(50), duration INT); INSERT INTO workouts (id, member_id, activity, duration) VALUES (1, 1, 'Swimming', 60);", "sql": "SELECT activity, COUNT(*) AS count FROM members JOIN workouts ON members.id = workouts.member_id WHERE members.gender = 'Male' AND members.age BETWEEN 50 AND 59 GROUP BY activity ORDER BY count DESC LIMIT 1;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 207, "num_statements": 1} {"question": "Update the 'capacity_mw' value to 30 in the 'energy_storage' table where the 'technology' is 'Lithium-ion'", "schema": "CREATE TABLE energy_storage (id INT PRIMARY KEY, technology VARCHAR(255), capacity_mw FLOAT, country VARCHAR(255));", "sql": "UPDATE energy_storage SET capacity_mw = 30 WHERE technology = 'Lithium-ion';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 76, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Who is the Opponent when the Score is 2-8?", "schema": "CREATE TABLE table_name_16 (opponent VARCHAR, score VARCHAR)", "sql": "SELECT opponent FROM table_name_16 WHERE score = '2-8';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 55, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What club had a score of 18.1?", "schema": "CREATE TABLE table_name_87 (club VARCHAR, score VARCHAR)", "sql": "SELECT club FROM table_name_87 WHERE score = '18.1';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 52, "num_statements": 1} {"question": "Find the total number of streams for artists from Africa in 2021.", "schema": "CREATE TABLE streams (id INT, artist VARCHAR(50), country VARCHAR(50), streams INT, year INT); INSERT INTO streams (id, artist, country, streams, year) VALUES (1, 'Burna Boy', 'Nigeria', 4000000, 2021); INSERT INTO streams (id, artist, country, streams, year) VALUES (2, 'Wizkid', 'Nigeria', 5000000, 2021); INSERT INTO streams (id, artist, country, streams, year) VALUES (3, 'Angélique Kidjo', 'Benin', 3000000, 2021); INSERT INTO streams (id, artist, country, streams, year) VALUES (4, 'Tiwa Savage', 'Nigeria', 6000000, 2021); INSERT INTO streams (id, artist, country, streams, year) VALUES (5, 'Davido', 'Nigeria', 7000000, 2021);", "sql": "SELECT SUM(streams) FROM streams WHERE country IN (SELECT DISTINCT country FROM artists WHERE continent = 'Africa') AND year = 2021;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 132, "num_statements": 1} {"question": "What is the maximum capacity of the largest hotel in Brazil?", "schema": "CREATE TABLE Hotels (id INT, name TEXT, location TEXT, capacity INT); INSERT INTO Hotels (id, name, location, capacity) VALUES (1, 'Hotel Rio', 'Rio de Janeiro, Brazil', 800), (2, 'Hotel Sao Paulo', 'Sao Paulo, Brazil', 1000), (3, 'Hotel Amazon', 'Amazonas, Brazil', 1200);", "sql": "SELECT MAX(capacity) FROM Hotels WHERE location LIKE '%Brazil%';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 64, "num_statements": 1} {"question": "What is the total number of employees in each mine type?", "schema": "CREATE TABLE labor_productivity (id INT, mine_id INT, type TEXT, employees INT); INSERT INTO labor_productivity (id, mine_id, type, employees) VALUES (1, 1, 'Gold', 50), (2, 2, 'Silver', 25), (3, 3, 'Bronze', 15), (4, 4, 'Platinum', 40);", "sql": "SELECT type, SUM(employees) as total_employees FROM labor_productivity GROUP BY type;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 85, "num_statements": 1} {"question": "What is the average daily energy production (in MWh) for each wind farm in the month of January 2021?", "schema": "CREATE TABLE wind_farms (name VARCHAR(50), location VARCHAR(50), capacity FLOAT, primary key (name)); INSERT INTO wind_farms (name, location, capacity) VALUES ('Farm A', 'California', 100), ('Farm B', 'Texas', 150), ('Farm C', 'Oregon', 200); CREATE TABLE production (wind_farm VARCHAR(50), date DATE, energy_production FLOAT, primary key (wind_farm, date), foreign key (wind_farm) references wind_farms(name)); INSERT INTO production (wind_farm, date, energy_production) VALUES ('Farm A', '2021-01-01', 2500), ('Farm A', '2021-01-02', 2400), ('Farm B', '2021-01-01', 3500), ('Farm B', '2021-01-02', 3700), ('Farm C', '2021-01-01', 4500), ('Farm C', '2021-01-02', 4300);", "sql": "SELECT wind_farm, AVG(energy_production) as avg_daily_production FROM production WHERE date BETWEEN '2021-01-01' AND '2021-01-31' GROUP BY wind_farm, EXTRACT(MONTH FROM date), EXTRACT(YEAR FROM date);", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 200, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: PER is the IATA for which city?", "schema": "CREATE TABLE table_name_84 (city VARCHAR, iata VARCHAR)", "sql": "SELECT city FROM table_name_84 WHERE iata = 'per';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 50, "num_statements": 1} {"question": "What are the common ethical manufacturing practices in factories owned by women or non-binary individuals?", "schema": "CREATE TABLE factories (factory_id INT, name TEXT, location TEXT, owner TEXT, ethical_manufacturing BOOLEAN); INSERT INTO factories (factory_id, name, location, owner, ethical_manufacturing) VALUES (1, 'Factory A', 'City A', 'Female', true), (2, 'Factory B', 'City B', 'Male', false), (3, 'Factory C', 'City C', 'Non-binary', true), (4, 'Factory D', 'City D', 'Female', false);", "sql": "SELECT f1.owner, i1.ethical_manufacturing FROM factories f1 JOIN factories i1 ON f1.owner = i1.owner WHERE f1.owner IN ('Female', 'Non-binary') GROUP BY f1.owner, i1.ethical_manufacturing HAVING COUNT(*) > 1;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 208, "num_statements": 1} {"question": "PostgreSQL regression test 'interval': Write the SELECT query (example 348).", "schema": null, "sql": "SELECT interval '2147483647 months 2147483647 days 9223372036854775806 us';", "explanation": "Regression test for Interval in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT interval '2147483647 months 2147483647 days 9223372036854775806 us') AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 75, "num_statements": 1} {"question": "What is the average number of visits per vessel for vessels that have visited 'Port G'?", "schema": "CREATE TABLE ports (port_id INT, port_name TEXT, country TEXT); INSERT INTO ports (port_id, port_name, country) VALUES (1, 'Port A', 'USA'), (2, 'Port B', 'Canada'), (3, 'Port C', 'USA'), (4, 'Port D', 'Mexico'), (5, 'Port E', 'Brazil'), (6, 'Port F', 'Chile'), (7, 'Port G', 'Argentina'); CREATE TABLE visits (visit_id INT, vessel_id INT, port_id INT); INSERT INTO visits (visit_id, vessel_id, port_id) VALUES (1, 1, 1), (2, 2, 1), (3, 1, 2), (4, 2, 2), (5, 4, 5), (6, 3, 7), (7, 2, 7), (8, 1, 7);", "sql": "SELECT AVG(visits_per_vessel) FROM (SELECT COUNT(vessel_id) AS visits_per_vessel FROM visits WHERE port_id = (SELECT port_id FROM ports WHERE port_name = 'Port G') GROUP BY vessel_id) AS subquery;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 196, "num_statements": 1} {"question": "What is the number of cases of malaria in Kenya in 2017?", "schema": "CREATE TABLE infectious_diseases_2 (id INT, disease TEXT, state TEXT, year INT, cases INT); INSERT INTO infectious_diseases_2 (id, disease, state, year, cases) VALUES (1, 'malaria', 'Kenya', 2017, 100);", "sql": "SELECT disease, SUM(cases) as num_cases FROM infectious_diseases_2 WHERE state = 'Kenya' AND year = 2017 GROUP BY disease;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 122, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Which Barrel twist has a Stock of canadian 3rd generation and a Hand guards of short ribbed?", "schema": "CREATE TABLE table_name_33 (barrel_twist VARCHAR, stock VARCHAR, hand_guards VARCHAR)", "sql": "SELECT barrel_twist FROM table_name_33 WHERE stock = 'canadian 3rd generation' AND hand_guards = 'short ribbed';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 112, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What player has The United States as the country, with t2 as the place?", "schema": "CREATE TABLE table_name_9 (player VARCHAR, country VARCHAR, place VARCHAR)", "sql": "SELECT player FROM table_name_9 WHERE country = 'united states' AND place = 't2';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 81, "num_statements": 1} {"question": "Write the SQL definition from PostgreSQL contrib extension 'contrib' (item 43).", "schema": null, "sql": "CREATE FUNCTION isnge(ean13, isbn13)\n\tRETURNS boolean\n\tAS 'int8ge'\n\tLANGUAGE 'internal'\n\tIMMUTABLE STRICT\n\tPARALLEL SAFE;", "explanation": "SQL definition from the 'contrib' PostgreSQL contrib extension.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 121, "num_statements": 1} {"question": "PostgreSQL regression test 'jsonb': Write the SELECT query (example 386).", "schema": null, "sql": "SELECT jsonb_populate_record(row(1,2)::jb_ordered_pair, '{\"x\": 1, \"y\": 0}');", "explanation": "Regression test for Jsonb in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT jsonb_populate_record(row(1,2)::jb_ordered_pair, '{\"x\": 1, \"y\": 0}')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 76, "num_statements": 1} {"question": "PostgreSQL regression test 'jsonb_jsonpath': Write the SELECT query (example 104).", "schema": null, "sql": "select * from jsonb_path_query('[1,\"1\",2,\"2\",null]', '$[*] ? (@ == \"1\")');", "explanation": "Regression test for Jsonb Jsonpath in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select * from jsonb_path_query('[1,\"1\",2,\"2\",null]', '$[*] ? (@ == \"1\")')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "advanced", "category": "query_select", "is_postgresql_specific": false, "sql_length": 74, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: When did the team score 21 goals?", "schema": "CREATE TABLE table_name_67 (date VARCHAR, goal VARCHAR)", "sql": "SELECT date FROM table_name_67 WHERE goal = 21;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 47, "num_statements": 1} {"question": "What is the maximum donation amount per organization, for organizations that have received donations?", "schema": "CREATE TABLE donors (id INT, name TEXT, organization TEXT, country TEXT);CREATE TABLE donations (id INT, donor_id INT, amount DECIMAL(10,2)); INSERT INTO donors (id, name, organization, country) VALUES (1, 'Donor A', 'Organization 1', 'Country A'), (2, 'Donor B', 'Organization 2', 'Country A'), (3, 'Donor C', 'Organization 3', 'Country B'), (4, 'Donor D', 'Organization 4', 'Country C'); INSERT INTO donations (id, donor_id, amount) VALUES (1, 1, 500.00), (2, 1, 750.00), (3, 2, 300.00), (4, 3, 1000.00), (5, 4, 250.00);", "sql": "SELECT donors.organization, MAX(donations.amount) FROM donors INNER JOIN donations ON donors.id = donations.donor_id GROUP BY donors.organization;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 146, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'btree_gist' (example 5).", "schema": null, "sql": "SELECT count(*) FROM int8tmp WHERE a = 464571291354841;", "explanation": "Example query from the 'btree_gist' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 56, "num_statements": 1} {"question": "What is the average safety violation cost per chemical plant in India?", "schema": "CREATE TABLE chemical_plants (plant_id INT, plant_name VARCHAR(50), country VARCHAR(50), safety_violation_cost DECIMAL(10,2)); INSERT INTO chemical_plants (plant_id, plant_name, country, safety_violation_cost) VALUES (1, 'Plant A', 'India', 5000), (2, 'Plant B', 'India', 7000), (3, 'Plant C', 'USA', 3000);", "sql": "SELECT AVG(safety_violation_cost) FROM chemical_plants WHERE country = 'India';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 79, "num_statements": 1} {"question": "How many mobile customers are there in each country?", "schema": "CREATE TABLE mobile_customers (customer_id INT, country VARCHAR(20)); INSERT INTO mobile_customers (customer_id, country) VALUES (1, 'USA'), (2, 'Canada'), (3, 'Mexico'), (4, 'USA'), (5, 'Canada');", "sql": "SELECT country, COUNT(*) FROM mobile_customers GROUP BY country;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 64, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'sepgsql' (example 46).", "schema": null, "sql": "SELECT objtype, objname, label FROM pg_seclabels\n WHERE provider = 'selinux' AND objtype = 'table' AND objname in ('t1', 't2', 't3',\n 'tpart',\n\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t 'tpart_ones',\n\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t 'tpart_tens')\n ORDER BY objname COLLATE \"C\" ASC;", "explanation": "Example query from the 'sepgsql' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 315, "num_statements": 1} {"question": "What is the average popularity of products in each size per month?", "schema": "CREATE TABLE trend (id INT, product_id INT, popularity INT, date DATE); INSERT INTO trend (id, product_id, popularity, date) VALUES (1, 1, 100, '2023-01-01'); CREATE TABLE size (id INT, size VARCHAR(50)); INSERT INTO size (id, size) VALUES (1, 'Small'), (2, 'Medium'), (3, 'Large');", "sql": "SELECT s.size, AVG(t.popularity) as avg_popularity, DATE_TRUNC('month', t.date) as month FROM trend t JOIN product p ON t.product_id = p.id JOIN size s ON p.size = s.size GROUP BY month, s.size ORDER BY month;", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 209, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What are the id and name of the photos for mountains?", "schema": "CREATE TABLE photos (mountain_id VARCHAR); CREATE TABLE mountain (id VARCHAR, name VARCHAR, height INTEGER)", "sql": "SELECT T1.id, T1.name FROM mountain AS T1 JOIN photos AS T2 ON T1.id = T2.mountain_id WHERE T1.height > 4000;", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 109, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What country is player ed sneed, who has a to par of +3, from?", "schema": "CREATE TABLE table_name_70 (country VARCHAR, to_par VARCHAR, player VARCHAR)", "sql": "SELECT country FROM table_name_70 WHERE to_par = '+3' AND player = 'ed sneed';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 78, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: What are the won games with losing bonus of 0?", "schema": "CREATE TABLE table_13758945_1 (won VARCHAR, losing_bonus VARCHAR)", "sql": "SELECT won FROM table_13758945_1 WHERE losing_bonus = '0';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 58, "num_statements": 1} {"question": "Generate PostgreSQL SQL for: Name the season for position 4th", "schema": "CREATE TABLE table_25375093_1 (season VARCHAR, position VARCHAR)", "sql": "SELECT COUNT(season) FROM table_25375093_1 WHERE position = '4th';", "explanation": null, "validation_query": null, "source": "sql_create_context", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "PostgreSQL regression test 'int8': Write the SELECT query (example 42).", "schema": null, "sql": "SELECT * FROM INT8_TBL WHERE '123'::int2 >= q1;", "explanation": "Regression test for Int8 in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT * FROM INT8_TBL WHERE '123'::int2 >= q1) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 47, "num_statements": 1} {"question": "pgTAP test for Functap (assertion 319).", "schema": null, "sql": "-- Test diagnostics\nSELECT * FROM check_test(\n is_window( 'nonesuch'::name, 'whatever' ),\n false,\n 'is_window(nowin, desc)',\n 'whatever',\n ' Function nonesuch() does not exist'\n);", "explanation": "SQL assertion from pgTAP test suite for Functap.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 197, "num_statements": 1} {"question": "What is the average rating of cruelty-free eyeshadows?", "schema": "CREATE TABLE product_rating (product_id INT, rating INT, cruelty_free BOOLEAN); INSERT INTO product_rating (product_id, rating, cruelty_free) VALUES (1, 4, true), (2, 3, false), (3, 5, true), (4, 4, false), (5, 5, true); CREATE TABLE product (product_id INT, product_name VARCHAR(30), product_type VARCHAR(30)); INSERT INTO product (product_id, product_name, product_type) VALUES (1, 'Eyeshadow', 'Powder'), (2, 'Foundation', 'Liquid'), (3, 'Lipstick', 'Matte'), (4, 'Moisturizer', 'Cream'), (5, 'Eyeshadow', 'Cream');", "sql": "SELECT AVG(rating) as avg_rating FROM product_rating JOIN product ON product_rating.product_id = product.product_id WHERE cruelty_free = true AND product_type = 'Eyeshadow';", "explanation": null, "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 173, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'update' (example 214).", "schema": null, "sql": "CREATE TABLE list_parted (\n\ta text,\n\tb int\n) PARTITION BY list (a);", "explanation": "DDL from PostgreSQL core regression test for Update.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "ddl_table", "is_postgresql_specific": true, "sql_length": 67, "num_statements": 1} {"question": "pgTAP test for Hastap (assertion 40).", "schema": null, "sql": "SELECT * FROM check_test(\n hasnt_table( '__SDFSDFD__', 'lol'::name ),\n true,\n 'hasnt_table(non-existent schema, tab)',\n 'Table \"__SDFSDFD__\".lol should not exist',\n ''\n);", "explanation": "SQL assertion from pgTAP test suite for Hastap.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 185, "num_statements": 1} {"question": "PostgreSQL regression test 'xml': Write the SELECT query (example 186).", "schema": null, "sql": "SELECT COUNT(id) FROM xmltest, query WHERE xmlexists(expr PASSING BY REF data);", "explanation": "Regression test for Xml in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT COUNT(id) FROM xmltest, query WHERE xmlexists(expr PASSING BY REF data)) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 79, "num_statements": 1} {"question": "PostgreSQL regression test 'float4': Write the SELECT query (example 35).", "schema": null, "sql": "SELECT ' -INFINiTY '::float4;", "explanation": "Regression test for Float4 in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT ' -INFINiTY '::float4) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 40, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'incremental_sort' (example 124).", "schema": null, "sql": "insert into prt_tbl select i%200, i from generate_series(1,1000)i;", "explanation": "DML from PostgreSQL core regression test for Incremental Sort.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "PostgreSQL regression test 'privileges': Write the SELECT query (example 152).", "schema": null, "sql": "SELECT * FROM atest2 WHERE ( col1 IN ( SELECT b FROM atest1 ) );", "explanation": "Regression test for Privileges in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT * FROM atest2 WHERE ( col1 IN ( SELECT b FROM atest1 ) )) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 64, "num_statements": 1} {"question": "PL/pgSQL test: Plpgsql Domain (example 31).", "schema": null, "sql": "SELECT * FROM test_argresult_array_domain(ARRAY[0, 100]::ordered_pair_domain);", "explanation": "PL/pgSQL example from PostgreSQL source test for Plpgsql Domain.", "validation_query": null, "source": "plpgsql_source", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": true, "sql_length": 78, "num_statements": 1} {"question": "PostgreSQL regression test 'foreign_key': Write the SELECT query (example 505).", "schema": null, "sql": "select conname from pg_constraint where conrelid = 'fktable2'::regclass order by conname;", "explanation": "Regression test for Foreign Key in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select conname from pg_constraint where conrelid = 'fktable2'::regclass order by conname) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 89, "num_statements": 1} {"question": "PL/pgSQL test: Pltcl Setup (example 4).", "schema": null, "sql": "create function tcl_argisnull(text) returns bool as '\n argisnull 1\n' language pltcl;", "explanation": "PL/pgSQL example from PostgreSQL source test for Pltcl Setup.", "validation_query": null, "source": "plpgsql_source", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 87, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'sequence' (example 38).", "schema": null, "sql": "INSERT INTO serialTest2 (f1, f6)\n VALUES ('bogus', -9223372036854775809);", "explanation": "DML from PostgreSQL core regression test for Sequence.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 74, "num_statements": 1} {"question": "pgTAP test for Resultset (assertion 93).", "schema": null, "sql": "INSERT INTO names (name) VALUES ('Makayla');", "explanation": "SQL assertion from pgTAP test suite for Resultset.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 44, "num_statements": 1} {"question": "PostgreSQL regression test 'horology': Write the SELECT query (example 364).", "schema": null, "sql": "SELECT to_date('-100000000', 'CC');", "explanation": "Regression test for Horology in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT to_date('-100000000', 'CC')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 35, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'sequence' (example 4).", "schema": null, "sql": "CREATE SEQUENCE sequence_testx INCREMENT BY -1 START 10;", "explanation": "DDL from PostgreSQL core regression test for Sequence.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 56, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'create_misc' (example 19).", "schema": null, "sql": "INSERT INTO d_star (class, a, b, c)\n VALUES ('d', 8, 'stumble'::text, 'hi koko'::name);", "explanation": "DML from PostgreSQL core regression test for Create Misc.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 89, "num_statements": 1} {"question": "PostgreSQL regression test 'errors': Write the SELECT query (example 10).", "schema": null, "sql": "select null from pg_database group by grouping sets (()) for update;", "explanation": "Regression test for Errors in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select null from pg_database group by grouping sets (()) for update) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "advanced", "category": "query_aggregation", "is_postgresql_specific": true, "sql_length": 68, "num_statements": 1} {"question": "pgTAP test for Aretap (assertion 108).", "schema": null, "sql": "SELECT * FROM check_test(\n languages_are( array_append(___mylangs(''), 'plomgwtf'), 'whatever' ),\n false,\n 'languages_are(languages, desc) missing',\n 'whatever',\n ' Missing languages:\n plomgwtf'\n);", "explanation": "SQL assertion from pgTAP test suite for Aretap.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 222, "num_statements": 1} {"question": "PL/pgSQL test: Plpgsql Simple (example 5).", "schema": null, "sql": "create function simple1.simpletarget(int) returns int language plpgsql\nas $$begin return $1; end$$;\n\ncreate function simpletarget(int) returns int language plpgsql\nas $$begin return $1 + 100; end$$;", "explanation": "PL/pgSQL example from PostgreSQL source test for Plpgsql Simple.", "validation_query": null, "source": "plpgsql_source", "difficulty": "basic", "category": "plpgsql_function", "is_postgresql_specific": true, "sql_length": 198, "num_statements": 4} {"question": "PostgreSQL regression test 'rowsecurity': Write the SELECT query (example 830).", "schema": null, "sql": "SELECT attname, most_common_vals FROM pg_stats\n WHERE tablename = 'current_check'\n ORDER BY 1;", "explanation": "Regression test for Rowsecurity in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT attname, most_common_vals FROM pg_stats\n WHERE tablename = 'current_check'\n ORDER BY 1) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 96, "num_statements": 1} {"question": "PostgreSQL regression test 'arrays': Write the SELECT query (example 327).", "schema": null, "sql": "select array_fill('juhu'::text, array[3,3]);", "explanation": "Regression test for Arrays in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select array_fill('juhu'::text, array[3,3])) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": true, "sql_length": 44, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'timestamptz' (example 38).", "schema": null, "sql": "INSERT INTO TIMESTAMPTZ_TBL VALUES ('1997-02-10 17:32:01-08');", "explanation": "DML from PostgreSQL core regression test for Timestamptz.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 62, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'ltree' (example 195).", "schema": null, "sql": "SELECT 'a.b.c.d.e'::ltree ~ 'a{,}.!a{,}';", "explanation": "Example query from the 'ltree' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 41, "num_statements": 1} {"question": "Write the PL/pgSQL object from PostgreSQL regression test 'triggers' (example 49).", "schema": null, "sql": "begin\n\tnew.description = 'updated in trigger';", "explanation": "PL/pgSQL object from PostgreSQL core test for Triggers.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "plpgsql_trigger", "is_postgresql_specific": true, "sql_length": 46, "num_statements": 1} {"question": "PostgreSQL regression test 'int8': Write the SELECT query (example 139).", "schema": null, "sql": "SELECT (-9223372036854775808)::int8 / (-1)::int2;", "explanation": "Regression test for Int8 in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT (-9223372036854775808)::int8 / (-1)::int2) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 49, "num_statements": 1} {"question": "PostgreSQL regression test 'sqljson_queryfuncs': Write the SELECT query (example 193).", "schema": null, "sql": "SELECT JSON_QUERY(jsonb 'null', '$ts' PASSING timestamptz '2018-02-21 12:34:56 +10' AS ts RETURNING json);", "explanation": "Regression test for Sqljson Queryfuncs in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT JSON_QUERY(jsonb 'null', '$ts' PASSING timestamptz '2018-02-21 12:34:56 +10' AS ts RETURNING json)) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": true, "sql_length": 106, "num_statements": 1} {"question": "pgTAP test for Functap (assertion 275).", "schema": null, "sql": "SELECT * FROM check_test(\n isnt_window( 'pg_catalog', 'dense_rank', '{}'::name[] ),\n false,\n 'isnt_window(schema, win, noargs)',\n 'Function pg_catalog.dense_rank() should not be a window function',\n ''\n);", "explanation": "SQL assertion from pgTAP test suite for Functap.", "validation_query": null, "source": "pgtap_tests", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": true, "sql_length": 219, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'constraints' (example 199).", "schema": null, "sql": "CREATE TABLE parted_uniq_tbl_2 PARTITION OF parted_uniq_tbl FOR VALUES FROM (20) TO (30);", "explanation": "DDL from PostgreSQL core regression test for Constraints.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_table", "is_postgresql_specific": false, "sql_length": 89, "num_statements": 1} {"question": "pgTAP test for Aretap (assertion 122).", "schema": null, "sql": "SELECT * FROM check_test(\n rules_are( 'public', 'fou', ARRAY['ins_me', 'upd_me'] ),\n true,\n 'rules_are(schema, table, rules)',\n 'Relation public.fou should have the correct rules',\n ''\n);", "explanation": "SQL assertion from pgTAP test suite for Aretap.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 202, "num_statements": 1} {"question": "PostgreSQL regression test 'jsonb_jsonpath': Write the SELECT query (example 741).", "schema": null, "sql": "select jsonb_path_query_tz(\n\t'[\"2017-03-10 12:34:00\", \"2017-03-10 12:35:00\", \"2017-03-10 12:36:00\", \"2017-03-10 12:35:00+01\", \"2017-03-10 13:35:00+01\", \"2017-03-10 12:35:00-01\", \"2017-03-10\", \"2017-03-11\", \"12:34:56\", \"12:34:56+01\"]',\n\t'$[*].datetime() ? (@ >= \"10.03.2017 12:35\".datetime(\"dd.mm.yyyy HH24:MI\"))');", "explanation": "Regression test for Jsonb Jsonpath in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select jsonb_path_query_tz(\n\t'[\"2017-03-10 12:34:00\", \"2017-03-10 12:35:00\", \"2017-03-10 12:36:00\", \"2017-03-10 12:35:00+01\", \"2017-03-10 13:35:00+01\", \"2017-03-10 12:35:00-01\", \"2017-03-10\", \"2017-03-11\", \"12:34:56\", \"12:34:56+01\"]',\n\t'$[*].datetime() ? (@ >= \"10.03.2017 12:35\".datetime(\"dd.mm.yyyy HH24:MI\"))')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "advanced", "category": "query_select", "is_postgresql_specific": false, "sql_length": 314, "num_statements": 1} {"question": "PostgreSQL regression test 'subselect': Write the SELECT query (example 9).", "schema": null, "sql": "SELECT ((SELECT 2) UNION SELECT 2);", "explanation": "Regression test for Subselect in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT ((SELECT 2) UNION SELECT 2)) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 35, "num_statements": 1} {"question": "Write the SQL definition from PostgreSQL contrib extension 'contrib' (item 132).", "schema": null, "sql": "-- Create the operator class\nCREATE OPERATOR CLASS gist_time_ops\nDEFAULT FOR TYPE time USING gist\nAS\n\tOPERATOR\t1\t< ,\n\tOPERATOR\t2\t<= ,\n\tOPERATOR\t3\t= ,\n\tOPERATOR\t4\t>= ,\n\tOPERATOR\t5\t> ,\n\tOPERATOR\t6\t<> ,\n\tOPERATOR\t15\t<-> FOR ORDER BY pg_catalog.interval_ops ,\n\tFUNCTION\t1\tgbt_time_consistent (internal, time, int2, oid, internal),\n\tFUNCTION\t2\tgbt_time_union (internal, internal),\n\tFUNCTION\t3\tgbt_time_compress (internal),\n\tFUNCTION\t4\tgbt_decompress (internal),\n\tFUNCTION\t5\tgbt_time_penalty (internal, internal, internal),\n\tFUNCTION\t6\tgbt_time_picksplit (internal, internal),\n\tFUNCTION\t7\tgbt_time_same (gbtreekey16, gbtreekey16, internal),\n\tFUNCTION\t8\tgbt_time_distance (internal, time, int2, oid, internal),\n\tFUNCTION\t9\tgbt_time_fetch (internal),\n\tFUNCTION\t11\tgbt_time_sortsupport (internal),\n\tFUNCTION\t12 (\"any\", \"any\") gist_translate_cmptype_btree (int),\n\tSTORAGE\t\tgbtreekey16;", "explanation": "SQL definition from the 'contrib' PostgreSQL contrib extension.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "ddl_advanced", "is_postgresql_specific": true, "sql_length": 878, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'without_overlaps' (example 392).", "schema": null, "sql": "CREATE TABLE temporal_rng2 (\n id1 int4range,\n id2 int4range,\n valid_at daterange,\n CONSTRAINT temporal_rng2_pk PRIMARY KEY (id1, id2, valid_at WITHOUT OVERLAPS)\n);", "explanation": "DDL from PostgreSQL core regression test for Without Overlaps.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_table", "is_postgresql_specific": false, "sql_length": 167, "num_statements": 1} {"question": "PostgreSQL regression test 'tstypes': Write the SELECT query (example 138).", "schema": null, "sql": "SELECT ts_rank_cd(' a:1 s:2B d g'::tsvector, 'a & s');", "explanation": "Regression test for Tstypes in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT ts_rank_cd(' a:1 s:2B d g'::tsvector, 'a & s')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "advanced", "category": "query_select", "is_postgresql_specific": true, "sql_length": 54, "num_statements": 1} {"question": "PostgreSQL regression test 'stats': Write the SELECT query (example 248).", "schema": null, "sql": "SELECT wal_bytes > :wal_bytes_before FROM pg_stat_wal;", "explanation": "Regression test for Stats in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT wal_bytes > :wal_bytes_before FROM pg_stat_wal) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 54, "num_statements": 1} {"question": "PostgreSQL regression test 'polymorphism': Write the SELECT query (example 204).", "schema": null, "sql": "select formarray(1.1, variadic array[1.2,55.5]);", "explanation": "Regression test for Polymorphism in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select formarray(1.1, variadic array[1.2,55.5])) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": true, "sql_length": 48, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'indexing' (example 459).", "schema": null, "sql": "create table pk51 partition of pk5 for values from (4000) to (4500);", "explanation": "DDL from PostgreSQL core regression test for Indexing.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_table", "is_postgresql_specific": false, "sql_length": 68, "num_statements": 1} {"question": "Write the PL/pgSQL object from PostgreSQL regression test 'drop_if_exists' (example 68).", "schema": null, "sql": "DROP FUNCTION IF EXISTS test_function_exists(int, text, int[]);", "explanation": "PL/pgSQL object from PostgreSQL core test for Drop If Exists.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "other", "is_postgresql_specific": false, "sql_length": 63, "num_statements": 1} {"question": "pgTAP test for Ownership (assertion 59).", "schema": null, "sql": "SELECT * FROM check_test(\n view_owner_is('public', '__not__someview', current_user, 'mumble'),\n\tfalse,\n 'view_owner_is(sch, non-view, user)',\n 'mumble',\n ' View public.__not__someview does not exist'\n);", "explanation": "SQL assertion from pgTAP test suite for Ownership.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 217, "num_statements": 1} {"question": "Show a SQL definition from the zombodb project (_mappings, item 6).", "schema": null, "sql": "CREATE TABLE zdb.type_mappings\n(\n type_name regtype NOT NULL PRIMARY KEY,\n definition jsonb DEFAULT NULL,\n is_default boolean DEFAULT false NOT NULL,\n funcid regproc DEFAULT null\n);", "explanation": "SQL definition from the open-source zombodb PostgreSQL project.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_table", "is_postgresql_specific": true, "sql_length": 214, "num_statements": 1} {"question": "PostgreSQL regression test 'truncate': Write the SELECT query (example 151).", "schema": null, "sql": "select tp_ins_data();", "explanation": "Regression test for Truncate in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select tp_ins_data()) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 21, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'numeric' (example 204).", "schema": null, "sql": "INSERT INTO num_exp_add VALUES (4,8,'7874342.4119');", "explanation": "DML from PostgreSQL core regression test for Numeric.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 52, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'earthdistance' (example 18).", "schema": null, "sql": "SELECT gc_to_sec(1000)::numeric(20,5);", "explanation": "Example query from the 'earthdistance' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 38, "num_statements": 1} {"question": "pgTAP test for Hastap (assertion 207).", "schema": null, "sql": "SELECT * FROM check_test(\n has_leftop( '+', 'text', 'inte', 'desc' ),\n false,\n 'has_leftop( name, right, result, desc ) fail',\n 'desc',\n ''\n);", "explanation": "SQL assertion from pgTAP test suite for Hastap.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 147, "num_statements": 1} {"question": "PostgreSQL regression test 'tstypes': Write the SELECT query (example 188).", "schema": null, "sql": "SELECT setweight('a:1,3A asd:1C w:5,6,12B,13A zxc:81,222A,567'::tsvector, 'c');", "explanation": "Regression test for Tstypes in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT setweight('a:1,3A asd:1C w:5,6,12B,13A zxc:81,222A,567'::tsvector, 'c')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": true, "sql_length": 79, "num_statements": 1} {"question": "PostgreSQL regression test 'name': Write the SELECT query (example 19).", "schema": null, "sql": "SELECT c.f1 FROM NAME_TBL c WHERE c.f1 !~ '.*';", "explanation": "Regression test for Name in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT c.f1 FROM NAME_TBL c WHERE c.f1 !~ '.*') AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 47, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'partition_merge' (example 118).", "schema": null, "sql": "CREATE TABLE sales_apr2022_10_20 PARTITION OF sales_apr2022 FOR VALUES FROM ('2022-04-10') TO ('2022-04-20');", "explanation": "DDL from PostgreSQL core regression test for Partition Merge.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_table", "is_postgresql_specific": false, "sql_length": 109, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'btree_gin' (example 3).", "schema": null, "sql": "INSERT INTO test_timestamptz VALUES\n\t( '2004-10-26 03:55:08' ),\n\t( '2004-10-26 04:55:08' ),\n\t( '2004-10-26 05:55:08' ),\n\t( '2004-10-26 08:55:08' ),\n\t( '2004-10-27 09:55:08' ),\n\t( '2004-10-27 10:55:08' )\n;", "explanation": "Example query from the 'btree_gin' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 204, "num_statements": 1} {"question": "Write the SQL definition from PostgreSQL contrib extension 'contrib' (item 1).", "schema": null, "sql": "/* contrib/btree_gin/btree_gin--1.0.sql */\n\n-- complain if script is sourced in psql, rather than via CREATE EXTENSION\n\\echo Use \"CREATE EXTENSION btree_gin\" to load this file. \\quit\n\nCREATE FUNCTION gin_btree_consistent(internal, int2, anyelement, int4, internal, internal)\nRETURNS bool\nAS 'MODULE_PATHNAME'\nLANGUAGE C STRICT IMMUTABLE;", "explanation": "SQL definition from the 'contrib' PostgreSQL contrib extension.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_advanced", "is_postgresql_specific": true, "sql_length": 337, "num_statements": 1} {"question": "pgTAP test for Pgtap--Unpackaged--0.91.0 (assertion 165).", "schema": null, "sql": "ALTER EXTENSION pgtap ADD FUNCTION _ckeys ( NAME, CHAR );", "explanation": "SQL assertion from pgTAP test suite for Pgtap--Unpackaged--0.91.0.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 57, "num_statements": 1} {"question": "PostgreSQL regression test 'stats': Write the SELECT query (example 381).", "schema": null, "sql": "SELECT sum(evictions) + sum(reuses) + sum(extends) + sum(fsyncs) + sum(reads) + sum(writes) + sum(writebacks) + sum(hits) AS my_io_stats_post_reset\n FROM pg_stat_get_backend_io(pg_backend_pid()) \\gset\n-- pg_stat_reset_shared() did not reset backend IO stats\nSELECT :my_io_stats_pre_reset <= :my_io_stats_post_reset;", "explanation": "Regression test for Stats in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT sum(evictions) + sum(reuses) + sum(extends) + sum(fsyncs) + sum(reads) + sum(writes) + sum(writebacks) + sum(hits) AS my_io_stats_post_reset\n FROM pg_stat_get_backend_io(pg_backend_pid()) \\gset\n-- pg_stat_reset_shared() did not reset backend IO stats\nSELECT :my_io_stats_pre_reset <= :my_io_stats_post_reset) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 316, "num_statements": 1} {"question": "PostgreSQL regression test 'money': Write the SELECT query (example 90).", "schema": null, "sql": "SELECT (-12345678901234567)::money;", "explanation": "Regression test for Money in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT (-12345678901234567)::money) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 35, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'constraints' (example 379).", "schema": null, "sql": "CREATE TABLE notnull_tbl4_cld () INHERITS (notnull_tbl4);", "explanation": "DDL from PostgreSQL core regression test for Constraints.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_table", "is_postgresql_specific": true, "sql_length": 57, "num_statements": 1} {"question": "PostgreSQL regression test 'pg_ndistinct': Write the SELECT query (example 18).", "schema": null, "sql": "SELECT * FROM pg_input_error_info('[{\"attributes\" : [2,3], \"ndistinct\" : 4, \"ndistinct\" : 4}]', 'pg_ndistinct');", "explanation": "Regression test for Pg Ndistinct in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT * FROM pg_input_error_info('[{\"attributes\" : [2,3], \"ndistinct\" : 4, \"ndistinct\" : 4}]', 'pg_ndistinct')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 112, "num_statements": 1} {"question": "PostgreSQL regression test 'arrays': Write the SELECT query (example 435).", "schema": null, "sql": "select unnest('11 22 33'::oidvector);", "explanation": "Regression test for Arrays in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select unnest('11 22 33'::oidvector)) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": true, "sql_length": 37, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'intarray' (example 24).", "schema": null, "sql": "SELECT '{123,623,445}'::int[] | '{1623,623}';", "explanation": "Example query from the 'intarray' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 45, "num_statements": 1} {"question": "PostgreSQL Plpgsql: show example 48.", "schema": null, "sql": "DECLARE curs1 refcursor; curs2 CURSOR FOR SELECT * FROM tenk1; curs3 CURSOR (key integer) FOR SELECT * FROM tenk1 WHERE unique1 = key;", "explanation": "Example from PostgreSQL documentation on Plpgsql.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "plpgsql", "is_postgresql_specific": false, "sql_length": 134, "num_statements": 3} {"question": "Write the PL/pgSQL object from PostgreSQL regression test 'plpgsql' (example 680).", "schema": null, "sql": "drop function inner_func(int);", "explanation": "PL/pgSQL object from PostgreSQL core test for Plpgsql.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 30, "num_statements": 1} {"question": "PostgreSQL regression test 'jsonb': Write the SELECT query (example 806).", "schema": null, "sql": "select ('{\"a\": 1, \"b\": \"c\", \"d\": [1, 2, 3]}'::jsonb)['d'];", "explanation": "Regression test for Jsonb in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select ('{\"a\": 1, \"b\": \"c\", \"d\": [1, 2, 3]}'::jsonb)['d']) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": true, "sql_length": 58, "num_statements": 1} {"question": "pgTAP test for Index (assertion 109).", "schema": null, "sql": "SELECT * FROM check_test(\n is_indexed( 'public', 'sometab'::name, 'myint'::name ),\n false,\n 'is_indexed( schema, table, column ) fail',\n 'Should have an index on public.sometab(myint)',\n ''\n);", "explanation": "SQL assertion from pgTAP test suite for Index.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 207, "num_statements": 1} {"question": "PostgreSQL regression test 'jsonb_jsonpath': Write the SELECT query (example 486).", "schema": null, "sql": "select jsonb_path_query('[]', '$.number()');", "explanation": "Regression test for Jsonb Jsonpath in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select jsonb_path_query('[]', '$.number()')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "advanced", "category": "query_select", "is_postgresql_specific": false, "sql_length": 44, "num_statements": 1} {"question": "Write the SQL definition from PostgreSQL contrib extension 'contrib' (item 1).", "schema": null, "sql": "/* contrib/ltree/ltree--1.1.sql */\n\n-- complain if script is sourced in psql, rather than via CREATE EXTENSION\n\\echo Use \"CREATE EXTENSION ltree\" to load this file. \\quit\n\nCREATE FUNCTION ltree_in(cstring)\nRETURNS ltree\nAS 'MODULE_PATHNAME'\nLANGUAGE C STRICT IMMUTABLE PARALLEL SAFE;", "explanation": "SQL definition from the 'contrib' PostgreSQL contrib extension.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_advanced", "is_postgresql_specific": true, "sql_length": 283, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'stats_ext' (example 525).", "schema": null, "sql": "CREATE STATISTICS mcv_lists_stats_3 ON (mod(c,5)) FROM mcv_lists;", "explanation": "DDL from PostgreSQL core regression test for Stats Ext.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 65, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'numeric' (example 361).", "schema": null, "sql": "INSERT INTO num_exp_sub VALUES (8,7,'83103366');", "explanation": "DML from PostgreSQL core regression test for Numeric.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 48, "num_statements": 1} {"question": "PostgreSQL regression test 'jsonb': Write the SELECT query (example 22).", "schema": null, "sql": "SELECT test_json ->> 'field4' FROM test_jsonb WHERE json_type = 'object';", "explanation": "Regression test for Jsonb in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT test_json ->> 'field4' FROM test_jsonb WHERE json_type = 'object') AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 73, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'intarray' (example 66).", "schema": null, "sql": "SELECT '1&(2&(4&(5|6)))'::query_int;", "explanation": "Example query from the 'intarray' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 36, "num_statements": 1} {"question": "Write the SQL definition from PostgreSQL contrib extension 'contrib' (item 191).", "schema": null, "sql": "CREATE FUNCTION isnle(upc, upc)\n\tRETURNS boolean\n\tAS 'int8le'\n\tLANGUAGE 'internal'\n\tIMMUTABLE STRICT\n\tPARALLEL SAFE;", "explanation": "SQL definition from the 'contrib' PostgreSQL contrib extension.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 116, "num_statements": 1} {"question": "PostgreSQL Ddl: show example 2.", "schema": null, "sql": "CREATE TABLE products ( product_no integer, name text, price numeric );", "explanation": "Example from PostgreSQL documentation on Ddl.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_table", "is_postgresql_specific": false, "sql_length": 71, "num_statements": 1} {"question": "PostgreSQL regression test 'xml': Write the SELECT query (example 74).", "schema": null, "sql": "SELECT xmlroot(xml '', version no value, standalone yes);", "explanation": "Regression test for Xml in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT xmlroot(xml '', version no value, standalone yes)) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 63, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'psql' (example 214).", "schema": null, "sql": "create table child_20_30 partition of parent_tab\n for values from (20) to (30);", "explanation": "DDL from PostgreSQL core regression test for Psql.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_table", "is_postgresql_specific": false, "sql_length": 80, "num_statements": 1} {"question": "pgTAP test for Enumtap (assertion 31).", "schema": null, "sql": "SELECT * FROM check_test(\n enums_are( 'public', ARRAY['freddy'], 'whatever' ),\n false,\n 'enums_are(schema, enums, desc) fail',\n 'whatever',\n ' Extra types:\n bug_status\n Missing types:\n freddy'\n);", "explanation": "SQL assertion from pgTAP test suite for Enumtap.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 230, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'create_table' (example 139).", "schema": null, "sql": "CREATE TABLE fail_part PARTITION OF hash_parted FOR VALUES WITH (MODULUS 150, REMAINDER 3);", "explanation": "DDL from PostgreSQL core regression test for Create Table.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_table", "is_postgresql_specific": false, "sql_length": 91, "num_statements": 1} {"question": "pgTAP test for Pgtap--0.91.0--0.92.0 (assertion 96).", "schema": null, "sql": "-- language_privs_are ( lang, user, privileges[] )\nCREATE OR REPLACE FUNCTION language_privs_are ( NAME, NAME, NAME[] )\nRETURNS TEXT AS $$\n SELECT language_privs_are(\n $1, $2, $3,\n 'Role ' || quote_ident($2) || ' should be granted '\n || CASE WHEN $3[1] IS NULL THEN 'no privileges' ELSE array_to_string($3, ', ') END\n || ' on language ' || quote_ident($1)\n );\n$$ LANGUAGE SQL;", "explanation": "SQL assertion from pgTAP test suite for Pgtap--0.91.0--0.92.0.", "validation_query": null, "source": "pgtap_tests", "difficulty": "advanced", "category": "plpgsql_function", "is_postgresql_specific": true, "sql_length": 418, "num_statements": 2} {"question": "Write the DDL statement from PostgreSQL regression test 'brin_bloom' (example 62).", "schema": null, "sql": "CREATE INDEX brin_summarize_bloom_idx ON brin_summarize_bloom USING brin (value) WITH (pages_per_range=2);", "explanation": "DDL from PostgreSQL core regression test for Brin Bloom.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "ddl_index", "is_postgresql_specific": false, "sql_length": 106, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'numeric_big' (example 16).", "schema": null, "sql": "CREATE TABLE num_exp_sqrt (id int4, expected numeric(1000,800));", "explanation": "DDL from PostgreSQL core regression test for Numeric Big.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "ddl_table", "is_postgresql_specific": false, "sql_length": 64, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'pgstattuple' (example 75).", "schema": null, "sql": "select pgstatginindex('test_sequence');", "explanation": "Example query from the 'pgstattuple' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 39, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'numeric_big' (example 370).", "schema": null, "sql": "INSERT INTO num_exp_add VALUES (8,7,'-818934531574859518.35936275646834493832011429282408849567717761204690035294074716714939441961175772404289860039233415598996234758590850206505669201200');", "explanation": "DML from PostgreSQL core regression test for Numeric Big.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 191, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'generated_stored' (example 115).", "schema": null, "sql": "INSERT INTO gtest_varlena (a) VALUES('01234567890123456789');", "explanation": "DML from PostgreSQL core regression test for Generated Stored.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 61, "num_statements": 1} {"question": "PostgreSQL regression test 'interval': Write the SELECT query (example 137).", "schema": null, "sql": "SELECT interval '1 2:03:04.5678' hour to second(2);", "explanation": "Regression test for Interval in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT interval '1 2:03:04.5678' hour to second(2)) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 51, "num_statements": 1} {"question": "Write the SQL definition from PostgreSQL contrib extension 'contrib' (item 128).", "schema": null, "sql": "CREATE FUNCTION isngt(ismn13, ismn)\n\tRETURNS boolean\n\tAS 'int8gt'\n\tLANGUAGE 'internal'\n\tIMMUTABLE STRICT\n\tPARALLEL SAFE;", "explanation": "SQL definition from the 'contrib' PostgreSQL contrib extension.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 120, "num_statements": 1} {"question": "PostgreSQL regression test 'groupingsets': Write the SELECT query (example 23).", "schema": null, "sql": "select sum(c) from gstest2\n group by grouping sets(grouping sets((a, (b))))\n order by 1 desc;", "explanation": "Regression test for Groupingsets in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select sum(c) from gstest2\n group by grouping sets(grouping sets((a, (b))))\n order by 1 desc) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "advanced", "category": "query_aggregation", "is_postgresql_specific": true, "sql_length": 95, "num_statements": 1} {"question": "Write the PL/pgSQL object from PostgreSQL regression test 'float8' (example 132).", "schema": null, "sql": "-- error functions\n-- we run these with extra_float_digits = -1, to get consistently rounded\n-- results on all platforms.\nSET extra_float_digits = -1;", "explanation": "PL/pgSQL object from PostgreSQL core test for Float8.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 150, "num_statements": 1} {"question": "PostgreSQL Textsearch: show example 40.", "schema": null, "sql": "SELECT plainto_tsquery('english', 'The Fat Rats'); plainto_tsquery ----------------- 'fat' & 'rat';", "explanation": "Example from PostgreSQL documentation on Textsearch.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 99, "num_statements": 2} {"question": "PostgreSQL regression test 'jsonb': Write the SELECT query (example 176).", "schema": null, "sql": "SELECT jsonb_object_agg_unique_strict(i, null) OVER (ORDER BY i)\n FROM generate_series(1, 10) g(i);", "explanation": "Regression test for Jsonb in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT jsonb_object_agg_unique_strict(i, null) OVER (ORDER BY i)\n FROM generate_series(1, 10) g(i)) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "intermediate", "category": "query_window_function", "is_postgresql_specific": true, "sql_length": 100, "num_statements": 1} {"question": "PostgreSQL regression test 'float4': Write the SELECT query (example 84).", "schema": null, "sql": "SELECT float4send('887745e-11'::float4);", "explanation": "Regression test for Float4 in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT float4send('887745e-11'::float4)) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 40, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'float4' (example 5).", "schema": null, "sql": "INSERT INTO FLOAT4_TBL(f1) VALUES ('1.2345678901234e+20');", "explanation": "DML from PostgreSQL core regression test for Float4.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 58, "num_statements": 1} {"question": "Show an example of PostgreSQL CREATE PUBLICATION (example 5).", "schema": null, "sql": "CREATE PUBLICATION production_publication FOR TABLE users, departments, TABLES IN SCHEMA production;", "explanation": "PostgreSQL CREATE PUBLICATION command.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 100, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'rowsecurity' (example 29).", "schema": null, "sql": "INSERT INTO rls_test_src VALUES (1, 'src a');", "explanation": "DML from PostgreSQL core regression test for Rowsecurity.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 45, "num_statements": 1} {"question": "pgTAP test for Pktap (assertion 33).", "schema": null, "sql": "SELECT * FROM check_test(\n col_is_pk( 'argh', ARRAY['id', 'name'], 'id + name should be a pk' ),\n true,\n 'col_is_pk( table, column[], description )',\n 'id + name should be a pk',\n ''\n);", "explanation": "SQL assertion from pgTAP test suite for Pktap.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 200, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'numeric_big' (example 279).", "schema": null, "sql": "INSERT INTO num_exp_sub VALUES (6,4,'-5329378275943663322215245.20238058768123314540388318253964726313120648232235700755866801918195710344138369800874235399515094124581615597720798385015942389765692769739983054442503547211560297249686289665792078548480268091496050883021187158502798880896590227542729659940394038802461081290690995869705131152889309663639310553909874081663091069118126221594338242710530718836025225507189149221049928936955230868771875644038572888630664890573507822342998964954667474300944699078658989010257103569231493090050659723450626338923049035040974032671138430612839043269997482582763267536489504794826476836323549796385028155416935072959933315468068930689064483178204550825728947252440604703474049780550458442808479096492346910001692358508618202898514895453589357');", "explanation": "DML from PostgreSQL core regression test for Numeric Big.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 789, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'triggers' (example 158).", "schema": null, "sql": "create function trigger_nothing() returns trigger\n language plpgsql as $$ begin end; $$;\ncreate trigger failed instead of update on parted_trig\n for each row execute procedure trigger_nothing();\ncreate trigger failed after update on parted_trig\n referencing old table as old_table\n for each row execute procedure trigger_nothing();\ndrop table parted_trig;\n\n--\n-- Verify trigger creation for partitioned tables, and drop behavior\n--\ncreate table trigpart (a int, b int) partition by range (a);\ncreate table trigpart1 partition of trigpart for values from (0) to (1000);\ncreate trigger trg1 after insert on trigpart for each row execute procedure trigger_nothing();\ncreate table trigpart2 partition of trigpart for values from (1000) to (2000);\ncreate table trigpart3 (like trigpart);\nalter table trigpart attach partition trigpart3 for values from (2000) to (3000);\ncreate table trigpart4 partition of trigpart for values from (3000) to (4000) partition by range (a);\ncreate table trigpart41 partition of trigpart4 for values from (3000) to (3500);\ncreate table trigpart42 (like trigpart);\nalter table trigpart4 attach partition trigpart42 for values from (3500) to (4000);\nselect tgrelid::regclass, tgname, tgfoid::regproc from pg_trigger\n where tgrelid::regclass::text like 'trigpart%' order by tgrelid::regclass::text;\ndrop trigger trg1 on trigpart1;\t-- fail\ndrop trigger trg1 on trigpart2;\t-- fail\ndrop trigger trg1 on trigpart3;\t-- fail\ndrop table trigpart2;\t\t\t-- ok, trigger should be gone in that partition\nselect tgrelid::regclass, tgname, tgfoid::regproc from pg_trigger\n where tgrelid::regclass::text like 'trigpart%' order by tgrelid::regclass::text;\ndrop trigger trg1 on trigpart;\t\t-- ok, all gone\nselect tgrelid::regclass, tgname, tgfoid::regproc from pg_trigger\n where tgrelid::regclass::text like 'trigpart%' order by tgrelid::regclass::text;\n\n-- check detach behavior\ncreate trigger trg1 after insert on trigpart for each row execute procedure trigger_nothing();\n\\d trigpart3\nalter table trigpart detach partition trigpart3;\ndrop trigger trg1 on trigpart3; -- fail due to \"does not exist\"\nalter table trigpart detach partition trigpart4;\ndrop trigger trg1 on trigpart41; -- fail due to \"does not exist\"\ndrop table trigpart4;\nalter table trigpart attach partition trigpart3 for values from (2000) to (3000);\nalter table trigpart detach partition trigpart3;\nalter table trigpart attach partition trigpart3 for values from (2000) to (3000);\ndrop table trigpart3;\n\nselect tgrelid::regclass::text, tgname, tgfoid::regproc, tgenabled, tgisinternal from pg_trigger\n where tgname ~ '^trg1' order by 1;\ncreate table trigpart3 (like trigpart);\ncreate trigger trg1 after insert on trigpart3 for each row execute procedure trigger_nothing();\n\\d trigpart3\nalter table trigpart attach partition trigpart3 FOR VALUES FROM (2000) to (3000); -- fail\ndrop table trigpart3;\n\n-- check display of unrelated triggers\ncreate trigger samename after delete on trigpart execute function trigger_nothing();\ncreate trigger samename after delete on trigpart1 execute function trigger_nothing();\n\\d trigpart1\n\ndrop table trigpart;\ndrop function trigger_nothing();\n\n--\n-- Verify that triggers are fired for partitioned tables\n--\ncreate table parted_stmt_trig (a int) partition by list (a);\ncreate table parted_stmt_trig1 partition of parted_stmt_trig for values in (1);\ncreate table parted_stmt_trig2 partition of parted_stmt_trig for values in (2);\n\ncreate table parted2_stmt_trig (a int) partition by list (a);\ncreate table parted2_stmt_trig1 partition of parted2_stmt_trig for values in (1);\ncreate table parted2_stmt_trig2 partition of parted2_stmt_trig for values in (2);\n\ncreate or replace function trigger_notice() returns trigger as $$\n begin\n raise notice 'trigger % on % % % for %', TG_NAME, TG_TABLE_NAME, TG_WHEN, TG_OP, TG_LEVEL;", "explanation": "DDL from PostgreSQL core regression test for Triggers.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "plpgsql_trigger", "is_postgresql_specific": true, "sql_length": 3838, "num_statements": 49} {"question": "Show a query using PostgreSQL contrib extension 'btree_gist' (example 5).", "schema": null, "sql": "INSERT INTO timetzcmp (r_id,a) SELECT 2,count(*) FROM timetztmp WHERE a <= '07:46:45 GMT+3';", "explanation": "Example query from the 'btree_gist' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 93, "num_statements": 1} {"question": "pgTAP test for Functap (assertion 107).", "schema": null, "sql": "/****************************************************************************/\n-- Test is_definer() isnt_definer().\nSELECT * FROM check_test(\n is_definer( 'public', 'yay', '{}'::name[], 'whatever' ),\n true,\n 'is_definer(schema, func, 0 args, desc)',\n 'whatever',\n ''\n);", "explanation": "SQL assertion from pgTAP test suite for Functap.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 284, "num_statements": 1} {"question": "PostgreSQL Textsearch: show example 7.", "schema": null, "sql": "SELECT title FROM pgweb WHERE to_tsvector('english', body) @@ to_tsquery('english', 'friend');", "explanation": "Example from PostgreSQL documentation on Textsearch.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "query_select", "is_postgresql_specific": false, "sql_length": 94, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'foreign_data' (example 7).", "schema": null, "sql": "CREATE ROLE regress_test_role;", "explanation": "DDL from PostgreSQL core regression test for Foreign Data.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 30, "num_statements": 1} {"question": "PostgreSQL regression test 'jsonb': Write the SELECT query (example 164).", "schema": null, "sql": "SELECT jsonb_build_array('a', NULL); -- ok\nSELECT jsonb_build_array(VARIADIC NULL::text[]); -- ok\nSELECT jsonb_build_array(VARIADIC '{}'::text[]); -- ok\nSELECT jsonb_build_array(VARIADIC '{a,b,c}'::text[]); -- ok\nSELECT jsonb_build_array(VARIADIC ARRAY['a', NULL]::text[]); -- ok\nSELECT jsonb_build_array(VARIADIC '{1,2,3,4}'::text[]); -- ok\nSELECT jsonb_build_array(VARIADIC '{1,2,3,4}'::int[]); -- ok\nSELECT jsonb_build_array(VARIADIC '{{1,4},{2,5},{3,6}}'::int[][]); -- ok\n\nSELECT jsonb_build_object('a',1,'b',1.2,'c',true,'d',null,'e',json '{\"x\": 3, \"y\": [1,2,3]}');", "explanation": "Regression test for Jsonb in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT jsonb_build_array('a', NULL); -- ok\nSELECT jsonb_build_array(VARIADIC NULL::text[]); -- ok\nSELECT jsonb_build_array(VARIADIC '{}'::text[]); -- ok\nSELECT jsonb_build_array(VARIADIC '{a,b,c}'::text[]); -- ok\nSELECT jsonb_build_array(VARIADIC ARRAY['a', NULL]::text[]); -- ok\nSELECT jsonb_build_array(VARIADIC '{1,2,3,4}'::text[]); -- ok\nSELECT jsonb_build_array(VARIADIC '{1,2,3,4}'::int[]); -- ok\nSELECT jsonb_build_array(VARIADIC '{{1,4},{2,5},{3,6}}'::int[][]); -- ok\n\nSELECT jsonb_build_object('a',1,'b',1.2,'c',true,'d',null,'e',json '{\"x\": 3, \"y\": [1,2,3]}')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": true, "sql_length": 570, "num_statements": 9} {"question": "Show a query using PostgreSQL contrib extension 'test_decoding' (example 29).", "schema": null, "sql": "INSERT INTO replication_example(somedata, text) VALUES (1, 5);", "explanation": "Example query from the 'test_decoding' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 62, "num_statements": 1} {"question": "Write the SQL definition from PostgreSQL contrib extension 'contrib' (item 2).", "schema": null, "sql": "CREATE FUNCTION postgres_fdw_disconnect (text)\nRETURNS bool\nAS 'MODULE_PATHNAME'\nLANGUAGE C STRICT PARALLEL RESTRICTED;", "explanation": "SQL definition from the 'contrib' PostgreSQL contrib extension.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "other", "is_postgresql_specific": false, "sql_length": 119, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'memoize' (example 45).", "schema": null, "sql": "CREATE TABLE prt_p2 PARTITION OF prt FOR VALUES FROM (10) TO (20);", "explanation": "DDL from PostgreSQL core regression test for Memoize.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_table", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'create_view' (example 60).", "schema": null, "sql": "CREATE SCHEMA testviewschm2;", "explanation": "DDL from PostgreSQL core regression test for Create View.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 28, "num_statements": 1} {"question": "PL/pgSQL test: Plpython Types (example 42).", "schema": null, "sql": "CREATE FUNCTION test_type_conversion_float8(x float8) RETURNS float8 AS $$\nplpy.info(x, type(x))\nreturn x\n$$ LANGUAGE plpython3u;", "explanation": "PL/pgSQL example from PostgreSQL source test for Plpython Types.", "validation_query": null, "source": "plpgsql_source", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 129, "num_statements": 1} {"question": "PL/pgSQL test: Plpython Spi (example 6).", "schema": null, "sql": "CREATE FUNCTION spi_prepared_plan_test_nested(a text) RETURNS text\n\tAS\n'if \"myplan\" not in SD:\n\tq = \"SELECT spi_prepared_plan_test_one(''%s'') as count\" % a\n\tSD[\"myplan\"] = plpy.prepare(q)\ntry:\n\trv = plpy.execute(SD[\"myplan\"])\n\tif len(rv):\n\t\treturn rv[0][\"count\"]\nexcept Exception as ex:\n\tplpy.error(str(ex))\nreturn None\n'\n\tLANGUAGE plpython3u;", "explanation": "PL/pgSQL example from PostgreSQL source test for Plpython Spi.", "validation_query": null, "source": "plpgsql_source", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 344, "num_statements": 1} {"question": "PostgreSQL regression test 'jsonb': Write the SELECT query (example 412).", "schema": null, "sql": "select * from jsonb_to_recordset('[{\"a\":1,\"b\":\"foo\",\"d\":false},{\"a\":2,\"b\":\"bar\",\"c\":true}]')\n as x(a int, b text, c boolean);", "explanation": "Regression test for Jsonb in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select * from jsonb_to_recordset('[{\"a\":1,\"b\":\"foo\",\"d\":false},{\"a\":2,\"b\":\"bar\",\"c\":true}]')\n as x(a int, b text, c boolean)) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 128, "num_statements": 1} {"question": "PostgreSQL regression test 'strings': Write the SELECT query (example 188).", "schema": null, "sql": "SELECT regexp_substr('abcabcabc', 'a.c', 2);", "explanation": "Regression test for Strings in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT regexp_substr('abcabcabc', 'a.c', 2)) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 44, "num_statements": 1} {"question": "PostgreSQL regression test 'rules': Write the SELECT query (example 317).", "schema": null, "sql": "SELECT * FROM shoelace_log;", "explanation": "Regression test for Rules in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT * FROM shoelace_log) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 27, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'create_table' (example 100).", "schema": null, "sql": "CREATE TABLE part_bogus_expr_fail PARTITION OF list_parted FOR VALUES IN ((1+1) collate \"POSIX\");", "explanation": "DDL from PostgreSQL core regression test for Create Table.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_table", "is_postgresql_specific": false, "sql_length": 97, "num_statements": 1} {"question": "PostgreSQL regression test 'numeric': Write the SELECT query (example 757).", "schema": null, "sql": "SELECT to_number('CLXC', 'RN');", "explanation": "Regression test for Numeric in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT to_number('CLXC', 'RN')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 31, "num_statements": 1} {"question": "PostgreSQL regression test 'subselect': Write the SELECT query (example 252).", "schema": null, "sql": "select nextval('ts1');", "explanation": "Regression test for Subselect in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select nextval('ts1')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 22, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'numeric' (example 169).", "schema": null, "sql": "INSERT INTO num_exp_sub VALUES (3,9,'24926808.355047420');", "explanation": "DML from PostgreSQL core regression test for Numeric.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 58, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'indexing' (example 315).", "schema": null, "sql": "create table idxpart (a int4range, b int4range) partition by range (a);", "explanation": "DDL from PostgreSQL core regression test for Indexing.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "ddl_table", "is_postgresql_specific": true, "sql_length": 71, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'numeric' (example 192).", "schema": null, "sql": "INSERT INTO num_exp_add VALUES (4,5,'7815858.450391');", "explanation": "DML from PostgreSQL core regression test for Numeric.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 54, "num_statements": 1} {"question": "PostgreSQL regression test 'xml': Write the SELECT query (example 147).", "schema": null, "sql": "SELECT xpath(NULL, NULL) IS NULL FROM xmltest;", "explanation": "Regression test for Xml in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT xpath(NULL, NULL) IS NULL FROM xmltest) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 46, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'aggregates' (example 181).", "schema": null, "sql": "insert into minmaxtest values(11), (12);", "explanation": "DML from PostgreSQL core regression test for Aggregates.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 40, "num_statements": 1} {"question": "PostgreSQL regression test 'jsonb': Write the SELECT query (example 725).", "schema": null, "sql": "select jsonb_set('{\"n\":null, \"a\":1, \"b\":[1,2], \"c\":{\"1\":2}, \"d\":{\"1\":[2,3]}}'::jsonb, '{d,1,0}', '{\"1\": 2}');", "explanation": "Regression test for Jsonb in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select jsonb_set('{\"n\":null, \"a\":1, \"b\":[1,2], \"c\":{\"1\":2}, \"d\":{\"1\":[2,3]}}'::jsonb, '{d,1,0}', '{\"1\": 2}')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "advanced", "category": "query_select", "is_postgresql_specific": true, "sql_length": 109, "num_statements": 1} {"question": "PostgreSQL Textsearch: show example 6.", "schema": null, "sql": "SELECT phraseto_tsquery('cats ate rats'); phraseto_tsquery ------------------------------- 'cat' <-> 'ate' <-> 'rat' SELECT phraseto_tsquery('the cats ate the rats'); phraseto_tsquery ------------------------------- 'cat' <-> 'ate' <2> 'rat';", "explanation": "Example from PostgreSQL documentation on Textsearch.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 242, "num_statements": 3} {"question": "pgTAP test for Aretap (assertion 11).", "schema": null, "sql": "CREATE VIEW public.vou AS SELECT * FROM fou;", "explanation": "SQL assertion from pgTAP test suite for Aretap.", "validation_query": null, "source": "pgtap_tests", "difficulty": "intermediate", "category": "ddl_view", "is_postgresql_specific": false, "sql_length": 44, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'indexing' (example 520).", "schema": null, "sql": "create table parted_replica_tab_1 partition of parted_replica_tab\n for values from (1) to (10) partition by range (id);", "explanation": "DDL from PostgreSQL core regression test for Indexing.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "ddl_table", "is_postgresql_specific": true, "sql_length": 120, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'identity' (example 77).", "schema": null, "sql": "INSERT INTO itestv11 VALUES (10, 'xyz');", "explanation": "DML from PostgreSQL core regression test for Identity.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 40, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'test_decoding' (example 17).", "schema": null, "sql": "SELECT pg_stat_reset_replication_slot('do-not-exist');", "explanation": "Example query from the 'test_decoding' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 54, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'foreign_data' (example 20).", "schema": null, "sql": "CREATE FOREIGN DATA WRAPPER foo OPTIONS (testing '1');", "explanation": "DDL from PostgreSQL core regression test for Foreign Data.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 54, "num_statements": 1} {"question": "PostgreSQL regression test 'xml': Write the SELECT query (example 175).", "schema": null, "sql": "SELECT COUNT(id) FROM xmltest WHERE xmlexists('/menu/beer' PASSING BY REF data BY REF);", "explanation": "Regression test for Xml in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT COUNT(id) FROM xmltest WHERE xmlexists('/menu/beer' PASSING BY REF data BY REF)) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 87, "num_statements": 1} {"question": "PL/pgSQL test: Plpython Populate (example 9).", "schema": null, "sql": "INSERT INTO entry (accession, txid) VALUES ('A00002', '1') ;", "explanation": "PL/pgSQL example from PostgreSQL source test for Plpython Populate.", "validation_query": null, "source": "plpgsql_source", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 60, "num_statements": 1} {"question": "Write the SQL definition from PostgreSQL contrib extension 'contrib' (item 38).", "schema": null, "sql": "CREATE FUNCTION seg_lower(seg)\nRETURNS float4\nAS 'MODULE_PATHNAME'\nLANGUAGE C STRICT IMMUTABLE PARALLEL SAFE;", "explanation": "SQL definition from the 'contrib' PostgreSQL contrib extension.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 109, "num_statements": 1} {"question": "pgTAP test for Pgtap--Unpackaged--0.91.0 (assertion 520).", "schema": null, "sql": "ALTER EXTENSION pgtap ADD FUNCTION _definer ( NAME, NAME[] );", "explanation": "SQL assertion from pgTAP test suite for Pgtap--Unpackaged--0.91.0.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 61, "num_statements": 1} {"question": "PostgreSQL regression test 'stats_import': Write the SELECT query (example 232).", "schema": null, "sql": "SELECT replace(e.n_distinct, '}, ', E'},\\n') AS n_distinct,\n replace(e.dependencies, '}, ', E'},\\n') AS dependencies,\n replace(e.most_common_vals::text, '},', E'},\\n ') AS mcvs,\n e.most_common_val_nulls,\n e.most_common_freqs, e.most_common_base_freqs\nFROM pg_stats_ext AS e\nWHERE e.statistics_schemaname = 'stats_import' AND\n e.statistics_name = 'test_mr_stat' AND\n e.inherited = false\n\\gx\n\n-- Test the ability of pg_restore_extended_stats() to import all of the\n-- statistic values from an extended statistic object that has been\n-- populated via a regular ANALYZE. This checks after the statistics\n-- kinds supported by pg_restore_extended_stats().\n--\n-- Note: Keep this test at the bottom of the file, so as the amount of\n-- statistics data handled is maximized.\nANALYZE stats_import.test;", "explanation": "Regression test for Stats Import in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT replace(e.n_distinct, '}, ', E'},\\n') AS n_distinct,\n replace(e.dependencies, '}, ', E'},\\n') AS dependencies,\n replace(e.most_common_vals::text, '},', E'},\\n ') AS mcvs,\n e.most_common_val_nulls,\n e.most_common_freqs, e.most_common_base_freqs\nFROM pg_stats_ext AS e\nWHERE e.statistics_schemaname = 'stats_import' AND\n e.statistics_name = 'test_mr_stat' AND\n e.inherited = false\n\\gx\n\n-- Test the ability of pg_restore_extended_stats() to import all of the\n-- statistic values from an extended statistic object that has been\n-- populated via a regular ANALYZE. This checks after the statistics\n-- kinds supported by pg_restore_extended_stats().\n--\n-- Note: Keep this test at the bottom of the file, so as the amount of\n-- statistics data handled is maximized.\nANALYZE stats_import.test) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "admin_maintenance", "is_postgresql_specific": true, "sql_length": 825, "num_statements": 1} {"question": "pgTAP test for Pgtap--Unpackaged--0.91.0 (assertion 448).", "schema": null, "sql": "ALTER EXTENSION pgtap ADD FUNCTION language_is_trusted( NAME, TEXT );", "explanation": "SQL assertion from pgTAP test suite for Pgtap--Unpackaged--0.91.0.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 69, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'vacuum' (example 2).", "schema": null, "sql": "INSERT INTO vactst VALUES (1);", "explanation": "DML from PostgreSQL core regression test for Vacuum.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 30, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'seg' (example 129).", "schema": null, "sql": "SELECT '2'::seg >> '1'::seg AS bool;", "explanation": "Example query from the 'seg' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 36, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'domain' (example 172).", "schema": null, "sql": "insert into nulltest values ('a', 'b', 'c', 'd', 'a');", "explanation": "DML from PostgreSQL core regression test for Domain.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 54, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'interval' (example 21).", "schema": null, "sql": "INSERT INTO INTERVAL_TBL (f1) VALUES ('6 years');", "explanation": "DML from PostgreSQL core regression test for Interval.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 49, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'without_overlaps' (example 108).", "schema": null, "sql": "INSERT INTO temporal_rng3 (id, valid_at) VALUES ('[1,2)', daterange('2018-03-03', '2018-04-04'));", "explanation": "DML from PostgreSQL core regression test for Without Overlaps.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 97, "num_statements": 1} {"question": "Show a SQL definition from the zombodb project (zombodb--3000.2.4--3000.2.5, item 2).", "schema": null, "sql": "CREATE FUNCTION zdb.schema_version() RETURNS text LANGUAGE sql AS $$\nSELECT '3000.2.5 (4704c7982040b2dd33d97334d3d4743082549630)'\n$$;", "explanation": "SQL definition from the open-source zombodb PostgreSQL project.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 133, "num_statements": 1} {"question": "PostgreSQL regression test 'window': Write the SELECT query (example 54).", "schema": null, "sql": "SELECT sum(unique1) over (rows between 2 preceding and 2 following),\n\tunique1, four\nFROM tenk1 WHERE unique1 < 10;", "explanation": "Regression test for Window in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT sum(unique1) over (rows between 2 preceding and 2 following),\n\tunique1, four\nFROM tenk1 WHERE unique1 < 10) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "intermediate", "category": "query_window_function", "is_postgresql_specific": true, "sql_length": 114, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'copy' (example 5).", "schema": null, "sql": "insert into copytest values(E'esc\\\\ape',E'a\\\\r\\\\\\r\\\\\\n\\\\nb',4);", "explanation": "DML from PostgreSQL core regression test for Copy.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 63, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'pageinspect' (example 24).", "schema": null, "sql": "SELECT * FROM bt_page_items(get_raw_page('test1_a_idx', 1));", "explanation": "Example query from the 'pageinspect' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 60, "num_statements": 1} {"question": "Write the PL/pgSQL object from PostgreSQL regression test 'stats_ext' (example 384).", "schema": null, "sql": "-- create statistics on expressions\nCREATE STATISTICS func_deps_stat (dependencies) ON (a * 2), upper(b), (c + 1) FROM functional_dependencies;", "explanation": "PL/pgSQL object from PostgreSQL core test for Stats Ext.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 143, "num_statements": 1} {"question": "PostgreSQL regression test 'horology': Write the SELECT query (example 24).", "schema": null, "sql": "SELECT timestamp with time zone 'J2452271 04:05:06+08';", "explanation": "Regression test for Horology in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT timestamp with time zone 'J2452271 04:05:06+08') AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 55, "num_statements": 1} {"question": "pgTAP test for Pgtap--1.3.0--1.3.1 (assertion 14).", "schema": null, "sql": "-- has_pk( schema, table )\nCREATE OR REPLACE FUNCTION has_pk ( NAME, NAME )\nRETURNS TEXT AS $$\n SELECT has_pk( $1, $2, 'Table ' || quote_ident($1) || '.' || quote_ident($2) || ' should have a primary key' );\n$$ LANGUAGE sql;", "explanation": "SQL assertion from pgTAP test suite for Pgtap--1.3.0--1.3.1.", "validation_query": null, "source": "pgtap_tests", "difficulty": "advanced", "category": "plpgsql_function", "is_postgresql_specific": true, "sql_length": 227, "num_statements": 2} {"question": "PostgreSQL regression test 'alter_table': Write the SELECT query (example 649).", "schema": null, "sql": "SELECT coninhcount, conislocal FROM pg_constraint WHERE conrelid = 'part_3_4'::regclass AND conname = 'check_a';", "explanation": "Regression test for Alter Table in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT coninhcount, conislocal FROM pg_constraint WHERE conrelid = 'part_3_4'::regclass AND conname = 'check_a') AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 112, "num_statements": 1} {"question": "PostgreSQL regression test 'int8': Write the SELECT query (example 28).", "schema": null, "sql": "SELECT * FROM INT8_TBL WHERE 123 > q1;", "explanation": "Regression test for Int8 in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT * FROM INT8_TBL WHERE 123 > q1) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 38, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'partition_prune' (example 46).", "schema": null, "sql": "create table rlp5_default partition of rlp5 default;", "explanation": "DDL from PostgreSQL core regression test for Partition Prune.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_table", "is_postgresql_specific": false, "sql_length": 52, "num_statements": 1} {"question": "pgTAP test for Pgtap--1.3.0--1.3.1 (assertion 4).", "schema": null, "sql": "-- col_type_is( schema, table, column, schema, type )\nCREATE OR REPLACE FUNCTION col_type_is ( NAME, NAME, NAME, NAME, TEXT )\nRETURNS TEXT AS $$\n SELECT col_type_is( $1, $2, $3, $4, $5, 'Column ' || quote_ident($1) || '.' || quote_ident($2)\n || '.' || quote_ident($3) || ' should be type ' || quote_ident($4) || '.' || $5);\n$$ LANGUAGE SQL;", "explanation": "SQL assertion from pgTAP test suite for Pgtap--1.3.0--1.3.1.", "validation_query": null, "source": "pgtap_tests", "difficulty": "advanced", "category": "plpgsql_function", "is_postgresql_specific": true, "sql_length": 350, "num_statements": 2} {"question": "Write the SQL definition from PostgreSQL contrib extension 'contrib' (item 5).", "schema": null, "sql": "CREATE TYPE cube (\n\tINTERNALLENGTH = variable,\n\tINPUT = cube_in,\n\tOUTPUT = cube_out,\n\tALIGNMENT = double\n);", "explanation": "SQL definition from the 'contrib' PostgreSQL contrib extension.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "ddl_advanced", "is_postgresql_specific": true, "sql_length": 107, "num_statements": 1} {"question": "Write the PL/pgSQL object from PostgreSQL regression test 'indirect_toast' (example 27).", "schema": null, "sql": "DROP FUNCTION update_using_indirect();", "explanation": "PL/pgSQL object from PostgreSQL core test for Indirect Toast.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 38, "num_statements": 1} {"question": "PostgreSQL regression test 'with': Write the SELECT query (example 204).", "schema": null, "sql": "SELECT * from id_alw1;", "explanation": "Regression test for With in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT * from id_alw1) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 22, "num_statements": 1} {"question": "Show a SQL definition from the timescaledb project (telemetry, item 5).", "schema": null, "sql": "CREATE OR REPLACE FUNCTION _timescaledb_internal.test_telemetry_main_conn(text, text)\nRETURNS BOOLEAN AS :MODULE_PATHNAME, 'ts_test_telemetry_main_conn' LANGUAGE C IMMUTABLE PARALLEL SAFE;", "explanation": "SQL definition from the open-source timescaledb PostgreSQL project.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "plpgsql_function", "is_postgresql_specific": true, "sql_length": 188, "num_statements": 1} {"question": "pgTAP test for Pgtap--0.94.0--0.95.0 (assertion 22).", "schema": null, "sql": "-- hasnt_materialized_view( materialized_view )\nCREATE OR REPLACE FUNCTION hasnt_materialized_view ( NAME )\nRETURNS TEXT AS $$\n SELECT hasnt_materialized_view( $1, 'Materialized view ' || quote_ident($1) || ' should not exist' );\n$$ LANGUAGE SQL;", "explanation": "SQL assertion from pgTAP test suite for Pgtap--0.94.0--0.95.0.", "validation_query": null, "source": "pgtap_tests", "difficulty": "advanced", "category": "plpgsql_function", "is_postgresql_specific": true, "sql_length": 249, "num_statements": 2} {"question": "Write the DDL statement from PostgreSQL regression test 'partition_join' (example 115).", "schema": null, "sql": "CREATE TABLE prt2_m_p1 PARTITION OF prt2_m FOR VALUES FROM (0, 0) TO (250, 250);", "explanation": "DDL from PostgreSQL core regression test for Partition Join.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_table", "is_postgresql_specific": false, "sql_length": 80, "num_statements": 1} {"question": "Show a SQL definition from the zombodb project (_postgis-support, item 4).", "schema": null, "sql": "-- casting functions\n EXECUTE format('create or replace function zdb.geometry_to_json(%I.geometry, typmod integer DEFAULT -1) returns json parallel safe immutable strict language sql as $$\n SELECT CASE WHEN %I.postgis_typmod_type($2) = ''Point'' THEN\n zdb.point_to_json(%I.st_transform($1, 4326)::point)::json\n ELSE\n %I.st_asgeojson(%I.st_transform($1, 4326))::json\n END\n $$;',\n geojson_namespace, geojson_namespace, geojson_namespace, geojson_namespace, geojson_namespace);", "explanation": "SQL definition from the open-source zombodb PostgreSQL project.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "plpgsql_function", "is_postgresql_specific": true, "sql_length": 570, "num_statements": 2} {"question": "Write the SQL definition from PostgreSQL contrib extension 'contrib' (item 73).", "schema": null, "sql": "CREATE FUNCTION gin_extract_query_char(\"char\", internal, int2, internal, internal)\nRETURNS internal\nAS 'MODULE_PATHNAME'\nLANGUAGE C STRICT IMMUTABLE;", "explanation": "SQL definition from the 'contrib' PostgreSQL contrib extension.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 149, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'earthdistance' (example 32).", "schema": null, "sql": "SELECT cube_ll_coord(ll_to_earth(-90,180),1)::numeric(20,5),\n cube_ll_coord(ll_to_earth(-90,180),2)::numeric(20,5),\n cube_ll_coord(ll_to_earth(-90,180),3)::numeric(20,5);", "explanation": "Example query from the 'earthdistance' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "query_select", "is_postgresql_specific": false, "sql_length": 170, "num_statements": 1} {"question": "PostgreSQL regression test 'union': Write the SELECT query (example 24).", "schema": null, "sql": "SELECT f1 AS three FROM VARCHAR_TBL\nUNION\nSELECT CAST(f1 AS varchar) FROM CHAR_TBL\nORDER BY 1;", "explanation": "Regression test for Union in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT f1 AS three FROM VARCHAR_TBL\nUNION\nSELECT CAST(f1 AS varchar) FROM CHAR_TBL\nORDER BY 1) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 94, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'timestamptz' (example 86).", "schema": null, "sql": "INSERT INTO TIMESTAMPTZ_TBL VALUES ('Feb 11 17:32:01 1997');", "explanation": "DML from PostgreSQL core regression test for Timestamptz.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 60, "num_statements": 1} {"question": "pgTAP test for Throwtap (assertion 1).", "schema": null, "sql": "\\unset ECHO\n\\i test/setup.sql\n\nSELECT plan(97);", "explanation": "SQL assertion from pgTAP test suite for Throwtap.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 47, "num_statements": 1} {"question": "PL/pgSQL test: Plperl Array (example 18).", "schema": null, "sql": "select plperl_sum_row_elements(ROW(1, ARRAY[2,3,4,5,6,7,8,9,10])::rowfoo);", "explanation": "PL/pgSQL example from PostgreSQL source test for Plperl Array.", "validation_query": null, "source": "plpgsql_source", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": true, "sql_length": 74, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'stats_import' (example 25).", "schema": null, "sql": "CREATE INDEX part_parent_i ON stats_import.part_parent(i);", "explanation": "DDL from PostgreSQL core regression test for Stats Import.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "ddl_index", "is_postgresql_specific": false, "sql_length": 58, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'indexing' (example 251).", "schema": null, "sql": "create index on idxpart2 (a) where b > 1000;", "explanation": "DDL from PostgreSQL core regression test for Indexing.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "ddl_index", "is_postgresql_specific": false, "sql_length": 44, "num_statements": 1} {"question": "PostgreSQL regression test 'advisory_lock': Write the SELECT query (example 12).", "schema": null, "sql": "SELECT\n\tpg_advisory_lock(1), pg_advisory_lock_shared(2),\n\tpg_advisory_lock(1, 1), pg_advisory_lock_shared(2, 2);", "explanation": "Regression test for Advisory Lock in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT\n\tpg_advisory_lock(1), pg_advisory_lock_shared(2),\n\tpg_advisory_lock(1, 1), pg_advisory_lock_shared(2, 2)) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 112, "num_statements": 1} {"question": "Write the PL/pgSQL object from PostgreSQL regression test 'transactions' (example 189).", "schema": null, "sql": "-- Now the same test with plpgsql (since it depends on SPI which is different)\ncreate or replace function max_xacttest() returns smallint language plpgsql as\n'begin return max(a) from xacttest; end' stable;", "explanation": "PL/pgSQL object from PostgreSQL core test for Transactions.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "plpgsql_function", "is_postgresql_specific": true, "sql_length": 206, "num_statements": 2} {"question": "Show a SQL definition from the pgaudit project (pgaudit--18.0, item 1).", "schema": null, "sql": "\\echo Use \"CREATE EXTENSION pgaudit\" to load this file.\\quit\n\nCREATE FUNCTION pgaudit_ddl_command_end()\n\tRETURNS event_trigger\n\tSECURITY DEFINER\n\tSET search_path = pg_catalog, pg_temp\n\tLANGUAGE C\n\tAS 'MODULE_PATHNAME', 'pgaudit_ddl_command_end';", "explanation": "SQL definition from the open-source pgaudit PostgreSQL project.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_advanced", "is_postgresql_specific": true, "sql_length": 245, "num_statements": 1} {"question": "Show an example of PostgreSQL INSERT (example 9).", "schema": null, "sql": "WITH upd AS ( UPDATE employees SET sales_count = sales_count + 1 WHERE id = (SELECT sales_person FROM accounts WHERE name = 'Acme Corporation') RETURNING * ) INSERT INTO employees_log SELECT *, current_timestamp FROM upd;", "explanation": "PostgreSQL INSERT command.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "dml_insert", "is_postgresql_specific": true, "sql_length": 221, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'test_decoding' (example 11).", "schema": null, "sql": "INSERT INTO spill_test SELECT 'serialize-subbig--1:'||g.i FROM generate_series(1, 5000) g(i);", "explanation": "Example query from the 'test_decoding' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 93, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'numeric' (example 220).", "schema": null, "sql": "INSERT INTO num_exp_add VALUES (5,2,'-34322095.176906047');", "explanation": "DML from PostgreSQL core regression test for Numeric.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 59, "num_statements": 1} {"question": "PostgreSQL regression test 'text': Write the SELECT query (example 37).", "schema": null, "sql": "select format('INSERT INTO %I VALUES(%L,%L)', 'mytab', NULL, 'Hello');", "explanation": "Regression test for Text in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select format('INSERT INTO %I VALUES(%L,%L)', 'mytab', NULL, 'Hello')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 70, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'join' (example 496).", "schema": null, "sql": "CREATE TEMP TABLE parted_b1 partition of parted_b for values from (0) to (10);", "explanation": "DDL from PostgreSQL core regression test for Join.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 78, "num_statements": 1} {"question": "PostgreSQL regression test 'rangetypes': Write the SELECT query (example 13).", "schema": null, "sql": "select ' ( \" a \" \" a \", \" z \" \" z \" ) '::textrange;", "explanation": "Regression test for Rangetypes in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select ' ( \" a \" \" a \", \" z \" \" z \" ) '::textrange) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 52, "num_statements": 1} {"question": "PL/pgSQL test: Plpgsql Domain (example 23).", "schema": null, "sql": "SELECT * FROM test_argresult_nnint(null, 20);", "explanation": "PL/pgSQL example from PostgreSQL source test for Plpgsql Domain.", "validation_query": null, "source": "plpgsql_source", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 45, "num_statements": 1} {"question": "pgTAP test for Coltap (assertion 90).", "schema": null, "sql": "-- Make sure that it works when the default is a reserved SQL expression.\nCREATE OR REPLACE FUNCTION ckreserve() RETURNS SETOF TEXT LANGUAGE PLPGSQL AS $$\nDECLARE\n funcs text[] := '{CURRENT_CATALOG,CURRENT_ROLE,CURRENT_SCHEMA,CURRENT_USER,SESSION_USER,USER,CURRENT_DATE,CURRENT_TIME,CURRENT_TIMESTAMP,LOCALTIME,LOCALTIMESTAMP}';\n cols TEXT[] := '{ccat,crole,csch,cuser,suser,auser,cdate,ctime,ctstz,ltime,ltstz}';\n exp TEXT[] := funcs;\n tap record;\n last_index INTEGER;\nBEGIN\n last_index := array_upper(funcs, 1);\n IF pg_version_num() < 100000 THEN\n -- Prior to PostgreSQL 10, these were functions rendered with paretheses or as casts.\n exp := ARRAY['current_database()','\"current_user\"()','\"current_schema\"()','\"current_user\"()','\"session_user\"()','\"current_user\"()','(''now''::text)::date','(''now''::text)::time with time zone','now()','(''now''::text)::time without time zone','(''now''::text)::timestamp without time zone'];\n END IF;\n\n FOR i IN 1..last_index LOOP\n FOR tap IN SELECT * FROM check_test(\n col_default_is( 'sometab', cols[i], exp[i], 'Test ' || funcs[i] ),\n true,\n 'col_default_is( tab, col, ' || funcs[i] || ' )',\n 'Test ' || funcs[i],\n ''\n ) AS b LOOP RETURN NEXT tap.b; END LOOP;\n END LOOP;\nEND;\n$$;", "explanation": "SQL assertion from pgTAP test suite for Coltap.", "validation_query": null, "source": "pgtap_tests", "difficulty": "advanced", "category": "plpgsql_function", "is_postgresql_specific": true, "sql_length": 1347, "num_statements": 13} {"question": "PL/pgSQL test: Plpython Record (example 19).", "schema": null, "sql": "SELECT * FROM test_table_record_as('list', 'one', null, false);", "explanation": "PL/pgSQL example from PostgreSQL source test for Plpython Record.", "validation_query": null, "source": "plpgsql_source", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 63, "num_statements": 1} {"question": "PostgreSQL regression test 'sqljson_queryfuncs': Write the SELECT query (example 292).", "schema": null, "sql": "SELECT JSON_QUERY(jsonb '123', '$' RETURNING queryfuncs_char2_chk ERROR ON ERROR);", "explanation": "Regression test for Sqljson Queryfuncs in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT JSON_QUERY(jsonb '123', '$' RETURNING queryfuncs_char2_chk ERROR ON ERROR)) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": true, "sql_length": 82, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'partition_prune' (example 223).", "schema": null, "sql": "create table rp2 partition of rp for values from (2) to (maxvalue);", "explanation": "DDL from PostgreSQL core regression test for Partition Prune.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_table", "is_postgresql_specific": false, "sql_length": 67, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'insert' (example 9).", "schema": null, "sql": "insert into inserttest (col1, col2, col3) values (1, 2);", "explanation": "DML from PostgreSQL core regression test for Insert.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 56, "num_statements": 1} {"question": "Show a SQL definition from the timescaledb project (compat, item 23).", "schema": null, "sql": "CREATE OR REPLACE FUNCTION _timescaledb_internal.get_partition_hash(val anyelement) RETURNS integer LANGUAGE PLPGSQL AS $$\nBEGIN\n IF current_setting('timescaledb.enable_deprecation_warnings', true)::bool THEN\n RAISE WARNING 'function _timescaledb_internal.get_partition_hash(anyelement) is deprecated and has been moved to _timescaledb_functions schema. this compatibility function will be removed in a future version.';\n END IF;\n RETURN _timescaledb_functions.get_partition_hash($1);\nEND$$\nSET search_path TO pg_catalog,pg_temp;", "explanation": "SQL definition from the open-source timescaledb PostgreSQL project.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "plpgsql_function", "is_postgresql_specific": true, "sql_length": 535, "num_statements": 4} {"question": "Write the DML statement from PostgreSQL regression test 'numeric_big' (example 259).", "schema": null, "sql": "INSERT INTO num_exp_sub VALUES (5,9,'-707619110.78141098833556856308817117136192658504561165951731229431651264331543278598450117846625251667849259592530287073315399782168794294250299770032264633712037469256688885911649778714039732161560189579333758422588445749233730591792217152212229008169062714458263709952275557558931748845536759606982982654369800245696528893058665897330942472105350178781035298449067051916630343957356635391594362639819978677032855590055900561501350354631803808000307050416047072513406855040715556454205065332997338225626635780147287003130754254277103928406089109802521803537038957372612837169223905290912251006321930223154562110264217937');", "explanation": "DML from PostgreSQL core regression test for Numeric Big.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 662, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'ltree' (example 149).", "schema": null, "sql": "SELECT 'a.b.c.d.e'::ltree ~ '!d.*';", "explanation": "Example query from the 'ltree' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 35, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'create_view' (example 75).", "schema": null, "sql": "CREATE TABLE tbl1 ( a int, b int);", "explanation": "DDL from PostgreSQL core regression test for Create View.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_table", "is_postgresql_specific": false, "sql_length": 34, "num_statements": 1} {"question": "PostgreSQL regression test 'vacuum': Write the SELECT query (example 15).", "schema": null, "sql": "SELECT count(*) FROM vactst;", "explanation": "Regression test for Vacuum in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT count(*) FROM vactst) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 28, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'merge' (example 367).", "schema": null, "sql": "CREATE TABLE part3 PARTITION OF pa_target FOR VALUES IN (3,8,9)\n WITH (autovacuum_enabled=off);", "explanation": "DDL from PostgreSQL core regression test for Merge.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_table", "is_postgresql_specific": false, "sql_length": 96, "num_statements": 1} {"question": "pgTAP test for Index (assertion 16).", "schema": null, "sql": "SELECT * FROM check_test(\n has_index( 'public', 'sometab', 'idx_hey', 'numb'::name ),\n true,\n 'has_index() single column no desc',\n 'Index idx_hey should exist',\n ''\n);", "explanation": "SQL assertion from pgTAP test suite for Index.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 183, "num_statements": 1} {"question": "PostgreSQL Rules: show example 51.", "schema": null, "sql": "UPDATE shoelace_data SET sl_name = s.sl_name, sl_avail = s.sl_avail + shoelace_arrive.arr_quant, sl_color = s.sl_color, sl_len = s.sl_len, sl_unit = s.sl_unit FROM shoelace_arrive shoelace_arrive, shoelace_ok shoelace_ok, shoelace_ok old, shoelace_ok new, shoelace shoelace, shoelace old, shoelace new, shoelace_data shoelace_data, shoelace old, shoelace new, shoelace_data s, unit u WHERE s.sl_name = shoelace_arrive.arr_name AND shoelace_data.sl_name = s.sl_name;", "explanation": "Example from PostgreSQL documentation on Rules.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 465, "num_statements": 1} {"question": "PostgreSQL regression test 'regproc': Write the SELECT query (example 108).", "schema": null, "sql": "SELECT to_regnamespace('foo.bar');", "explanation": "Regression test for Regproc in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT to_regnamespace('foo.bar')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 34, "num_statements": 1} {"question": "Show a SQL definition from the pg_partman project (test-time-mixed-case, item 9).", "schema": null, "sql": "CREATE TABLE \"Partman_Test\".\"FK_Test_Reference\" (\"Col2\" text unique not null);", "explanation": "SQL definition from the open-source pg_partman PostgreSQL project.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_table", "is_postgresql_specific": false, "sql_length": 78, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'create_am' (example 132).", "schema": null, "sql": "CREATE FOREIGN DATA WRAPPER fdw_heap2 VALIDATOR postgresql_fdw_validator;", "explanation": "DDL from PostgreSQL core regression test for Create Am.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 73, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'jsonb' (example 265).", "schema": null, "sql": "CREATE DOMAIN jsb_int_not_null AS int NOT NULL;", "explanation": "DDL from PostgreSQL core regression test for Jsonb.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_advanced", "is_postgresql_specific": true, "sql_length": 52, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'rules' (example 314).", "schema": null, "sql": "CREATE TABLE shoelace_log (\n sl_name char(10), -- shoelace changed\n sl_avail integer, -- new available value\n log_who name, -- who did it\n log_when timestamp -- when\n );", "explanation": "DDL from PostgreSQL core regression test for Rules.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_table", "is_postgresql_specific": false, "sql_length": 235, "num_statements": 1} {"question": "Write the SQL definition from PostgreSQL contrib extension 'contrib' (item 54).", "schema": null, "sql": "CREATE OPERATOR ~ (\n PROCEDURE = texticregexeq,\n LEFTARG = citext,\n RIGHTARG = text,\n NEGATOR = !~,\n RESTRICT = icregexeqsel,\n JOIN = icregexeqjoinsel\n);", "explanation": "SQL definition from the 'contrib' PostgreSQL contrib extension.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "ddl_advanced", "is_postgresql_specific": true, "sql_length": 182, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'tablefunc' (example 73).", "schema": null, "sql": "SELECT * FROM connectby('connectby_int', 'keyid', 'parent_keyid', '2', 0) AS t(keyid int, parent_keyid text, level int);", "explanation": "Example query from the 'tablefunc' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 120, "num_statements": 1} {"question": "pgTAP test for Index (assertion 77).", "schema": null, "sql": "/****************************************************************************/\n-- Test index_is_partial().\nSELECT * FROM check_test(\n index_is_partial( 'public', 'sometab', 'idx_partial', 'whatever' ),\n true,\n 'index_is_partial()',\n 'whatever',\n ''\n);", "explanation": "SQL assertion from pgTAP test suite for Index.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 266, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'ltree' (example 30).", "schema": null, "sql": "SELECT index('a.1.2.3.4.5.6','6');", "explanation": "Example query from the 'ltree' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 34, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'citext' (example 367).", "schema": null, "sql": "SELECT regexp_replace('Thomas'::citext, '.[MN]A.'::citext, 'M', 'c') = 'Thomas' AS t;", "explanation": "Example query from the 'citext' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 85, "num_statements": 1} {"question": "pgTAP test for Pgtap--Unpackaged--0.91.0 (assertion 324).", "schema": null, "sql": "ALTER EXTENSION pgtap ADD FUNCTION has_domain( NAME, TEXT );", "explanation": "SQL assertion from pgTAP test suite for Pgtap--Unpackaged--0.91.0.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 60, "num_statements": 1} {"question": "PostgreSQL regression test 'inet': Write the SELECT query (example 57).", "schema": null, "sql": "SELECT * FROM inet_tbl WHERE i && '192.168.1.0/24'::cidr ORDER BY i;", "explanation": "Regression test for Inet in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT * FROM inet_tbl WHERE i && '192.168.1.0/24'::cidr ORDER BY i) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 68, "num_statements": 1} {"question": "Show a SQL definition from the timescaledb project (compat, item 31).", "schema": null, "sql": "CREATE OR REPLACE FUNCTION _timescaledb_internal.policy_retention_check(config jsonb) RETURNS void LANGUAGE PLPGSQL AS $$\nBEGIN\n IF current_setting('timescaledb.enable_deprecation_warnings', true)::bool THEN\n RAISE WARNING 'function _timescaledb_internal.policy_retention_check(jsonb) is deprecated and has been moved to _timescaledb_functions schema. this compatibility function will be removed in a future version.';\n END IF;\n PERFORM _timescaledb_functions.policy_retention_check($1);\nEND$$\nSET search_path TO pg_catalog,pg_temp;", "explanation": "SQL definition from the open-source timescaledb PostgreSQL project.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "plpgsql_function", "is_postgresql_specific": true, "sql_length": 538, "num_statements": 4} {"question": "Write the DML statement from PostgreSQL regression test 'tuplesort' (example 4).", "schema": null, "sql": "INSERT INTO abbrev_abort_uuids (abort_increasing, abort_decreasing, noabort_increasing, noabort_decreasing)\n SELECT\n ('00000000-0000-0000-0000-'||to_char(g.i, '000000000000FM'))::uuid abort_increasing,\n ('00000000-0000-0000-0000-'||to_char(20000 - g.i, '000000000000FM'))::uuid abort_decreasing,\n (to_char(g.i % 10009, '00000000FM')||'-0000-0000-0000-'||to_char(g.i, '000000000000FM'))::uuid noabort_increasing,\n (to_char(((20000 - g.i) % 10009), '00000000FM')||'-0000-0000-0000-'||to_char(20000 - g.i, '000000000000FM'))::uuid noabort_decreasing\n FROM generate_series(0, 20000, 1) g(i);", "explanation": "DML from PostgreSQL core regression test for Tuplesort.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 621, "num_statements": 1} {"question": "Write the PL/pgSQL object from PostgreSQL regression test 'plpgsql' (example 446).", "schema": null, "sql": "$$ language plpgsql;\n\nselect forc01();\n\nselect * from forc_test;\n\ndrop function forc01();\n\n-- it's okay to re-use a cursor variable name, even when bound\n\ndo $$\ndeclare cnt int := 0;", "explanation": "PL/pgSQL object from PostgreSQL core test for Plpgsql.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "plpgsql_function", "is_postgresql_specific": true, "sql_length": 182, "num_statements": 5} {"question": "PostgreSQL regression test 'sequence': Write the SELECT query (example 49).", "schema": null, "sql": "SELECT nextval('sequence_test'::text);", "explanation": "Regression test for Sequence in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT nextval('sequence_test'::text)) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 38, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'foreign_key' (example 869).", "schema": null, "sql": "DELETE FROM pk WHERE a = 2;", "explanation": "DML from PostgreSQL core regression test for Foreign Key.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_delete", "is_postgresql_specific": false, "sql_length": 27, "num_statements": 1} {"question": "PostgreSQL regression test 'portals': Write the SELECT query (example 161).", "schema": null, "sql": "SELECT * FROM uctest;", "explanation": "Regression test for Portals in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT * FROM uctest) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 21, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'postgres_fdw' (example 511).", "schema": null, "sql": "INSERT INTO rem1 values(2, 'insert') RETURNING f2;", "explanation": "Example query from the 'postgres_fdw' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": true, "sql_length": 50, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'rowsecurity' (example 1179).", "schema": null, "sql": "CREATE POLICY p1 ON rls_tbl_force USING (c1 = 5) WITH CHECK (c1 < 5);", "explanation": "DDL from PostgreSQL core regression test for Rowsecurity.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 69, "num_statements": 1} {"question": "PostgreSQL regression test 'multirangetypes': Write the SELECT query (example 96).", "schema": null, "sql": "SELECT * FROM nummultirange_test WHERE nmr > '{}';", "explanation": "Regression test for Multirangetypes in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT * FROM nummultirange_test WHERE nmr > '{}') AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 50, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'triggers' (example 183).", "schema": null, "sql": "create trigger parted_trig_odd after insert on parted_irreg for each row\n when (bark(new.b) AND new.a % 2 = 1) execute procedure trigger_notice_ab();", "explanation": "DDL from PostgreSQL core regression test for Triggers.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "dml_insert", "is_postgresql_specific": true, "sql_length": 150, "num_statements": 1} {"question": "PostgreSQL regression test 'float8': Write the SELECT query (example 146).", "schema": null, "sql": "SELECT lgamma(float8 '1000');", "explanation": "Regression test for Float8 in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT lgamma(float8 '1000')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 29, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'memoize' (example 44).", "schema": null, "sql": "CREATE TABLE prt_p1 PARTITION OF prt FOR VALUES FROM (0) TO (10);", "explanation": "DDL from PostgreSQL core regression test for Memoize.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_table", "is_postgresql_specific": false, "sql_length": 65, "num_statements": 1} {"question": "PostgreSQL regression test 'jsonpath': Write the SELECT query (example 33).", "schema": null, "sql": "select '$.g ? (@ == 1)'::jsonpath;", "explanation": "Regression test for Jsonpath in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select '$.g ? (@ == 1)'::jsonpath) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 34, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'partition_join' (example 160).", "schema": null, "sql": "CREATE TABLE pht1_e_p2 PARTITION OF pht1_e FOR VALUES WITH (MODULUS 3, REMAINDER 1);", "explanation": "DDL from PostgreSQL core regression test for Partition Join.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_table", "is_postgresql_specific": false, "sql_length": 84, "num_statements": 1} {"question": "PostgreSQL regression test 'updatable_views': Write the SELECT query (example 129).", "schema": null, "sql": "SELECT table_name, is_updatable, is_insertable_into,\n is_trigger_updatable, is_trigger_deletable,\n is_trigger_insertable_into\n FROM information_schema.views\n WHERE table_name LIKE 'rw_view%'\n ORDER BY table_name;", "explanation": "Regression test for Updatable Views in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT table_name, is_updatable, is_insertable_into,\n is_trigger_updatable, is_trigger_deletable,\n is_trigger_insertable_into\n FROM information_schema.views\n WHERE table_name LIKE 'rw_view%'\n ORDER BY table_name) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": true, "sql_length": 225, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'test_decoding' (example 64).", "schema": null, "sql": "INSERT INTO spill_test SELECT 'serialize-subbig-subsmall--2:'||g.i FROM generate_series(5001, 5001) g(i);", "explanation": "Example query from the 'test_decoding' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 105, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'constraints' (example 12).", "schema": null, "sql": "INSERT INTO DEFAULTEXPR_TBL (i2) VALUES (-4);", "explanation": "DML from PostgreSQL core regression test for Constraints.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 45, "num_statements": 1} {"question": "PostgreSQL Advanced: show example 2.", "schema": null, "sql": "CREATE TABLE cities ( name varchar(80) PRIMARY KEY, location point ); CREATE TABLE weather ( city varchar(80) REFERENCES cities (name), temp_lo int, temp_hi int, prcp real, date date );", "explanation": "Example from PostgreSQL documentation on Advanced.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_table", "is_postgresql_specific": false, "sql_length": 185, "num_statements": 2} {"question": "Show a query using PostgreSQL contrib extension 'test_decoding' (example 67).", "schema": null, "sql": "SELECT 'tx logical msg' FROM pg_logical_emit_message(true, 'test', 'tx logical msg');", "explanation": "Example query from the 'test_decoding' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 85, "num_statements": 1} {"question": "pgTAP test for Coltap (assertion 33).", "schema": null, "sql": "-- Try interval second.\nSELECT * FROM check_test(\n col_type_is( 'public', 'sometab', 'isecd', 'pg_catalog', 'interval second(0)', 'isecd is interval second(0)' ),\n true,\n 'col_type_is( sch, tab, intsec, sch, type, desc )',\n 'isecd is interval second(0)',\n ''\n);", "explanation": "SQL assertion from pgTAP test suite for Coltap.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": true, "sql_length": 276, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'fast_default' (example 50).", "schema": null, "sql": "INSERT INTO T VALUES (25), (26);", "explanation": "DML from PostgreSQL core regression test for Fast Default.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 32, "num_statements": 1} {"question": "pgTAP test for Pgtap--0.92.0--0.93.0 (assertion 29).", "schema": null, "sql": "-- index_owner_is ( schema, table, index, user, description )\nCREATE OR REPLACE FUNCTION index_owner_is ( NAME, NAME, NAME, NAME, TEXT )\nRETURNS TEXT AS $$\nDECLARE\n owner NAME := _get_index_owner($1, $2, $3);\nBEGIN\n -- Make sure the index exists.\n IF owner IS NULL THEN\n RETURN ok(FALSE, $5) || E'\\n' || diag(\n E' Index ' || quote_ident($3) || ' ON '\n || quote_ident($1) || '.' || quote_ident($2) || ' not found'\n );\n END IF;\n\n RETURN is(owner, $4, $5);\nEND;\n$$ LANGUAGE plpgsql;", "explanation": "SQL assertion from pgTAP test suite for Pgtap--0.92.0--0.93.0.", "validation_query": null, "source": "pgtap_tests", "difficulty": "advanced", "category": "plpgsql_function", "is_postgresql_specific": true, "sql_length": 533, "num_statements": 6} {"question": "PostgreSQL regression test 'horology': Write the SELECT query (example 249).", "schema": null, "sql": "SELECT to_timestamp('My birthday-> Year: 1976, Month: May, Day: 16',\n '\"My birthday-> Year:\" YYYY, \"Month:\" FMMonth, \"Day:\" DD');", "explanation": "Regression test for Horology in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT to_timestamp('My birthday-> Year: 1976, Month: May, Day: 16',\n '\"My birthday-> Year:\" YYYY, \"Month:\" FMMonth, \"Day:\" DD')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 148, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'indexing' (example 327).", "schema": null, "sql": "create table idxpart (a int4range, b int4range, c int4range) partition by range (a);", "explanation": "DDL from PostgreSQL core regression test for Indexing.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "ddl_table", "is_postgresql_specific": true, "sql_length": 84, "num_statements": 1} {"question": "PostgreSQL regression test 'indirect_toast': Write the SELECT query (example 13).", "schema": null, "sql": "SELECT substring(indtoasttest::text, 1, 200) FROM indtoasttest;", "explanation": "Regression test for Indirect Toast in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT substring(indtoasttest::text, 1, 200) FROM indtoasttest) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 63, "num_statements": 1} {"question": "PostgreSQL regression test 'boolean': Write the SELECT query (example 40).", "schema": null, "sql": "SELECT ' true '::text::boolean AS true,\n ' FALSE'::text::boolean AS false;", "explanation": "Regression test for Boolean in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT ' true '::text::boolean AS true,\n ' FALSE'::text::boolean AS false) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 89, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'partition_prune' (example 301).", "schema": null, "sql": "create table ab_a3_b3 partition of ab_a3 for values in (3);", "explanation": "DDL from PostgreSQL core regression test for Partition Prune.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_table", "is_postgresql_specific": false, "sql_length": 59, "num_statements": 1} {"question": "Show a SQL definition from the pg_partman project (pg_partman--2.2.3--2.3.0, item 13).", "schema": null, "sql": "/*\n * Apply foreign keys that exist on the given parent to the given child table\n */\nCREATE OR REPLACE FUNCTION apply_foreign_keys(p_parent_table text, p_child_table text, p_job_id bigint DEFAULT NULL, p_debug boolean DEFAULT false) RETURNS void\n LANGUAGE plpgsql\n AS $$\nDECLARE\n\nex_context text;\nex_detail text;\nex_hint text;\nex_message text;\nv_count int := 0;\nv_job_id bigint;\nv_jobmon text;\nv_jobmon_schema text;\nv_old_search_path text;\nv_parent_schema text;\nv_parent_tablename text;\nv_ref_schema text;\nv_ref_table text;\nv_row record;\nv_schemaname text;\nv_sql text;\nv_step_id bigint;\nv_tablename text;\n\nBEGIN\n\nSELECT jobmon INTO v_jobmon FROM @extschema@.part_config WHERE parent_table = p_parent_table;\n\nIF v_jobmon THEN\n SELECT nspname INTO v_jobmon_schema FROM pg_catalog.pg_namespace n, pg_catalog.pg_extension e WHERE e.extname = 'pg_jobmon' AND e.extnamespace = n.oid;\n IF v_jobmon_schema IS NOT NULL THEN\n SELECT current_setting('search_path') INTO v_old_search_path;\n EXECUTE format('SELECT set_config(%L, %L, %L)', 'search_path', '@extschema@,'||v_jobmon_schema, 'false');\n END IF;\nEND IF;\n\nIF v_jobmon_schema IS NOT NULL THEN\n IF p_job_id IS NULL THEN\n v_job_id := add_job(format('PARTMAN APPLYING FOREIGN KEYS: %s', p_parent_table));\n ELSE -- Don't create a new job, add steps into given job\n v_job_id := p_job_id;\n END IF;\nEND IF;\n\nIF v_jobmon_schema IS NOT NULL THEN\n v_step_id := add_step(v_job_id, format('Applying foreign keys to %s if they exist on parent', p_child_table));\nEND IF;\n\nSELECT schemaname, tablename INTO v_parent_schema, v_parent_tablename\nFROM pg_catalog.pg_tables\nWHERE schemaname = split_part(p_parent_table, '.', 1)\nAND tablename = split_part(p_parent_table, '.', 2);\n\nSELECT schemaname, tablename INTO v_schemaname, v_tablename\nFROM pg_catalog.pg_tables\nWHERE schemaname = split_part(p_child_table, '.', 1)\nAND tablename = split_part(p_child_table, '.', 2);\n\nIF v_tablename IS NULL THEN\n IF v_jobmon_schema IS NOT NULL THEN\n PERFORM update_step(v_step_id, 'CRITICAL', format('Target child table (%s) does not exist.', p_child_table));\n PERFORM fail_job(v_job_id);\n EXECUTE format('SELECT set_config(%L, %L, %L)', 'search_path', v_old_search_path, 'false');\n END IF;\n RAISE EXCEPTION 'Target child table (%) does not exist.', p_child_table;\n RETURN;\nEND IF;\n\nFOR v_row IN\n SELECT pg_get_constraintdef(con.oid) AS constraint_def\n FROM pg_catalog.pg_constraint con\n JOIN pg_catalog.pg_class c ON con.conrelid = c.oid\n JOIN pg_catalog.pg_namespace n ON c.relnamespace = n.oid\n WHERE c.relname = v_parent_tablename\n AND n.nspname = v_parent_schema\n AND contype = 'f'\nLOOP\n v_sql := format('ALTER TABLE %I.%I ADD %s'\n , v_schemaname\n , v_tablename\n , v_row.constraint_def);\n\n IF p_debug THEN\n RAISE NOTICE 'Constraint creation query: %', v_sql;\n END IF;\n\n EXECUTE v_sql;\n\n IF v_jobmon_schema IS NOT NULL THEN\n PERFORM update_step(v_step_id, 'OK', 'FK applied');\n END IF;\n v_count := v_count + 1;\n\nEND LOOP;\n\nIF v_count = 0 AND v_jobmon_schema IS NOT NULL THEN\n PERFORM update_step(v_step_id, 'OK', 'No FKs found on parent');\nEND IF;\n\n\nIF v_jobmon_schema IS NOT NULL THEN\n PERFORM close_job(v_job_id);\n EXECUTE format('SELECT set_config(%L, %L, %L)', 'search_path', v_old_search_path, 'false');\nEND IF;\n\nEXCEPTION\n WHEN OTHERS THEN\n GET STACKED DIAGNOSTICS ex_message = MESSAGE_TEXT,\n ex_context = PG_EXCEPTION_CONTEXT,\n ex_detail = PG_EXCEPTION_DETAIL,\n ex_hint = PG_EXCEPTION_HINT;\n IF v_jobmon_schema IS NOT NULL THEN\n IF v_job_id IS NULL THEN\n EXECUTE format('SELECT %I.add_job(''PARTMAN CREATE APPLYING FOREIGN KEYS: %s'')', v_jobmon_schema, p_parent_table) INTO v_job_id;\n EXECUTE format('SELECT %I.add_step(%s, ''EXCEPTION before job logging started'')', v_jobmon_schema, v_job_id, p_parent_table) INTO v_step_id;\n ELSIF v_step_id IS NULL THEN\n EXECUTE format('SELECT %I.add_step(%s, ''EXCEPTION before first step logged'')', v_jobmon_schema, v_job_id) INTO v_step_id;\n END IF;\n EXECUTE format('SELECT %I.update_step(%s, ''CRITICAL'', %L)', v_jobmon_schema, v_step_id, 'ERROR: '||coalesce(SQLERRM,'unknown'));\n EXECUTE format('SELECT %I.fail_job(%s)', v_jobmon_schema, v_job_id);\n END IF;\n RAISE EXCEPTION '%\nCONTEXT: %\nDETAIL: %\nHINT: %', ex_message, ex_context, ex_detail, ex_hint;\nEND\n$$;", "explanation": "SQL definition from the open-source pg_partman PostgreSQL project.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "plpgsql_function", "is_postgresql_specific": true, "sql_length": 4817, "num_statements": 62} {"question": "pgTAP test for Hastap (assertion 153).", "schema": null, "sql": "/****************************************************************************/\n-- Test hasnt_cast().\n\nSELECT * FROM check_test(\n hasnt_cast( 'integer', 'bigint', 'pg_catalog', 'int8', 'desc' ),\n false,\n 'hasnt_cast( src, targ, schema, func, desc)',\n 'desc',\n ''\n);", "explanation": "SQL assertion from pgTAP test suite for Hastap.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": true, "sql_length": 279, "num_statements": 1} {"question": "PostgreSQL regression test 'interval': Write the SELECT query (example 38).", "schema": null, "sql": "SELECT * FROM INTERVAL_TBL\n WHERE INTERVAL_TBL.f1 >= interval '@ 1 month';", "explanation": "Regression test for Interval in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT * FROM INTERVAL_TBL\n WHERE INTERVAL_TBL.f1 >= interval '@ 1 month') AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 76, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'sqljson_queryfuncs' (example 242).", "schema": null, "sql": "CREATE INDEX ON test_jsonb_mutability (JSON_QUERY(js, '$[1, 0 to $.a ? (@.datetime() == $x)]' PASSING '12:34'::time AS x));", "explanation": "DDL from PostgreSQL core regression test for Sqljson Queryfuncs.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "ddl_index", "is_postgresql_specific": false, "sql_length": 123, "num_statements": 1} {"question": "pgTAP test for Hastap (assertion 6).", "schema": null, "sql": "CREATE TYPE public.sometype AS (\n id INT,\n name TEXT\n);", "explanation": "SQL assertion from pgTAP test suite for Hastap.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "ddl_advanced", "is_postgresql_specific": true, "sql_length": 65, "num_statements": 1} {"question": "Write the PL/pgSQL object from PostgreSQL regression test 'create_type' (example 1).", "schema": null, "sql": "-- directory path and dlsuffix are passed to us in environment variables\n\\getenv libdir PG_LIBDIR\n\\getenv dlsuffix PG_DLSUFFIX\n\n\\set regresslib :libdir '/regress' :dlsuffix\n\n--\n-- Test the \"old style\" approach of making the I/O functions first,\n-- with no explicit shell type creation.\n--\nCREATE FUNCTION widget_in(cstring)\n RETURNS widget\n AS :'regresslib'\n LANGUAGE C STRICT IMMUTABLE;", "explanation": "PL/pgSQL object from PostgreSQL core test for Create Type.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 393, "num_statements": 1} {"question": "pgTAP test for Hastap (assertion 247).", "schema": null, "sql": "SELECT * FROM check_test(\n hasnt_opclass( 'int4_ops', 'whatever' ),\n false,\n 'hasnt_opclass( name, desc )',\n 'whatever',\n ''\n);", "explanation": "SQL assertion from pgTAP test suite for Hastap.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 132, "num_statements": 1} {"question": "Show a SQL definition from the timescaledb project (util_time, item 2).", "schema": null, "sql": "-- Return the minimum for the type. For time types, it will be the\n-- Unix timestamp in microseconds.\nCREATE OR REPLACE FUNCTION _timescaledb_functions.get_internal_time_max(REGTYPE) RETURNS BIGINT\nAS '@MODULE_PATHNAME@', 'ts_get_internal_time_max' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;", "explanation": "SQL definition from the open-source timescaledb PostgreSQL project.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "plpgsql_function", "is_postgresql_specific": true, "sql_length": 291, "num_statements": 1} {"question": "PostgreSQL regression test 'dbsize': Write the SELECT query (example 4).", "schema": null, "sql": "SELECT size, pg_size_pretty(size), pg_size_pretty(-1 * size) FROM\n (VALUES (10239::numeric), (10240::numeric),\n (10485247::numeric), (10485248::numeric),\n (10736893951::numeric), (10736893952::numeric),\n (10994579406847::numeric), (10994579406848::numeric),\n (11258449312612351::numeric), (11258449312612352::numeric),\n (11528652096115048447::numeric), (11528652096115048448::numeric)) x(size);", "explanation": "Regression test for Dbsize in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT size, pg_size_pretty(size), pg_size_pretty(-1 * size) FROM\n (VALUES (10239::numeric), (10240::numeric),\n (10485247::numeric), (10485248::numeric),\n (10736893951::numeric), (10736893952::numeric),\n (10994579406847::numeric), (10994579406848::numeric),\n (11258449312612351::numeric), (11258449312612352::numeric),\n (11528652096115048447::numeric), (11528652096115048448::numeric)) x(size)) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 452, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'matview' (example 23).", "schema": null, "sql": "CREATE VIEW mvtest_tvvmv AS SELECT * FROM mvtest_tvvm;", "explanation": "DDL from PostgreSQL core regression test for Matview.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "ddl_view", "is_postgresql_specific": false, "sql_length": 54, "num_statements": 1} {"question": "Show a SQL definition from the pg_partman project (test-time-maintenance-order, item 7).", "schema": null, "sql": "CREATE TABLE partman_test.time_taptest_table3\n (col1 int\n , col2 text default 'stuff'\n , col3 timestamptz NOT NULL DEFAULT now())\n PARTITION BY RANGE (col3);", "explanation": "SQL definition from the open-source pg_partman PostgreSQL project.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "ddl_table", "is_postgresql_specific": true, "sql_length": 177, "num_statements": 1} {"question": "PostgreSQL regression test 'jsonb_jsonpath': Write the SELECT query (example 464).", "schema": null, "sql": "select jsonb_path_query('\"1.23aaa\"', '$.integer()');", "explanation": "Regression test for Jsonb Jsonpath in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select jsonb_path_query('\"1.23aaa\"', '$.integer()')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "advanced", "category": "query_select", "is_postgresql_specific": false, "sql_length": 52, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'foreign_key' (example 130).", "schema": null, "sql": "INSERT INTO PKTABLE VALUES (2, 4, 5, 'test4');", "explanation": "DML from PostgreSQL core regression test for Foreign Key.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 46, "num_statements": 1} {"question": "PostgreSQL regression test 'date': Write the SELECT query (example 63).", "schema": null, "sql": "SELECT date '01 08 99';", "explanation": "Regression test for Date in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT date '01 08 99') AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 23, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'postgres_fdw' (example 936).", "schema": null, "sql": "INSERT INTO tru_ptable (SELECT x FROM generate_series(11,20) x);", "explanation": "Example query from the 'postgres_fdw' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 64, "num_statements": 1} {"question": "PostgreSQL regression test 'inherit': Write the SELECT query (example 34).", "schema": null, "sql": "SELECT relname, b.* FROM ONLY b, pg_class where b.tableoid = pg_class.oid;", "explanation": "Regression test for Inherit in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT relname, b.* FROM ONLY b, pg_class where b.tableoid = pg_class.oid) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": true, "sql_length": 74, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'copy2' (example 142).", "schema": null, "sql": "CREATE FUNCTION truncate_in_subxact() RETURNS VOID AS\n$$\nBEGIN\n\tTRUNCATE vistest;\nEXCEPTION\n WHEN OTHERS THEN\n\tINSERT INTO vistest VALUES ('subxact failure');\nEND;\n$$ language plpgsql;", "explanation": "DDL from PostgreSQL core regression test for Copy2.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "plpgsql_function", "is_postgresql_specific": true, "sql_length": 185, "num_statements": 4} {"question": "Write the DDL statement from PostgreSQL regression test 'polymorphism' (example 105).", "schema": null, "sql": "CREATE AGGREGATE myaggn02b(*) (SFUNC = stfnp, STYPE = anyarray,\n INITCOND = '{}');", "explanation": "DDL from PostgreSQL core regression test for Polymorphism.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "ddl_advanced", "is_postgresql_specific": true, "sql_length": 83, "num_statements": 1} {"question": "PL/pgSQL test: Plpython Void (example 2).", "schema": null, "sql": "-- illegal: can't return non-None value in void-returning func\nCREATE FUNCTION test_void_func2() RETURNS void AS $$\nreturn 10\n$$ LANGUAGE plpython3u;", "explanation": "PL/pgSQL example from PostgreSQL source test for Plpython Void.", "validation_query": null, "source": "plpgsql_source", "difficulty": "basic", "category": "other", "is_postgresql_specific": true, "sql_length": 149, "num_statements": 1} {"question": "PostgreSQL regression test 'timestamptz': Write the SELECT query (example 301).", "schema": null, "sql": "SELECT '2011-03-27 02:59:59'::timestamp AT TIME ZONE 'MSK';", "explanation": "Regression test for Timestamptz in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT '2011-03-27 02:59:59'::timestamp AT TIME ZONE 'MSK') AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 59, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'plpgsql' (example 759).", "schema": null, "sql": "UPDATE alter_table_under_transition_tables\n SET id = id;", "explanation": "DML from PostgreSQL core regression test for Plpgsql.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 57, "num_statements": 1} {"question": "Show a SQL definition from the zombodb project (zombodb--3000.0.6--3000.0.7, item 2).", "schema": null, "sql": "CREATE FUNCTION zdb.schema_version() RETURNS text LANGUAGE sql AS $$\nSELECT '3000.0.7 (90d8c41590be5fb35c99f5da14c1b134b0c863b9)'\n$$;", "explanation": "SQL definition from the open-source zombodb PostgreSQL project.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 133, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'ltree' (example 130).", "schema": null, "sql": "SELECT 'a.b.c.d.e'::ltree ~ 'a.*{3}.e';", "explanation": "Example query from the 'ltree' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 39, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'create_index' (example 625).", "schema": null, "sql": "CREATE INDEX ON table2(col2);", "explanation": "DDL from PostgreSQL core regression test for Create Index.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "ddl_index", "is_postgresql_specific": false, "sql_length": 29, "num_statements": 1} {"question": "pgTAP test for Resultset (assertion 199).", "schema": null, "sql": "INSERT INTO names (name) VALUES ('Molly');", "explanation": "SQL assertion from pgTAP test suite for Resultset.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 42, "num_statements": 1} {"question": "pgTAP test for Pgtap--0.94.0--0.95.0 (assertion 19).", "schema": null, "sql": "-- has_materialized_view( materialized_view )\nCREATE OR REPLACE FUNCTION has_materialized_view ( NAME )\nRETURNS TEXT AS $$\n SELECT has_materialized_view( $1, 'Materialized view ' || quote_ident($1) || ' should exist' );\n$$ LANGUAGE SQL;", "explanation": "SQL assertion from pgTAP test suite for Pgtap--0.94.0--0.95.0.", "validation_query": null, "source": "pgtap_tests", "difficulty": "advanced", "category": "plpgsql_function", "is_postgresql_specific": true, "sql_length": 239, "num_statements": 2} {"question": "Write the DML statement from PostgreSQL regression test 'temp' (example 14).", "schema": null, "sql": "INSERT INTO temptest VALUES (2.1);", "explanation": "DML from PostgreSQL core regression test for Temp.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 34, "num_statements": 1} {"question": "PostgreSQL regression test 'float4': Write the SELECT query (example 30).", "schema": null, "sql": "SELECT * FROM pg_input_error_info('1e400', 'float4');", "explanation": "Regression test for Float4 in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT * FROM pg_input_error_info('1e400', 'float4')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 53, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'bit' (example 50).", "schema": null, "sql": "INSERT INTO BIT_SHIFT_TABLE SELECT b>>4 FROM BIT_SHIFT_TABLE;", "explanation": "DML from PostgreSQL core regression test for Bit.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 61, "num_statements": 1} {"question": "PL/pgSQL test: Plpython Types (example 96).", "schema": null, "sql": "SELECT * FROM test_type_conversion_array_date(ARRAY[[['2016-09-21','2016-09-22',NULL],[NULL,'2016-10-21','2016-10-22']],\n [[NULL,'2016-11-21','2016-10-21'],['2015-09-21','2015-09-22','2014-09-21']]]::date[]);", "explanation": "PL/pgSQL example from PostgreSQL source test for Plpython Types.", "validation_query": null, "source": "plpgsql_source", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 219, "num_statements": 1} {"question": "PostgreSQL Xfunc: show example 32.", "schema": null, "sql": "CREATE OR REPLACE FUNCTION retcomposite(IN integer, IN integer, OUT f1 integer, OUT f2 integer, OUT f3 integer) RETURNS SETOF record AS ' filename ', 'retcomposite' LANGUAGE C IMMUTABLE STRICT;", "explanation": "Example from PostgreSQL documentation on Xfunc.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "plpgsql_function", "is_postgresql_specific": true, "sql_length": 193, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'citext' (example 385).", "schema": null, "sql": "SELECT replace('abcdefabcdef', 'cd'::citext, 'XX') = 'abXXefabXXef' AS t;", "explanation": "Example query from the 'citext' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 73, "num_statements": 1} {"question": "PostgreSQL regression test 'union': Write the SELECT query (example 22).", "schema": null, "sql": "SELECT f1 AS five FROM FLOAT8_TBL\n WHERE f1 BETWEEN -1e6 AND 1e6\nUNION\nSELECT f1 FROM INT4_TBL\n WHERE f1 BETWEEN 0 AND 1000000\nORDER BY 1;", "explanation": "Regression test for Union in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT f1 AS five FROM FLOAT8_TBL\n WHERE f1 BETWEEN -1e6 AND 1e6\nUNION\nSELECT f1 FROM INT4_TBL\n WHERE f1 BETWEEN 0 AND 1000000\nORDER BY 1) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 140, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'timestamptz' (example 102).", "schema": null, "sql": "INSERT INTO TIMESTAMPTZ_TBL VALUES ('Feb 29 17:32:01 1996');", "explanation": "DML from PostgreSQL core regression test for Timestamptz.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 60, "num_statements": 1} {"question": "PostgreSQL regression test 'matview': Write the SELECT query (example 110).", "schema": null, "sql": "SELECT * FROM mvtest_mv_v_3;", "explanation": "Regression test for Matview in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT * FROM mvtest_mv_v_3) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 28, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'spi' (example 25).", "schema": null, "sql": "insert into fkeys values (10, '1', 2);", "explanation": "Example query from the 'spi' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 38, "num_statements": 1} {"question": "Write the SQL definition from PostgreSQL contrib extension 'contrib' (item 135).", "schema": null, "sql": "CREATE FUNCTION gbt_date_distance(internal,date,int2,oid,internal)\nRETURNS float8\nAS 'MODULE_PATHNAME'\nLANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;", "explanation": "SQL definition from the 'contrib' PostgreSQL contrib extension.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 145, "num_statements": 1} {"question": "pgTAP test for Resultset (assertion 447).", "schema": null, "sql": "PREPARE annames_ord AS SELECT id, name FROM annames ORDER BY id;", "explanation": "SQL assertion from pgTAP test suite for Resultset.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 64, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'numeric' (example 793).", "schema": null, "sql": "INSERT INTO num_input_test(n1) VALUES ('5 . 0');", "explanation": "DML from PostgreSQL core regression test for Numeric.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 48, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'alter_table' (example 443).", "schema": null, "sql": "INSERT INTO ataddindex(f1) VALUES ('foo'), ('a');", "explanation": "DML from PostgreSQL core regression test for Alter Table.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 49, "num_statements": 1} {"question": "PostgreSQL regression test 'geometry': Write the SELECT query (example 100).", "schema": null, "sql": "SELECT f1 AS open_path, polygon( pclose(f1)) AS polygon\n FROM PATH_TBL\n WHERE isopen(f1);", "explanation": "Regression test for Geometry in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT f1 AS open_path, polygon( pclose(f1)) AS polygon\n FROM PATH_TBL\n WHERE isopen(f1)) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 93, "num_statements": 1} {"question": "PostgreSQL regression test 'jsonpath': Write the SELECT query (example 70).", "schema": null, "sql": "select '$.boolean()'::jsonpath;", "explanation": "Regression test for Jsonpath in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select '$.boolean()'::jsonpath) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 31, "num_statements": 1} {"question": "PostgreSQL regression test 'tstypes': Write the SELECT query (example 169).", "schema": null, "sql": "SELECT ts_delete('base:7 hidden:6 rebel:1 spaceship:2,33A,34B,35C,36D strike:3'::tsvector, ARRAY['spaceship','rebel']);", "explanation": "Regression test for Tstypes in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT ts_delete('base:7 hidden:6 rebel:1 spaceship:2,33A,34B,35C,36D strike:3'::tsvector, ARRAY['spaceship','rebel'])) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": true, "sql_length": 119, "num_statements": 1} {"question": "pgTAP test for Check (assertion 18).", "schema": null, "sql": "/****************************************************************************/\n-- Test col_has_check() with an array of columns.\n\nSET LOCAL client_min_messages = warning;", "explanation": "SQL assertion from pgTAP test suite for Check.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 170, "num_statements": 1} {"question": "PostgreSQL regression test 'regproc': Write the SELECT query (example 91).", "schema": null, "sql": "SELECT to_regtype('int3');", "explanation": "Regression test for Regproc in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT to_regtype('int3')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 26, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'numeric_big' (example 19).", "schema": null, "sql": "CREATE TABLE num_exp_power_10_ln (id int4, expected numeric(1000,800));", "explanation": "DDL from PostgreSQL core regression test for Numeric Big.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "ddl_table", "is_postgresql_specific": false, "sql_length": 71, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'uuid' (example 7).", "schema": null, "sql": "INSERT INTO guid1(guid_field) VALUES('{22222222-2222-2222-2222-222222222222 ');", "explanation": "DML from PostgreSQL core regression test for Uuid.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 79, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'btree_gist' (example 5).", "schema": null, "sql": "SELECT count(*) FROM bittmp WHERE a = '011011000100010111011000110000100';", "explanation": "Example query from the 'btree_gist' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 76, "num_statements": 1} {"question": "Write the SQL definition from PostgreSQL contrib extension 'contrib' (item 56).", "schema": null, "sql": "CREATE FUNCTION lt_q_regex(ltree,_lquery)\nRETURNS bool\nAS 'MODULE_PATHNAME'\nLANGUAGE C STRICT IMMUTABLE PARALLEL SAFE;", "explanation": "SQL definition from the 'contrib' PostgreSQL contrib extension.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 118, "num_statements": 1} {"question": "pgTAP test for Hastap (assertion 161).", "schema": null, "sql": "SELECT * FROM check_test(\n hasnt_cast( 'integer', 'clue', 'desc' ),\n true,\n 'hasnt_cast( src, targ, desc ) fail',\n 'desc',\n ''\n);", "explanation": "SQL assertion from pgTAP test suite for Hastap.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 144, "num_statements": 1} {"question": "Show an example of PostgreSQL INSERT (example 3).", "schema": null, "sql": "INSERT INTO films VALUES ('UA502', 'Bananas', 105, DEFAULT, 'Comedy', '82 minutes'); INSERT INTO films (code, title, did, date_prod, kind) VALUES ('T_601', 'Yojimbo', 106, DEFAULT, 'Drama');", "explanation": "PostgreSQL INSERT command.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 190, "num_statements": 2} {"question": "pgTAP test for Pgtap--0.93.0--0.94.0 (assertion 3).", "schema": null, "sql": "CREATE OR REPLACE FUNCTION _fprivs_are ( TEXT, NAME, NAME[], TEXT )\nRETURNS TEXT AS $$\nDECLARE\n grants TEXT[] := _get_func_privs($2, $1);\nBEGIN\n IF grants[1] = 'undefined_function' THEN\n RETURN ok(FALSE, $4) || E'\\n' || diag(\n ' Function ' || $1 || ' does not exist'\n );\n ELSIF grants[1] = 'undefined_role' THEN\n RETURN ok(FALSE, $4) || E'\\n' || diag(\n ' Role ' || quote_ident($2) || ' does not exist'\n );\n END IF;\n RETURN _assets_are('privileges', grants, $3, $4);\nEND;\n$$ LANGUAGE plpgsql;", "explanation": "SQL assertion from pgTAP test suite for Pgtap--0.93.0--0.94.0.", "validation_query": null, "source": "pgtap_tests", "difficulty": "advanced", "category": "plpgsql_function", "is_postgresql_specific": true, "sql_length": 562, "num_statements": 7} {"question": "Write the DDL statement from PostgreSQL regression test 'publication' (example 140).", "schema": null, "sql": "CREATE PUBLICATION testpub_dups FOR TABLE testpub_rf_tbl1 WHERE (a = 1), testpub_rf_tbl1 WITH (publish = 'insert');", "explanation": "DDL from PostgreSQL core regression test for Publication.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 115, "num_statements": 1} {"question": "PostgreSQL regression test 'stats': Write the SELECT query (example 301).", "schema": null, "sql": "select a from stats_test_tab1 where a = 3;", "explanation": "Regression test for Stats in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select a from stats_test_tab1 where a = 3) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 42, "num_statements": 1} {"question": "PostgreSQL regression test 'interval': Write the SELECT query (example 393).", "schema": null, "sql": "SELECT i AS interval, date_trunc('week', i)\n FROM INFINITE_INTERVAL_TBL\n WHERE NOT isfinite(i);", "explanation": "Regression test for Interval in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT i AS interval, date_trunc('week', i)\n FROM INFINITE_INTERVAL_TBL\n WHERE NOT isfinite(i)) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 101, "num_statements": 1} {"question": "PostgreSQL regression test 'tidrangescan': Write the SELECT query (example 27).", "schema": null, "sql": "SELECT ctid FROM tidrangescan WHERE '(1,7)' >= ctid AND ctid > '(1,4)';", "explanation": "Regression test for Tidrangescan in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT ctid FROM tidrangescan WHERE '(1,7)' >= ctid AND ctid > '(1,4)') AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 71, "num_statements": 1} {"question": "Write the SQL definition from PostgreSQL contrib extension 'contrib' (item 27).", "schema": null, "sql": "CREATE FUNCTION pgp_pub_decrypt_bytea(bytea, bytea)\nRETURNS bytea\nAS 'MODULE_PATHNAME', 'pgp_pub_decrypt_bytea'\nLANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;", "explanation": "SQL definition from the 'contrib' PostgreSQL contrib extension.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 154, "num_statements": 1} {"question": "Show a SQL definition from the timescaledb project (create_hypertable, item 43).", "schema": null, "sql": "-- Test add_dimension: can use interval types for TIMESTAMPTZ columns\nCREATE TABLE dim_test_time(time TIMESTAMPTZ, time2 TIMESTAMPTZ, time3 BIGINT, temp float8, device int, location int);", "explanation": "SQL definition from the open-source timescaledb PostgreSQL project.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_table", "is_postgresql_specific": false, "sql_length": 187, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'triggers' (example 474).", "schema": null, "sql": "create trigger iocdu_tt_parted_update_trig\n after update on iocdu_tt_parted referencing old table as old_table new table as new_table\n for each statement execute procedure dump_update();", "explanation": "DDL from PostgreSQL core regression test for Triggers.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "dml_update", "is_postgresql_specific": true, "sql_length": 188, "num_statements": 1} {"question": "PostgreSQL regression test 'collate': Write the SELECT query (example 59).", "schema": null, "sql": "SELECT a, b FROM collate_test2 WHERE a < 4 INTERSECT SELECT a, b FROM collate_test2 WHERE a > 1 ORDER BY 2;", "explanation": "Regression test for Collate in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT a, b FROM collate_test2 WHERE a < 4 INTERSECT SELECT a, b FROM collate_test2 WHERE a > 1 ORDER BY 2) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 107, "num_statements": 1} {"question": "PL/pgSQL test: Pltcl Subxact (example 10).", "schema": null, "sql": "-- Test subtransaction rollback\n\nCREATE FUNCTION subtransaction_ctx_test(what_error text = NULL) RETURNS void\nAS $$\n spi_exec \"INSERT INTO subtransaction_tbl VALUES (1)\"\n subtransaction {\n spi_exec \"INSERT INTO subtransaction_tbl VALUES (2)\"\n if {$1 == \"SPI\"} {\n spi_exec \"INSERT INTO subtransaction_tbl VALUES ('oops')\"\n } elseif { $1 == \"Tcl\"} {\n elog ERROR \"Tcl error\"\n }\n }\n$$ LANGUAGE pltcl;", "explanation": "PL/pgSQL example from PostgreSQL source test for Pltcl Subxact.", "validation_query": null, "source": "plpgsql_source", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 455, "num_statements": 1} {"question": "Show a SQL definition from the pg_partman project (pg_partman--2.2.0--2.2.1, item 1).", "schema": null, "sql": "/*\n * Function to manage pre-creation of the next partitions in a set.\n * Also manages dropping old partitions if the retention option is set.\n * If p_parent_table is passed, will only run run_maintenance() on that one table (no matter what the configuration table may have set for it)\n * Otherwise, will run on all tables in the config table with p_run_maintenance() set to true.\n * For large partition sets, running analyze can cause maintenance to take longer than expected. Can set p_analyze to false to avoid a forced analyze run.\n * Be aware that constraint exclusion may not work properly until an analyze on the partition set is run.\n */\nCREATE OR REPLACE FUNCTION run_maintenance(p_parent_table text DEFAULT NULL, p_analyze boolean DEFAULT true, p_jobmon boolean DEFAULT true, p_debug boolean DEFAULT false) RETURNS void\n LANGUAGE plpgsql SECURITY DEFINER\n AS $$\nDECLARE\n\nex_context text;\nex_detail text;\nex_hint text;\nex_message text;\nv_adv_lock boolean;\nv_check_subpart int;\nv_create_count int := 0;\nv_current_partition text;\nv_current_partition_id bigint;\nv_current_partition_timestamp timestamp;\nv_datetime_string text;\nv_drop_count int := 0;\nv_id_position int;\nv_job_id bigint;\nv_jobmon boolean;\nv_jobmon_schema text;\nv_last_partition text;\nv_last_partition_created boolean;\nv_last_partition_id bigint;\nv_last_partition_timestamp timestamp;\nv_max_id_parent bigint;\nv_max_time_parent timestamp;\nv_next_partition_id bigint;\nv_next_partition_timestamp timestamp;\nv_parent_schema text;\nv_parent_tablename text;\nv_premade_count int;\nv_premake_id_max bigint;\nv_premake_id_min bigint;\nv_premake_timestamp_min timestamp;\nv_premake_timestamp_max timestamp;\nv_quarter text;\nv_row record;\nv_row_max_id record;\nv_row_max_time record;\nv_row_sub record;\nv_skip_maint boolean;\nv_step_id bigint;\nv_step_overflow_id bigint;\nv_step_serial_id bigint;\nv_sub_id_max bigint;\nv_sub_id_max_suffix bigint;\nv_sub_id_min bigint;\nv_sub_parent text;\nv_sub_timestamp_max timestamp;\nv_sub_timestamp_max_suffix timestamp;\nv_sub_timestamp_min timestamp;\nv_tablename text;\nv_tables_list_sql text;\nv_time_position int;\nv_year text;\n\nBEGIN\n\nv_adv_lock := pg_try_advisory_xact_lock(hashtext('pg_partman run_maintenance'));\nIF v_adv_lock = 'false' THEN\n RAISE NOTICE 'Partman maintenance already running.';\n RETURN;\nEND IF;\n\nIF p_jobmon THEN\n SELECT nspname INTO v_jobmon_schema FROM pg_catalog.pg_namespace n, pg_catalog.pg_extension e WHERE e.extname = 'pg_jobmon' AND e.extnamespace = n.oid;\nEND IF;\n\nIF v_jobmon_schema IS NOT NULL THEN\n EXECUTE format('SELECT %I.add_job(%L)', v_jobmon_schema, 'PARTMAN RUN MAINTENANCE') INTO v_job_id;\n EXECUTE format('SELECT %I.add_step(%L, %L)', v_jobmon_schema, v_job_id, 'Running maintenance loop') INTO v_step_id;\nEND IF;\n\n-- Check for consistent data in part_config_sub table. Was unable to get this working properly as either a constraint or trigger.\n-- Would either delay raising an error until the next write (which I cannot predict) or disallow future edits to update a sub-partition set's configuration.\n-- This way at least provides a consistent way to check that I know will run. If anyone can get a working constraint/trigger, please help!\n-- Don't have to worry about this in the serial trigger maintenance since subpartitioning requires run_maintenance().\nFOR v_row IN\n SELECT sub_parent FROM @extschema@.part_config_sub\nLOOP\n SELECT count(*) INTO v_check_subpart FROM @extschema@.check_subpart_sameconfig(v_row.sub_parent);\n IF v_check_subpart > 1 THEN\n RAISE EXCEPTION 'Inconsistent data in part_config_sub table. Sub-partition tables that are themselves sub-partitions cannot have differing configuration values among their siblings.\n Run this query: \"SELECT * FROM @extschema@.check_subpart_sameconfig(''%'');\" This should only return a single row or nothing.\n If multiple rows are returned, results are all children of the given parent. Update the differing values to be consistent for your desired values.', v_row.sub_parent;\n END IF;\nEND LOOP;\n\nv_row := NULL; -- Ensure it's reset\n\n\nv_tables_list_sql := 'SELECT parent_table\n , partition_type\n , partition_interval\n , control\n , premake\n , datetime_string\n , undo_in_progress\n , sub_partition_set_full\n , epoch\n FROM @extschema@.part_config\n WHERE sub_partition_set_full = false';\n\nIF p_parent_table IS NULL THEN\n v_tables_list_sql := v_tables_list_sql || ' AND use_run_maintenance = true';\nELSE\n v_tables_list_sql := v_tables_list_sql || format(' AND parent_table = %L', p_parent_table);\nEND IF;\n\nFOR v_row IN EXECUTE v_tables_list_sql\nLOOP\n\n CONTINUE WHEN v_row.undo_in_progress;\n v_skip_maint := true; -- reset every loop\n\n SELECT schemaname, tablename INTO v_parent_schema, v_parent_tablename FROM pg_catalog.pg_tables WHERE schemaname ||'.'|| tablename = v_row.parent_table;\n\n SELECT partition_tablename INTO v_last_partition FROM @extschema@.show_partitions(v_row.parent_table, 'DESC') LIMIT 1;\n IF p_debug THEN\n RAISE NOTICE 'run_maint: parent_table: %, v_last_partition: %', v_row.parent_table, v_last_partition;\n END IF;\n\n IF v_row.partition_type = 'time' OR v_row.partition_type = 'time-custom' THEN\n\n v_time_position := (length(v_last_partition) - position('p_' in reverse(v_last_partition))) + 2;\n IF v_row.partition_interval::interval <> '3 months' OR (v_row.partition_interval::interval = '3 months' AND v_row.partition_type = 'time-custom') THEN\n v_last_partition_timestamp := to_timestamp(substring(v_last_partition from v_time_position), v_row.datetime_string);\n ELSE\n -- to_timestamp doesn't recognize 'Q' date string formater. Handle it\n v_year := split_part(substring(v_last_partition FROM v_time_position), 'q', 1);\n v_quarter := split_part(substring(v_last_partition FROM v_time_position), 'q', 2);\n CASE\n WHEN v_quarter = '1' THEN\n v_last_partition_timestamp := to_timestamp(v_year || '-01-01', 'YYYY-MM-DD');\n WHEN v_quarter = '2' THEN\n v_last_partition_timestamp := to_timestamp(v_year || '-04-01', 'YYYY-MM-DD');\n WHEN v_quarter = '3' THEN\n v_last_partition_timestamp := to_timestamp(v_year || '-07-01', 'YYYY-MM-DD');\n WHEN v_quarter = '4' THEN\n v_last_partition_timestamp := to_timestamp(v_year || '-10-01', 'YYYY-MM-DD');\n END CASE;\n END IF;\n\n -- Loop through child tables starting from highest to get current max value in partition set\n -- Avoids doing a scan on entire partition set and/or getting any values accidentally in parent.\n FOR v_row_max_time IN\n SELECT partition_schemaname, partition_tablename FROM @extschema@.show_partitions(v_row.parent_table, 'DESC')\n LOOP\n IF v_row.epoch = false THEN\n EXECUTE format('SELECT max(%I)::text FROM %I.%I'\n , v_row.control\n , v_row_max_time.partition_schemaname\n , v_row_max_time.partition_tablename\n ) INTO v_current_partition_timestamp;\n ELSE\n EXECUTE format('SELECT to_timestamp(max(%I))::text FROM %I.%I'\n , v_row.control\n , v_row_max_time.partition_schemaname\n , v_row_max_time.partition_tablename\n ) INTO v_current_partition_timestamp;\n END IF;\n IF v_current_partition_timestamp IS NOT NULL THEN\n SELECT suffix_timestamp INTO v_current_partition_timestamp FROM @extschema@.show_partition_name(v_row.parent_table, v_current_partition_timestamp::text);\n EXIT;\n END IF;\n END LOOP;\n -- Check for values in the parent table. If they are there and greater than all child values, use that instead\n -- This allows maintenance to continue working properly if there is a large gap in data insertion. Data will remain in parent, but new tables will be created\n IF v_row.epoch = false THEN\n EXECUTE format('SELECT max(%I) FROM ONLY %I.%I', v_row.control, v_parent_schema, v_parent_tablename) INTO v_max_time_parent;\n ELSE\n EXECUTE format('SELECT to_timestamp(max(%I)) FROM ONLY %I.%I', v_row.control, v_parent_schema, v_parent_tablename) INTO v_max_time_parent;\n END IF;\n IF p_debug THEN\n RAISE NOTICE 'run_maint: v_current_partition_timestamp: %, v_max_time_parent: %', v_current_partition_timestamp, v_max_time_parent;\n END IF;\n IF v_max_time_parent > v_current_partition_timestamp THEN\n SELECT suffix_timestamp INTO v_current_partition_timestamp FROM @extschema@.show_partition_name(v_row.parent_table, v_max_time_parent::text);\n END IF;\n IF v_current_partition_timestamp IS NULL THEN\n -- Partition set is completely empty. Nothing to do\n CONTINUE;\n END IF;\n\n -- If this is a subpartition, determine if the last child table has been made. If so, mark it as full so future maintenance runs can skip it\n SELECT sub_min::timestamp, sub_max::timestamp INTO v_sub_timestamp_min, v_sub_timestamp_max FROM @extschema@.check_subpartition_limits(v_row.parent_table, 'time');\n IF v_sub_timestamp_max IS NOT NULL THEN\n SELECT suffix_timestamp INTO v_sub_timestamp_max_suffix FROM @extschema@.show_partition_name(v_row.parent_table, v_sub_timestamp_max::text);\n IF v_sub_timestamp_max_suffix = v_last_partition_timestamp THEN\n -- Final partition for this set is created. Set full and skip it\n UPDATE @extschema@.part_config SET sub_partition_set_full = true WHERE parent_table = v_row.parent_table;\n CONTINUE;\n END IF;\n END IF;\n\n -- Check and see how many premade partitions there are.\n v_premade_count = round(EXTRACT('epoch' FROM age(v_last_partition_timestamp, v_current_partition_timestamp)) / EXTRACT('epoch' FROM v_row.partition_interval::interval));\n v_next_partition_timestamp := v_last_partition_timestamp;\n IF p_debug THEN\n RAISE NOTICE 'run_maint before loop: current_partition_timestamp: %, v_premade_count: %, v_sub_timestamp_min: %, v_sub_timestamp_max: %'\n , v_current_partition_timestamp\n , v_premade_count\n , v_sub_timestamp_min\n , v_sub_timestamp_max;\n END IF;\n -- Loop premaking until config setting is met. Allows it to catch up if it fell behind or if premake changed\n WHILE (v_premade_count < v_row.premake) LOOP\n IF p_debug THEN\n RAISE NOTICE 'run_maint: parent_table: %, v_premade_count: %, v_next_partition_timestamp: %', v_row.parent_table, v_premade_count, v_next_partition_timestamp;\n END IF;\n IF v_next_partition_timestamp < v_sub_timestamp_min OR v_next_partition_timestamp > v_sub_timestamp_max THEN\n -- With subpartitioning, no need to run if the timestamp is not in the parent table's range\n EXIT;\n END IF;\n BEGIN\n v_next_partition_timestamp := v_next_partition_timestamp + v_row.partition_interval::interval;\n EXCEPTION WHEN datetime_field_overflow THEN\n v_premade_count := v_row.premake; -- do this so it can exit the premake check loop and continue in the outer for loop\n IF v_jobmon_schema IS NOT NULL THEN\n EXECUTE format('SELECT %I.add_step(%L, %L)', v_jobmon_schema, v_job_id, 'Attempted partition time interval is outside PostgreSQL''s supported time range.') INTO v_step_overflow_id;\n EXECUTE format('SELECT %I.update_step(%L, %L, %L)', v_jobmon_schema, v_step_overflow_id, 'CRITICAL', 'Child partition creation skippd for parent table '||v_partition_time);\n END IF;\n RAISE WARNING 'Attempted partition time interval is outside PostgreSQL''s supported time range. Child partition creation skipped for parent table %', v_row.parent_table;\n CONTINUE;\n END;\n v_last_partition_created := @extschema@.create_partition_time(v_row.parent_table, ARRAY[v_next_partition_timestamp], p_analyze);\n IF v_last_partition_created THEN\n v_create_count := v_create_count + 1;\n PERFORM @extschema@.create_function_time(v_row.parent_table, v_job_id);\n END IF;\n\n v_premade_count = round(EXTRACT('epoch' FROM age(v_next_partition_timestamp, v_current_partition_timestamp)) / EXTRACT('epoch' FROM v_row.partition_interval::interval));\n END LOOP;\n ELSIF v_row.partition_type = 'id' THEN\n -- Loop through child tables starting from highest to get current max value in partition set\n -- Avoids doing a scan on entire partition set and/or getting any values accidentally in parent.\n FOR v_row_max_id IN\n SELECT partition_schemaname, partition_tablename FROM @extschema@.show_partitions(v_row.parent_table, 'DESC')\n LOOP\n EXECUTE format('SELECT max(%I)::text FROM %I.%I'\n , v_row.control\n , v_row_max_id.partition_schemaname\n , v_row_max_id.partition_tablename) INTO v_current_partition_id;\n IF v_current_partition_id IS NOT NULL THEN\n SELECT suffix_id INTO v_current_partition_id FROM @extschema@.show_partition_name(v_row.parent_table, v_current_partition_id::text);\n EXIT;\n END IF;\n END LOOP;\n -- Check for values in the parent table. If they are there and greater than all child values, use that instead\n -- This allows maintenance to continue working properly if there is a large gap in data insertion. Data will remain in parent, but new tables will be created\n EXECUTE format('SELECT max(%I) FROM ONLY %I.%I', v_row.control, v_parent_schema, v_parent_tablename) INTO v_max_id_parent;\n IF v_max_id_parent > v_current_partition_id THEN\n SELECT suffix_id INTO v_current_partition_id FROM @extschema@.show_partition_name(v_row.parent_table, v_max_id_parent::text);\n END IF;\n IF v_current_partition_id IS NULL THEN\n -- Partition set is completely empty. Nothing to do\n CONTINUE;\n END IF;\n\n v_id_position := (length(v_last_partition) - position('p_' in reverse(v_last_partition))) + 2;\n v_last_partition_id = substring(v_last_partition from v_id_position)::bigint;\n -- Determine if this table is a child of a subpartition parent. If so, get limits to see if run_maintenance even needs to run for it.\n SELECT sub_min::bigint, sub_max::bigint INTO v_sub_id_min, v_sub_id_max FROM @extschema@.check_subpartition_limits(v_row.parent_table, 'id');\n IF v_sub_id_max IS NOT NULL THEN\n SELECT suffix_id INTO v_sub_id_max_suffix FROM @extschema@.show_partition_name(v_row.parent_table, v_sub_id_max::text);\n IF v_sub_id_max_suffix = v_last_partition_id THEN\n -- Final partition for this set is created. Set full and skip it\n UPDATE @extschema@.part_config SET sub_partition_set_full = true WHERE parent_table = v_row.parent_table;\n CONTINUE;\n END IF;\n END IF;\n\n v_next_partition_id := v_last_partition_id;\n v_premade_count := ((v_last_partition_id - v_current_partition_id) / v_row.partition_interval::bigint);\n -- Loop premaking until config setting is met. Allows it to catch up if it fell behind or if premake changed.\n WHILE (v_premade_count < v_row.premake) LOOP\n IF p_debug THEN\n RAISE NOTICE 'run_maint: parent_table: %, v_premade_count: %, v_next_partition_id: %', v_row.parent_table, v_premade_count, v_next_partition_id;\n END IF;\n IF v_next_partition_id < v_sub_id_min OR v_next_partition_id > v_sub_id_max THEN\n -- With subpartitioning, no need to run if the id is not in the parent table's range\n EXIT;\n END IF;\n v_next_partition_id := v_next_partition_id + v_row.partition_interval::bigint;\n v_last_partition_created := @extschema@.create_partition_id(v_row.parent_table, ARRAY[v_next_partition_id], p_analyze);\n IF v_last_partition_created THEN\n v_create_count := v_create_count + 1;\n PERFORM @extschema@.create_function_id(v_row.parent_table, v_job_id);\n END IF;\n v_premade_count := ((v_next_partition_id - v_current_partition_id) / v_row.partition_interval::bigint);\n END LOOP;\n\n END IF; -- end main IF check for time or id\n\n -- Manage additonal constraints if set\n PERFORM @extschema@.apply_constraints(p_parent_table := v_row.parent_table, p_job_id := v_job_id, p_debug := p_debug);\n\nEND LOOP; -- end of creation loop\n\n-- Manage dropping old partitions if retention option is set\nFOR v_row IN\n SELECT parent_table FROM @extschema@.part_config WHERE retention IS NOT NULL AND undo_in_progress = false AND\n (partition_type = 'time' OR partition_type = 'time-custom')\nLOOP\n IF p_parent_table IS NULL THEN\n v_drop_count := v_drop_count + @extschema@.drop_partition_time(v_row.parent_table);\n ELSE -- Only run retention on table given in parameter\n IF p_parent_table <> v_row.parent_table THEN\n CONTINUE;\n ELSE\n v_drop_count := v_drop_count + @extschema@.drop_partition_time(v_row.parent_table);\n END IF;\n END IF;\n IF v_drop_count > 0 THEN\n PERFORM @extschema@.create_function_time(v_row.parent_table, v_job_id);\n END IF;\nEND LOOP;\nFOR v_row IN\n SELECT parent_table FROM @extschema@.part_config WHERE retention IS NOT NULL AND undo_in_progress = false AND partition_type = 'id'\nLOOP\n IF p_parent_table IS NULL THEN\n v_drop_count := v_drop_count + @extschema@.drop_partition_id(v_row.parent_table);\n ELSE -- Only run retention on table given in parameter\n IF p_parent_table <> v_row.parent_table THEN\n CONTINUE;\n ELSE\n v_drop_count := v_drop_count + @extschema@.drop_partition_id(v_row.parent_table);\n END IF;\n END IF;\n IF v_drop_count > 0 THEN\n PERFORM @extschema@.create_function_id(v_row.parent_table, v_job_id);\n END IF;\nEND LOOP;\n\nIF v_jobmon_schema IS NOT NULL THEN\n EXECUTE format('SELECT %I.update_step(%L, %L, ''Partition maintenance finished. %s partitions made. %s partitions dropped.'')'\n , v_jobmon_schema\n , v_step_id\n , 'OK'\n , v_create_count\n , v_drop_count);\n IF v_step_overflow_id IS NOT NULL OR v_step_serial_id IS NOT NULL THEN\n EXECUTE format('SELECT %I.fail_job(%L)', v_jobmon_schema, v_job_id);\n ELSE\n EXECUTE format('SELECT %I.close_job(%L)', v_jobmon_schema, v_job_id);\n END IF;\nEND IF;\n\nEXCEPTION\n WHEN OTHERS THEN\n GET STACKED DIAGNOSTICS ex_message = MESSAGE_TEXT,\n ex_context = PG_EXCEPTION_CONTEXT,\n ex_detail = PG_EXCEPTION_DETAIL,\n ex_hint = PG_EXCEPTION_HINT;\n IF v_jobmon_schema IS NOT NULL THEN\n IF v_job_id IS NULL THEN\n EXECUTE format('SELECT %I.add_job(''PARTMAN RUN MAINTENANCE'')', v_jobmon_schema) INTO v_job_id;\n EXECUTE format('SELECT %I.add_step(%s, ''EXCEPTION before job logging started'')', v_jobmon_schema, v_job_id, p_parent_table) INTO v_step_id;\n ELSIF v_step_id IS NULL THEN\n EXECUTE format('SELECT %I.add_step(%s, ''EXCEPTION before first step logged'')', v_jobmon_schema, v_job_id) INTO v_step_id;\n END IF;\n EXECUTE format('SELECT %I.update_step(%s, ''CRITICAL'', %L)', v_jobmon_schema, v_step_id, 'ERROR: '||coalesce(SQLERRM,'unknown'));\n EXECUTE format('SELECT %I.fail_job(%s)', v_jobmon_schema, v_job_id);\n END IF;\n RAISE EXCEPTION '%\nCONTEXT: %\nDETAIL: %\nHINT: %', ex_message, ex_context, ex_detail, ex_hint;\nEND\n$$;", "explanation": "SQL definition from the open-source pg_partman PostgreSQL project.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "plpgsql_trigger", "is_postgresql_specific": true, "sql_length": 21291, "num_statements": 195} {"question": "PostgreSQL regression test 'json': Write the SELECT query (example 8).", "schema": null, "sql": "SELECT row_to_json(row((select array_agg(x) as d from generate_series(5,10) x)),false);", "explanation": "Regression test for Json in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT row_to_json(row((select array_agg(x) as d from generate_series(5,10) x)),false)) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": true, "sql_length": 87, "num_statements": 1} {"question": "PostgreSQL regression test 'pg_dependencies': Write the SELECT query (example 62).", "schema": null, "sql": "SELECT '[{\"attributes\" : [1,2], \"dependency\" : {}, \"degree\": 1.0}]'::pg_dependencies;", "explanation": "Regression test for Pg Dependencies in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT '[{\"attributes\" : [1,2], \"dependency\" : {}, \"degree\": 1.0}]'::pg_dependencies) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 85, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'rules' (example 388).", "schema": null, "sql": "create table rule_and_refint_t2 (\n\tid2a integer,\n\tid2c integer,\n\n\tprimary key (id2a, id2c)\n);", "explanation": "DDL from PostgreSQL core regression test for Rules.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_table", "is_postgresql_specific": false, "sql_length": 93, "num_statements": 1} {"question": "PostgreSQL regression test 'aggregates': Write the SELECT query (example 86).", "schema": null, "sql": "SELECT corr(g, 0.09), regr_r2(g, 0.09), regr_slope(g, 0.09), regr_intercept(g, 0.09)\n FROM generate_series(1, 30) g;", "explanation": "Regression test for Aggregates in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT corr(g, 0.09), regr_r2(g, 0.09), regr_slope(g, 0.09), regr_intercept(g, 0.09)\n FROM generate_series(1, 30) g) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 117, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'hash_index' (example 48).", "schema": null, "sql": "UPDATE hash_f8_heap\n SET seqno = 20000\n WHERE hash_f8_heap.random = '488912369'::float8;", "explanation": "DML from PostgreSQL core regression test for Hash Index.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 92, "num_statements": 1} {"question": "PostgreSQL regression test 'rangetypes': Write the SELECT query (example 262).", "schema": null, "sql": "select count(*) from test_range_spgist where ir @> 10;", "explanation": "Regression test for Rangetypes in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select count(*) from test_range_spgist where ir @> 10) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 54, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'citext' (example 287).", "schema": null, "sql": "INSERT INTO caster (bitv) VALUES ('101'::citext);", "explanation": "Example query from the 'citext' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 58, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'pg_buffercache' (example 37).", "schema": null, "sql": "SELECT * FROM pg_buffercache_mark_dirty(:max_buffers);", "explanation": "Example query from the 'pg_buffercache' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 54, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'rules' (example 216).", "schema": null, "sql": "insert into rtest_nothn4 values (30, 'don''t want this');", "explanation": "DML from PostgreSQL core regression test for Rules.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 57, "num_statements": 1} {"question": "PostgreSQL regression test 'stats_import': Write the SELECT query (example 224).", "schema": null, "sql": "SELECT pg_catalog.pg_restore_extended_stats(\n 'schemaname', 'stats_import',\n 'relname', 'test',\n 'statistics_schemaname', 'stats_import',\n 'statistics_name', 'test_stat_mcv',\n 'inherited', false,\n 'most_common_vals', '{{four,NULL},\n {one,\"(1,1.1,ONE,01-01-2001,\\\"{\\\"\\\"xkey\\\"\\\": \\\"\\\"xval\\\"\\\"}\\\")\"},\n {tre,\"(3,3.3,TRE,03-03-2003,)\"},\n {two,\"(2,2.2,TWO,02-02-2002,\\\"[true, 4, \\\"\\\"six\\\"\\\"]\\\")\"}}'::text[],\n 'most_common_freqs', '{0.25,0.25,0.25,0.25}'::double precision[]);", "explanation": "Regression test for Stats Import in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT pg_catalog.pg_restore_extended_stats(\n 'schemaname', 'stats_import',\n 'relname', 'test',\n 'statistics_schemaname', 'stats_import',\n 'statistics_name', 'test_stat_mcv',\n 'inherited', false,\n 'most_common_vals', '{{four,NULL},\n {one,\"(1,1.1,ONE,01-01-2001,\\\"{\\\"\\\"xkey\\\"\\\": \\\"\\\"xval\\\"\\\"}\\\")\"},\n {tre,\"(3,3.3,TRE,03-03-2003,)\"},\n {two,\"(2,2.2,TWO,02-02-2002,\\\"[true, 4, \\\"\\\"six\\\"\\\"]\\\")\"}}'::text[],\n 'most_common_freqs', '{0.25,0.25,0.25,0.25}'::double precision[])) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": true, "sql_length": 545, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'constraints' (example 185).", "schema": null, "sql": "UPDATE unique_tbl SET i = 2 WHERE i = 4 AND t = 'four';", "explanation": "DML from PostgreSQL core regression test for Constraints.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 55, "num_statements": 1} {"question": "PostgreSQL regression test 'jsonb_jsonpath': Write the SELECT query (example 796).", "schema": null, "sql": "SELECT jsonb_path_exists('[{\"a\": 1}, {\"a\": 2}, {\"a\": 3}, {\"a\": 5}]', '$[*] ? (@.a > $min && @.a < $max)', vars => '{\"min\": 3, \"max\": 4}');", "explanation": "Regression test for Jsonb Jsonpath in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT jsonb_path_exists('[{\"a\": 1}, {\"a\": 2}, {\"a\": 3}, {\"a\": 5}]', '$[*] ? (@.a > $min && @.a < $max)', vars => '{\"min\": 3, \"max\": 4}')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 138, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'numeric' (example 492).", "schema": null, "sql": "DELETE FROM num_result;", "explanation": "DML from PostgreSQL core regression test for Numeric.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_delete", "is_postgresql_specific": false, "sql_length": 23, "num_statements": 1} {"question": "PostgreSQL regression test 'groupingsets': Write the SELECT query (example 66).", "schema": null, "sql": "select(select (select grouping(e,f) from (values (1)) v2(c)) from (values (1,2)) v1(a,b) group by (a,b)) from (values(6,7)) v3(e,f) GROUP BY ROLLUP(e,f);", "explanation": "Regression test for Groupingsets in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select(select (select grouping(e,f) from (values (1)) v2(c)) from (values (1,2)) v1(a,b) group by (a,b)) from (values(6,7)) v3(e,f) GROUP BY ROLLUP(e,f)) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "advanced", "category": "query_aggregation", "is_postgresql_specific": true, "sql_length": 153, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'pg_trgm' (example 12).", "schema": null, "sql": "select similarity('wow',' WOW ');", "explanation": "Example query from the 'pg_trgm' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 33, "num_statements": 1} {"question": "PostgreSQL regression test 'timestamp': Write the SELECT query (example 129).", "schema": null, "sql": "select date_bin('365000 days'::interval, '4400-01-01 BC'::timestamp, '4000-01-01 BC'::timestamp);", "explanation": "Regression test for Timestamp in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select date_bin('365000 days'::interval, '4400-01-01 BC'::timestamp, '4000-01-01 BC'::timestamp)) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 97, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'numeric' (example 360).", "schema": null, "sql": "INSERT INTO num_exp_add VALUES (8,7,'-82953604');", "explanation": "DML from PostgreSQL core regression test for Numeric.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 49, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'cube' (example 82).", "schema": null, "sql": "SELECT '1'::cube < '2'::cube AS bool;", "explanation": "Example query from the 'cube' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "query_select", "is_postgresql_specific": true, "sql_length": 39, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'timestamptz' (example 58).", "schema": null, "sql": "INSERT INTO TIMESTAMPTZ_TBL VALUES ('97FEB10 5:32:01PM UTC');", "explanation": "DML from PostgreSQL core regression test for Timestamptz.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 61, "num_statements": 1} {"question": "PostgreSQL regression test 'arrays': Write the SELECT query (example 159).", "schema": null, "sql": "SELECT ARRAY[1,2] || ARRAY[3,4] AS \"{1,2,3,4}\";", "explanation": "Regression test for Arrays in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT ARRAY[1,2] || ARRAY[3,4] AS \"{1,2,3,4}\") AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": true, "sql_length": 47, "num_statements": 1} {"question": "pgTAP test for Valueset (assertion 302).", "schema": null, "sql": "-- Fail with an extra record.\nSELECT * FROM check_test(\n set_eq(\n 'justnames',\n ARRAY['Andrew', 'Anna', 'Antonio', 'Angelina', 'Andrea', 'Angel' ]\n ),\n false,\n 'set_eq(prepared, array) extra record',\n '',\n ' Extra records:\n (Anthony)'\n);", "explanation": "SQL assertion from pgTAP test suite for Valueset.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 279, "num_statements": 1} {"question": "PL/pgSQL test: Plpgsql Cache (example 7).", "schema": null, "sql": "$$;\n\nselect show_result_type('select 1 as a');\n-- currently this fails due to cached plan for pg_typeof expression\n-- (but if debug_discard_caches is on, it will succeed)\nselect show_result_type('select 2.0 as a');\n\n-- but it's OK if we force plan rebuilding\ndiscard plans;\nselect show_result_type('select 2.0 as a');", "explanation": "PL/pgSQL example from PostgreSQL source test for Plpgsql Cache.", "validation_query": null, "source": "plpgsql_source", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 317, "num_statements": 5} {"question": "PostgreSQL regression test 'jsonb': Write the SELECT query (example 78).", "schema": null, "sql": "SELECT '[1,1,2]'::jsonb @> '[1,2,2]'::jsonb;", "explanation": "Regression test for Jsonb in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT '[1,1,2]'::jsonb @> '[1,2,2]'::jsonb) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": true, "sql_length": 44, "num_statements": 1} {"question": "Show a SQL definition from the timescaledb project (size_utils, item 3).", "schema": null, "sql": "CREATE OR REPLACE FUNCTION _timescaledb_functions.relation_approximate_size(relation REGCLASS)\nRETURNS TABLE (total_size BIGINT, heap_size BIGINT, index_size BIGINT, toast_size BIGINT)\nAS '@MODULE_PATHNAME@', 'ts_relation_approximate_size' LANGUAGE C STRICT VOLATILE;", "explanation": "SQL definition from the open-source timescaledb PostgreSQL project.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "plpgsql_function", "is_postgresql_specific": true, "sql_length": 267, "num_statements": 1} {"question": "pgTAP test for Pgtap--Unpackaged--0.91.0 (assertion 603).", "schema": null, "sql": "ALTER EXTENSION pgtap ADD FUNCTION _do_ne( TEXT, TEXT, TEXT, TEXT );", "explanation": "SQL assertion from pgTAP test suite for Pgtap--Unpackaged--0.91.0.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 68, "num_statements": 1} {"question": "PostgreSQL regression test 'pg_ndistinct': Write the SELECT query (example 12).", "schema": null, "sql": "SELECT '[{\"attributes\" : [2,3], \"invalid\" : 3, \"ndistinct\" : 4}]'::pg_ndistinct;", "explanation": "Regression test for Pg Ndistinct in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT '[{\"attributes\" : [2,3], \"invalid\" : 3, \"ndistinct\" : 4}]'::pg_ndistinct) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 80, "num_statements": 1} {"question": "PostgreSQL regression test 'arrays': Write the SELECT query (example 215).", "schema": null, "sql": "select null::int = all ('{1,2,3}');", "explanation": "Regression test for Arrays in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select null::int = all ('{1,2,3}')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 35, "num_statements": 1} {"question": "PL/pgSQL test: Plpgsql Domain (example 4).", "schema": null, "sql": "SELECT * FROM test_argresult_booltrue(false, true);", "explanation": "PL/pgSQL example from PostgreSQL source test for Plpgsql Domain.", "validation_query": null, "source": "plpgsql_source", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 51, "num_statements": 1} {"question": "pgTAP test for Ownership (assertion 102).", "schema": null, "sql": "/****************************************************************************/\n-- Test index_owner_is().\nSELECT * FROM check_test(\n index_owner_is('someschema', 'anothertab', 'idx_name', current_user, 'mumble'),\n\ttrue,\n 'index_owner_is(schema, table, index, user, desc)',\n 'mumble',\n ''\n);", "explanation": "SQL assertion from pgTAP test suite for Ownership.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 301, "num_statements": 1} {"question": "PostgreSQL regression test 'object_address': Write the SELECT query (example 48).", "schema": null, "sql": "SELECT pg_get_object_address('schema', '{one}', '{}');", "explanation": "Regression test for Object Address in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT pg_get_object_address('schema', '{one}', '{}')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 54, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'numeric_big' (example 263).", "schema": null, "sql": "INSERT INTO num_exp_sub VALUES (6,0,'.0469370721950711508944806393077762204079964905145503836835397203739563036579760026190241480514409364');", "explanation": "DML from PostgreSQL core regression test for Numeric Big.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 141, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'foreign_key' (example 969).", "schema": null, "sql": "CREATE TABLE ref1_2 PARTITION OF ref FOR VALUES IN (1, 2) PARTITION BY list (f2);", "explanation": "DDL from PostgreSQL core regression test for Foreign Key.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "ddl_table", "is_postgresql_specific": true, "sql_length": 81, "num_statements": 1} {"question": "pgTAP test for Policy (assertion 20).", "schema": null, "sql": "SELECT * FROM check_test(\n policies_are( 'public', 'passwd', ARRAY['root_all', 'all_view', 'user_mod', 'daemon_insert', 'daemon_delete'] ),\n true,\n 'policies_are(schema, table, policies)',\n 'Table public.passwd should have the correct policies',\n ''\n);", "explanation": "SQL assertion from pgTAP test suite for Policy.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 267, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'partition_join' (example 5).", "schema": null, "sql": "CREATE TABLE prt1_p2 PARTITION OF prt1 FOR VALUES FROM (250) TO (500);", "explanation": "DDL from PostgreSQL core regression test for Partition Join.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_table", "is_postgresql_specific": false, "sql_length": 70, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'rules' (example 36).", "schema": null, "sql": "create rule rtest_t6_ins as on insert to rtest_t6\n\t\twhere new.a > 25 do instead\n\tinsert into rtest_t8 values (new.a, new.b);", "explanation": "DDL from PostgreSQL core regression test for Rules.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_advanced", "is_postgresql_specific": true, "sql_length": 124, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'partition_split' (example 1).", "schema": null, "sql": "CREATE SCHEMA partition_split_schema;", "explanation": "DDL from PostgreSQL core regression test for Partition Split.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 37, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'numeric' (example 286).", "schema": null, "sql": "INSERT INTO num_exp_mul VALUES (6,8,'7031444034.53149906');", "explanation": "DML from PostgreSQL core regression test for Numeric.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 59, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'partition_split' (example 109).", "schema": null, "sql": "CREATE TABLE sales (salesperson_id INT REFERENCES salespeople(salesperson_id), sales_amount INT, sales_date DATE);", "explanation": "DDL from PostgreSQL core regression test for Partition Split.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_table", "is_postgresql_specific": false, "sql_length": 114, "num_statements": 1} {"question": "PostgreSQL regression test 'create_operator': Write the SELECT query (example 21).", "schema": null, "sql": "SELECT false<=-1 BETWEEN 1 AND 1;", "explanation": "Regression test for Create Operator in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT false<=-1 BETWEEN 1 AND 1) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 33, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'replica_identity' (example 8).", "schema": null, "sql": "CREATE UNIQUE INDEX test_replica_identity_expr ON test_replica_identity (keya, keyb, (3));", "explanation": "DDL from PostgreSQL core regression test for Replica Identity.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_index", "is_postgresql_specific": false, "sql_length": 90, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'pgstattuple' (example 27).", "schema": null, "sql": "select pgstathashindex('test_ginidx');", "explanation": "Example query from the 'pgstattuple' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 38, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'arrays' (example 300).", "schema": null, "sql": "INSERT INTO arraggtest (f1, f2, f3) VALUES\n('{}','{{pink,white,blue,red,grey,orange}}','{2.1,1.87,1.4,2.2}');", "explanation": "DML from PostgreSQL core regression test for Arrays.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 109, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'create_view' (example 177).", "schema": null, "sql": "create view vv1 as select * from (tt5 cross join tt6) j(aa,bb,cc,dd);", "explanation": "DDL from PostgreSQL core regression test for Create View.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "ddl_view", "is_postgresql_specific": false, "sql_length": 69, "num_statements": 1} {"question": "PostgreSQL regression test 'tsearch': Write the SELECT query (example 219).", "schema": null, "sql": "SELECT to_tsquery('english', '1 <-> (2 <-> a)');", "explanation": "Regression test for Tsearch in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT to_tsquery('english', '1 <-> (2 <-> a)')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 48, "num_statements": 1} {"question": "PostgreSQL regression test 'sqljson_queryfuncs': Write the SELECT query (example 69).", "schema": null, "sql": "SELECT JSON_VALUE(jsonb '1', 'strict $.a' DEFAULT 'error' ON ERROR);", "explanation": "Regression test for Sqljson Queryfuncs in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT JSON_VALUE(jsonb '1', 'strict $.a' DEFAULT 'error' ON ERROR)) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": true, "sql_length": 68, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'partition_join' (example 524).", "schema": null, "sql": "CREATE TABLE plt2_adv_p2 PARTITION OF plt2_adv FOR VALUES IN (NULL);", "explanation": "DDL from PostgreSQL core regression test for Partition Join.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_table", "is_postgresql_specific": false, "sql_length": 68, "num_statements": 1} {"question": "pgTAP test for Functap (assertion 325).", "schema": null, "sql": "-- Test diagnostics\nSELECT * FROM check_test(\n is_window( 'nooo' ),\n false,\n 'is_window(nowin)',\n 'Function nooo() should be a window function',\n ' Function nooo() does not exist'\n);", "explanation": "SQL assertion from pgTAP test suite for Functap.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 200, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'ltree' (example 140).", "schema": null, "sql": "SELECT 'a.b.c.d.e'::ltree ~ '*{2,3}.e';", "explanation": "Example query from the 'ltree' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 39, "num_statements": 1} {"question": "PostgreSQL regression test 'sqljson_queryfuncs': Write the SELECT query (example 85).", "schema": null, "sql": "SELECT JSON_VALUE(jsonb 'null', '$a' PASSING point ' (1, 2 )' AS a RETURNING point);", "explanation": "Regression test for Sqljson Queryfuncs in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT JSON_VALUE(jsonb 'null', '$a' PASSING point ' (1, 2 )' AS a RETURNING point)) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": true, "sql_length": 84, "num_statements": 1} {"question": "PostgreSQL regression test 'polymorphism': Write the SELECT query (example 161).", "schema": null, "sql": "select f3, myaggn08a(f1) from t group by f3 order by f3;", "explanation": "Regression test for Polymorphism in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select f3, myaggn08a(f1) from t group by f3 order by f3) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 56, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'rowsecurity' (example 1175).", "schema": null, "sql": "CREATE POLICY p4 ON rls_tbl FOR DELETE USING (c1 <= 3);", "explanation": "DDL from PostgreSQL core regression test for Rowsecurity.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_delete", "is_postgresql_specific": false, "sql_length": 55, "num_statements": 1} {"question": "PL/pgSQL test: Plpython Trigger (example 127).", "schema": null, "sql": "CREATE FUNCTION transition_table_test_f() RETURNS trigger LANGUAGE plpython3u AS\n$$\n rv = plpy.execute(\"SELECT * FROM old_table\")\n assert(rv.nrows() == 1)\n plpy.info(\"old: \" + str(rv[0][\"id\"]) + \" -> \" + rv[0][\"name\"])\n rv = plpy.execute(\"SELECT * FROM new_table\")\n assert(rv.nrows() == 1)\n plpy.info(\"new: \" + str(rv[0][\"id\"]) + \" -> \" + rv[0][\"name\"])\n return None\n$$;", "explanation": "PL/pgSQL example from PostgreSQL source test for Plpython Trigger.", "validation_query": null, "source": "plpgsql_source", "difficulty": "basic", "category": "plpgsql_trigger", "is_postgresql_specific": false, "sql_length": 391, "num_statements": 1} {"question": "PostgreSQL regression test 'pg_ndistinct': Write the SELECT query (example 37).", "schema": null, "sql": "SELECT '[{\"attributes\" : [2,3], \"ndistinct\" : null}]'::pg_ndistinct;", "explanation": "Regression test for Pg Ndistinct in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT '[{\"attributes\" : [2,3], \"ndistinct\" : null}]'::pg_ndistinct) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 68, "num_statements": 1} {"question": "pgTAP test for Functap (assertion 353).", "schema": null, "sql": "SELECT * FROM check_test(\n is_strict( 'yay', 'whatever' ),\n true,\n 'is_strict(func, desc)',\n 'whatever',\n ''\n);", "explanation": "SQL assertion from pgTAP test suite for Functap.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 126, "num_statements": 1} {"question": "Write the SQL definition from PostgreSQL contrib extension 'contrib' (item 32).", "schema": null, "sql": "--\n-- PGP key ID\n--\nCREATE FUNCTION pgp_key_id(bytea)\nRETURNS text\nAS 'MODULE_PATHNAME', 'pgp_key_id_w'\nLANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;", "explanation": "SQL definition from the 'contrib' PostgreSQL contrib extension.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 146, "num_statements": 1} {"question": "Write the PL/pgSQL object from PostgreSQL regression test 'encoding' (example 58).", "schema": null, "sql": "DROP FUNCTION test_wchars_to_text;", "explanation": "PL/pgSQL object from PostgreSQL core test for Encoding.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 34, "num_statements": 1} {"question": "Write the PL/pgSQL object from PostgreSQL regression test 'update' (example 104).", "schema": null, "sql": "-- Enabling OLD TABLE capture for both DELETE as well as UPDATE stmt triggers\n-- should not cause DELETEd rows to be captured twice. Similar thing for\n-- INSERT triggers and inserted rows.\nCREATE TRIGGER trans_deletetrig\n AFTER DELETE ON range_parted REFERENCING OLD TABLE AS old_table\n FOR EACH STATEMENT EXECUTE PROCEDURE trans_updatetrigfunc();", "explanation": "PL/pgSQL object from PostgreSQL core test for Update.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "dml_insert", "is_postgresql_specific": true, "sql_length": 349, "num_statements": 1} {"question": "Write the SQL definition from PostgreSQL contrib extension 'contrib' (item 170).", "schema": null, "sql": "CREATE FUNCTION gbt_macad_union(internal, internal)\nRETURNS gbtreekey16\nAS 'MODULE_PATHNAME'\nLANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;", "explanation": "SQL definition from the 'contrib' PostgreSQL contrib extension.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "other", "is_postgresql_specific": false, "sql_length": 135, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'partition_split' (example 190).", "schema": null, "sql": "CREATE TABLE sales_all PARTITION OF sales_list FOR VALUES IN ('Warsaw', 'Lisbon', 'New York', 'Madrid', 'Beijing', 'Berlin', 'Delhi', 'Kyiv', 'Vladivostok', NULL);", "explanation": "DDL from PostgreSQL core regression test for Partition Split.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_table", "is_postgresql_specific": false, "sql_length": 163, "num_statements": 1} {"question": "PL/pgSQL test: Pltcl Subxact (example 5).", "schema": null, "sql": "INSERT INTO subtransaction_tbl VALUES(0);", "explanation": "PL/pgSQL example from PostgreSQL source test for Pltcl Subxact.", "validation_query": null, "source": "plpgsql_source", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 41, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'hstore' (example 137).", "schema": null, "sql": "select pg_column_size(slice(hstore 'aa=>1, b=>2, c=>3', ARRAY['c','b','aa']))\n = pg_column_size('aa=>1, b=>2, c=>3'::hstore);", "explanation": "Example query from the 'hstore' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 133, "num_statements": 1} {"question": "Write the SQL definition from PostgreSQL contrib extension 'contrib' (item 7).", "schema": null, "sql": "CREATE FUNCTION gbt_ts_fetch(internal)\nRETURNS internal\nAS 'MODULE_PATHNAME'\nLANGUAGE C IMMUTABLE STRICT;", "explanation": "SQL definition from the 'contrib' PostgreSQL contrib extension.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 105, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'numeric' (example 284).", "schema": null, "sql": "INSERT INTO num_exp_add VALUES (6,8,'168782.57763026');", "explanation": "DML from PostgreSQL core regression test for Numeric.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 55, "num_statements": 1} {"question": "PostgreSQL regression test 'tsearch': Write the SELECT query (example 2).", "schema": null, "sql": "SELECT oid, dictname\nFROM pg_ts_dict\nWHERE dictnamespace = 0 OR dictowner = 0 OR dicttemplate = 0;", "explanation": "Regression test for Tsearch in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT oid, dictname\nFROM pg_ts_dict\nWHERE dictnamespace = 0 OR dictowner = 0 OR dicttemplate = 0) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 98, "num_statements": 1} {"question": "PostgreSQL regression test 'window': Write the SELECT query (example 159).", "schema": null, "sql": "select id, f_numeric, first_value(id) over w, last_value(id) over w\nfrom numerics\nwindow w as (order by f_numeric range between\n 'inf' following and 'inf' following);", "explanation": "Regression test for Window in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select id, f_numeric, first_value(id) over w, last_value(id) over w\nfrom numerics\nwindow w as (order by f_numeric range between\n 'inf' following and 'inf' following)) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "advanced", "category": "query_select", "is_postgresql_specific": true, "sql_length": 178, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'citext' (example 271).", "schema": null, "sql": "INSERT INTO caster (path) VALUES ('((0,0),(1,1),(2,0))'::citext);", "explanation": "Example query from the 'citext' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 74, "num_statements": 1} {"question": "Show a SQL definition from the citus project (multi_partitioning, item 9).", "schema": null, "sql": "CREATE TABLE partitioning_hash_test_0 PARTITION OF partitioning_hash_test FOR VALUES WITH (MODULUS 3, REMAINDER 0);", "explanation": "SQL definition from the open-source citus PostgreSQL project.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_table", "is_postgresql_specific": false, "sql_length": 115, "num_statements": 1} {"question": "pgTAP test for Inheritance (assertion 144).", "schema": null, "sql": "SELECT * FROM check_test(\n isnt_descendent_of( 'nope', 'parent' ),\n true,\n 'isnt_descendent_of(nope, ptab)',\n 'Table nope should not be a descendent of parent',\n ''\n);", "explanation": "SQL assertion from pgTAP test suite for Inheritance.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 182, "num_statements": 1} {"question": "Write the SQL definition from PostgreSQL contrib extension 'contrib' (item 120).", "schema": null, "sql": "--GiST support for ltree[]\nCREATE FUNCTION _ltree_consistent(internal,_ltree,int2,oid,internal)\nRETURNS bool\nAS 'MODULE_PATHNAME'\nLANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;", "explanation": "SQL definition from the 'contrib' PostgreSQL contrib extension.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 172, "num_statements": 1} {"question": "PostgreSQL regression test 'timestamptz': Write the SELECT query (example 303).", "schema": null, "sql": "SELECT '2011-03-27 03:00:01'::timestamp AT TIME ZONE 'MSK';", "explanation": "Regression test for Timestamptz in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT '2011-03-27 03:00:01'::timestamp AT TIME ZONE 'MSK') AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 59, "num_statements": 1} {"question": "PL/pgSQL test: Plpython Trigger (example 55).", "schema": null, "sql": "-- Unicode variant\n\nCREATE FUNCTION stupid3u() RETURNS trigger\nAS $$\n return \"foo\"\n$$ LANGUAGE plpython3u;", "explanation": "PL/pgSQL example from PostgreSQL source test for Plpython Trigger.", "validation_query": null, "source": "plpgsql_source", "difficulty": "basic", "category": "plpgsql_trigger", "is_postgresql_specific": false, "sql_length": 109, "num_statements": 1} {"question": "PostgreSQL regression test 'merge': Write the SELECT query (example 507).", "schema": null, "sql": "SELECT tableoid::regclass, * FROM measurement ORDER BY city_id, logdate;", "explanation": "Regression test for Merge in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT tableoid::regclass, * FROM measurement ORDER BY city_id, logdate) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 72, "num_statements": 1} {"question": "pgTAP test for Functap (assertion 42).", "schema": null, "sql": "SELECT * FROM check_test(\n hasnt_function( 'decode', '{text,text}'::name[] ),\n false,\n 'simple function with 2 args',\n 'Function decode(text, text) should not exist',\n ''\n);", "explanation": "SQL assertion from pgTAP test suite for Functap.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 188, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'numeric' (example 813).", "schema": null, "sql": "INSERT INTO num_input_test(n1) VALUES ('0x__1234');", "explanation": "DML from PostgreSQL core regression test for Numeric.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 51, "num_statements": 1} {"question": "PostgreSQL regression test 'arrays': Write the SELECT query (example 306).", "schema": null, "sql": "select * from comptable;", "explanation": "Regression test for Arrays in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select * from comptable) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 24, "num_statements": 1} {"question": "Show a SQL definition from the timescaledb project (compat, item 14).", "schema": null, "sql": "CREATE OR REPLACE FUNCTION _timescaledb_internal.drop_chunk(chunk regclass) RETURNS boolean LANGUAGE PLPGSQL AS $$\nBEGIN\n IF current_setting('timescaledb.enable_deprecation_warnings', true)::bool THEN\n RAISE WARNING 'function _timescaledb_internal.drop_chunk(regclass) is deprecated and has been moved to _timescaledb_functions schema. this compatibility function will be removed in a future version.';\n END IF;\n RETURN _timescaledb_functions.drop_chunk($1);\nEND$$\nSET search_path TO pg_catalog,pg_temp;", "explanation": "SQL definition from the open-source timescaledb PostgreSQL project.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "plpgsql_function", "is_postgresql_specific": true, "sql_length": 509, "num_statements": 4} {"question": "Show a query using PostgreSQL contrib extension 'btree_gist' (example 13).", "schema": null, "sql": "SELECT count(*) FROM timestamptmp WHERE a = '2004-10-26 08:55:08'::timestamp;", "explanation": "Example query from the 'btree_gist' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 78, "num_statements": 1} {"question": "pgTAP test for Pgtap--Unpackaged--0.91.0 (assertion 683).", "schema": null, "sql": "ALTER EXTENSION pgtap ADD FUNCTION domain_type_is( NAME, TEXT, NAME, TEXT );", "explanation": "SQL assertion from pgTAP test suite for Pgtap--Unpackaged--0.91.0.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 76, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'insert' (example 117).", "schema": null, "sql": "insert into part_default values ('aa', 2);", "explanation": "DML from PostgreSQL core regression test for Insert.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 42, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'constraints' (example 54).", "schema": null, "sql": "INSERT INTO INSERT_TBL VALUES (7, '!check failed', -7);", "explanation": "DML from PostgreSQL core regression test for Constraints.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 55, "num_statements": 1} {"question": "pgTAP test for Pgtap--Unpackaged--0.91.0 (assertion 648).", "schema": null, "sql": "ALTER EXTENSION pgtap ADD FUNCTION isa_ok( anyelement, regtype );", "explanation": "SQL assertion from pgTAP test suite for Pgtap--Unpackaged--0.91.0.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 65, "num_statements": 1} {"question": "pgTAP test for Pgtap--Unpackaged--0.91.0 (assertion 647).", "schema": null, "sql": "ALTER EXTENSION pgtap ADD FUNCTION isa_ok( anyelement, regtype, TEXT );", "explanation": "SQL assertion from pgTAP test suite for Pgtap--Unpackaged--0.91.0.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 71, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'brin_multi' (example 59).", "schema": null, "sql": "UPDATE brintest_multi SET int8col = int8col * int4col;", "explanation": "DML from PostgreSQL core regression test for Brin Multi.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 54, "num_statements": 1} {"question": "pgTAP test for Pgtap--1.0.0--1.1.0 (assertion 12).", "schema": null, "sql": "-- _keys( table, constraint_type )\nCREATE OR REPLACE FUNCTION _keys ( NAME, CHAR )\nRETURNS SETOF NAME[] AS $$\n SELECT _pg_sv_column_array(x.conrelid,x.conkey) -- name[] doesn't support collation\n FROM pg_catalog.pg_class c\n JOIN pg_catalog.pg_constraint x ON c.oid = x.conrelid\n AND c.relname = $1\n AND x.contype = $2\n WHERE pg_catalog.pg_table_is_visible(c.oid)\n ORDER BY 1\n$$ LANGUAGE sql;", "explanation": "SQL assertion from pgTAP test suite for Pgtap--1.0.0--1.1.0.", "validation_query": null, "source": "pgtap_tests", "difficulty": "advanced", "category": "plpgsql_function", "is_postgresql_specific": true, "sql_length": 422, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'box' (example 4).", "schema": null, "sql": "INSERT INTO BOX_TBL (f1) VALUES ('((-8, 2), (-2, -10))');", "explanation": "DML from PostgreSQL core regression test for Box.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 57, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'arrays' (example 43).", "schema": null, "sql": "UPDATE arrtest\n SET c[NULL:1] = '{\"can''t assign\"}'\n WHERE array_dims(c) is not null;", "explanation": "DML from PostgreSQL core regression test for Arrays.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 87, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'join' (example 224).", "schema": null, "sql": "insert into tt3 select x, repeat('xyzzy', 100) from generate_series(1,10000) x;", "explanation": "DML from PostgreSQL core regression test for Join.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 79, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'numeric' (example 240).", "schema": null, "sql": "INSERT INTO num_exp_add VALUES (5,7,'-83012087.961509');", "explanation": "DML from PostgreSQL core regression test for Numeric.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 56, "num_statements": 1} {"question": "pgTAP test for Resultset (assertion 263).", "schema": null, "sql": "-- Handle fail with a dupe.\nSELECT * FROM check_test(\n set_ne(\n 'SELECT 1 AS a, ''Anna''::text UNION ALL SELECT 86, ''Angelina'' UNION ALL SELECT 1, ''Anna''',\n 'SELECT 1 AS a, ''Anna''::text UNION ALL SELECT 86, ''Angelina'''\n ),\n false,\n 'set_ne fail with dupe',\n '',\n ''\n);", "explanation": "SQL assertion from pgTAP test suite for Resultset.", "validation_query": null, "source": "pgtap_tests", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 308, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'timestamp' (example 57).", "schema": null, "sql": "INSERT INTO TIMESTAMP_TBL VALUES ('97/02/10 17:32:01 UTC');", "explanation": "DML from PostgreSQL core regression test for Timestamp.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 59, "num_statements": 1} {"question": "Write the SQL definition from PostgreSQL contrib extension 'contrib' (item 177).", "schema": null, "sql": "CREATE FUNCTION gbt_bpchar_compress(internal)\nRETURNS internal\nAS 'MODULE_PATHNAME'\nLANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;", "explanation": "SQL definition from the 'contrib' PostgreSQL contrib extension.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 126, "num_statements": 1} {"question": "PostgreSQL regression test 'stats_ext': Write the SELECT query (example 606).", "schema": null, "sql": "SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_bool WHERE NOT a AND b AND NOT c');", "explanation": "Regression test for Stats Ext in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_bool WHERE NOT a AND b AND NOT c')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 95, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'numeric_big' (example 91).", "schema": null, "sql": "INSERT INTO num_exp_sub VALUES (1,7,'818934540071930985.39540024977626076239847863600785982737155858270959890014613035727868293618673807776733416230953723818527101593495895350807775607346277892835514324320448949370623441059033804864158715021903312693889518990256881059434042443507529601095150710777634743301398926463888783847290873199395304998050753365215426971278237920063435565949203678024225270616295573678510929020831006146661747271783837653203039829647102027431761129518881525935216608429897041525858540380754759125150233053469999022855035');", "explanation": "DML from PostgreSQL core regression test for Numeric Big.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 547, "num_statements": 1} {"question": "PostgreSQL regression test 'join': Write the SELECT query (example 706).", "schema": null, "sql": "select *, (select r from (select q1 as q2) x, lateral (select q2 as r) y) from int8_tbl;", "explanation": "Regression test for Join in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select *, (select r from (select q1 as q2) x, lateral (select q2 as r) y) from int8_tbl) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": true, "sql_length": 88, "num_statements": 1} {"question": "PostgreSQL Xfunc: show example 53.", "schema": null, "sql": "SELECT mleast(ARRAY[10, -1, 5, 4.4]); -- doesn't work;", "explanation": "Example from PostgreSQL documentation on Xfunc.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": true, "sql_length": 54, "num_statements": 2} {"question": "pgTAP test for Functap (assertion 34).", "schema": null, "sql": "SELECT * FROM check_test(\n hasnt_function( 'pg_catalog', 'now'::name ),\n false,\n 'simple schema.function',\n 'Function pg_catalog.now() should not exist',\n ''\n);", "explanation": "SQL assertion from pgTAP test suite for Functap.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": true, "sql_length": 175, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'line' (example 21).", "schema": null, "sql": "INSERT INTO LINE_TBL VALUES (line(point '(1,0)', point '(1,0)'));", "explanation": "DML from PostgreSQL core regression test for Line.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 65, "num_statements": 1} {"question": "PostgreSQL regression test 'uuid': Write the SELECT query (example 17).", "schema": null, "sql": "SELECT guid_field FROM guid1 ORDER BY guid_field DESC;", "explanation": "Regression test for Uuid in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT guid_field FROM guid1 ORDER BY guid_field DESC) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 54, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'btree_gist' (example 13).", "schema": null, "sql": "SELECT count(*) FROM moneytmp WHERE a = '22649.64'::money;", "explanation": "Example query from the 'btree_gist' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 59, "num_statements": 1} {"question": "PostgreSQL regression test 'jsonb': Write the SELECT query (example 669).", "schema": null, "sql": "select '{\"aa\":1 , \"b\":2, \"cq\":3}'::jsonb || '{\"cq\":\"l\", \"b\":\"g\", \"fg\":false}';", "explanation": "Regression test for Jsonb in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select '{\"aa\":1 , \"b\":2, \"cq\":3}'::jsonb || '{\"cq\":\"l\", \"b\":\"g\", \"fg\":false}') AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": true, "sql_length": 78, "num_statements": 1} {"question": "Show a SQL definition from the citus project (chbenchmark_all_queries, item 14).", "schema": null, "sql": "CREATE TABLE item (\n i_id int NOT NULL,\n i_name varchar(24) NOT NULL,\n i_price decimal(5,2) NOT NULL,\n i_data varchar(50) NOT NULL,\n i_im_id int NOT NULL,\n PRIMARY KEY (i_id)\n);", "explanation": "SQL definition from the open-source citus PostgreSQL project.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_table", "is_postgresql_specific": false, "sql_length": 195, "num_statements": 1} {"question": "pgTAP test for Functap (assertion 176).", "schema": null, "sql": "SELECT * FROM check_test(\n isnt_normal_function( 'public', 'tap_accum'::name ),\n true,\n 'isnt_normal_function(schema, agg)',\n 'Function public.tap_accum() should not be a normal function',\n ''\n);", "explanation": "SQL assertion from pgTAP test suite for Functap.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 210, "num_statements": 1} {"question": "Show a SQL definition from the citus project (multi_behavioral_analytics_create_table, item 46).", "schema": null, "sql": "CREATE INDEX is_index6 ON events_table(value_2);", "explanation": "SQL definition from the open-source citus PostgreSQL project.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "ddl_index", "is_postgresql_specific": false, "sql_length": 48, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'btree_gin' (example 17).", "schema": null, "sql": "SELECT * FROM test_int8 WHERE i<1::int4 ORDER BY i;", "explanation": "Example query from the 'btree_gin' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 51, "num_statements": 1} {"question": "PostgreSQL regression test 'jsonb_jsonpath': Write the SELECT query (example 105).", "schema": null, "sql": "select * from jsonb_path_query('[1,\"1\",2,\"2\",null]', '$[*] ? (@ == $value)', '{\"value\" : \"1\"}');", "explanation": "Regression test for Jsonb Jsonpath in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select * from jsonb_path_query('[1,\"1\",2,\"2\",null]', '$[*] ? (@ == $value)', '{\"value\" : \"1\"}')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "advanced", "category": "query_select", "is_postgresql_specific": false, "sql_length": 96, "num_statements": 1} {"question": "Write the PL/pgSQL object from PostgreSQL regression test 'rangefuncs' (example 363).", "schema": null, "sql": "-- without the \"offset 0\", this function gets optimized quite differently\n\ncreate function extractq2_2_opt(t int8_tbl) returns table(ret1 int8) as $$\n select extractq2(t)\n$$ language sql immutable;", "explanation": "PL/pgSQL object from PostgreSQL core test for Rangefuncs.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 198, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'seg' (example 63).", "schema": null, "sql": "SELECT '1.0(+-)0.005'::seg AS seg;", "explanation": "Example query from the 'seg' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 34, "num_statements": 1} {"question": "pgTAP test for Pgtap--0.94.0--0.95.0 (assertion 25).", "schema": null, "sql": "-- foreign_tables_are( tables )\nCREATE OR REPLACE FUNCTION foreign_tables_are ( NAME[] )\nRETURNS TEXT AS $$\n SELECT _are(\n 'foreign tables', _extras('f', $1), _missing('f', $1),\n 'Search path ' || pg_catalog.current_setting('search_path') || ' should have the correct foreign tables'\n );\n$$ LANGUAGE SQL;", "explanation": "SQL assertion from pgTAP test suite for Pgtap--0.94.0--0.95.0.", "validation_query": null, "source": "pgtap_tests", "difficulty": "advanced", "category": "plpgsql_function", "is_postgresql_specific": true, "sql_length": 324, "num_statements": 2} {"question": "PostgreSQL regression test 'regproc': Write the SELECT query (example 126).", "schema": null, "sql": "SELECT * FROM pg_input_error_info('-', 'regoperator');", "explanation": "Regression test for Regproc in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT * FROM pg_input_error_info('-', 'regoperator')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 54, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'ltree' (example 320).", "schema": null, "sql": "SELECT count(*) FROM ltreetest WHERE t ~ '23.*.2';", "explanation": "Example query from the 'ltree' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 50, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'rules' (example 242).", "schema": null, "sql": "insert into rtest_view1 values (2, 'item 2', 't');", "explanation": "DML from PostgreSQL core regression test for Rules.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 50, "num_statements": 1} {"question": "Write the PL/pgSQL object from PostgreSQL regression test 'create_function_sql' (example 58).", "schema": null, "sql": "-- check display of function arguments in sub-SELECT\nCREATE TABLE functest1 (i int);", "explanation": "PL/pgSQL object from PostgreSQL core test for Create Function Sql.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "ddl_table", "is_postgresql_specific": false, "sql_length": 84, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'generated_stored' (example 91).", "schema": null, "sql": "INSERT INTO gtest3a (a) VALUES ('a'), ('b'), ('c'), (NULL);", "explanation": "DML from PostgreSQL core regression test for Generated Stored.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 59, "num_statements": 1} {"question": "PostgreSQL regression test 'boolean': Write the SELECT query (example 20).", "schema": null, "sql": "SELECT bool 'on_' AS error;", "explanation": "Regression test for Boolean in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT bool 'on_' AS error) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 27, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'constraints' (example 376).", "schema": null, "sql": "CREATE TABLE notnull_tbl4_lk2 (LIKE notnull_tbl4 INCLUDING INDEXES);", "explanation": "DDL from PostgreSQL core regression test for Constraints.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_table", "is_postgresql_specific": false, "sql_length": 68, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'partition_join' (example 229).", "schema": null, "sql": "CREATE TABLE prt3_n_p2 PARTITION OF prt3_n FOR VALUES IN ('0001', '0002', '0008', '0010');", "explanation": "DDL from PostgreSQL core regression test for Partition Join.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_table", "is_postgresql_specific": false, "sql_length": 90, "num_statements": 1} {"question": "PostgreSQL regression test 'horology': Write the SELECT query (example 25).", "schema": null, "sql": "SELECT timestamp with time zone 'J2452271 04:05:06-08';", "explanation": "Regression test for Horology in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT timestamp with time zone 'J2452271 04:05:06-08') AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 55, "num_statements": 1} {"question": "PostgreSQL regression test 'copy': Write the SELECT query (example 9).", "schema": null, "sql": "select * from copytest except select * from copytest2;", "explanation": "Regression test for Copy in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select * from copytest except select * from copytest2) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 54, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'test_decoding' (example 31).", "schema": null, "sql": "SELECT regexp_replace(data, '^(.{100}).*(.{100})$', '\\1..\\2') FROM pg_logical_slot_peek_changes('regression_slot', NULL, NULL, 'include-xids', '0', 'skip-empty-xacts', '1');", "explanation": "Example query from the 'test_decoding' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 173, "num_statements": 1} {"question": "Show an example of PostgreSQL CREATE ROLE (example 2).", "schema": null, "sql": "CREATE USER davide WITH PASSWORD 'jw8s0F4';", "explanation": "PostgreSQL CREATE ROLE command.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 43, "num_statements": 1} {"question": "PL/pgSQL test: Plpython Spi (example 31).", "schema": null, "sql": "-- cursor objects\n\nCREATE FUNCTION simple_cursor_test() RETURNS int AS $$\nres = plpy.cursor(\"select fname, lname from users\")\ndoes = 0\nfor row in res:\n if row['lname'] == 'doe':\n does += 1\nreturn does\n$$ LANGUAGE plpython3u;", "explanation": "PL/pgSQL example from PostgreSQL source test for Plpython Spi.", "validation_query": null, "source": "plpgsql_source", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 234, "num_statements": 1} {"question": "pgTAP test for Pgtap--0.94.0--0.95.0 (assertion 7).", "schema": null, "sql": "-- is_member_of( role, members[] )\nCREATE OR REPLACE FUNCTION is_member_of( NAME, NAME[] )\nRETURNS TEXT AS $$\n SELECT is_member_of( $1, $2, 'Should have members of role ' || quote_ident($1) );\n$$ LANGUAGE SQL;", "explanation": "SQL assertion from pgTAP test suite for Pgtap--0.94.0--0.95.0.", "validation_query": null, "source": "pgtap_tests", "difficulty": "advanced", "category": "plpgsql_function", "is_postgresql_specific": true, "sql_length": 212, "num_statements": 2} {"question": "PostgreSQL regression test 'select_distinct': Write the SELECT query (example 75).", "schema": null, "sql": "SELECT 2 IS NOT DISTINCT FROM null as \"no\";", "explanation": "Regression test for Select Distinct in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT 2 IS NOT DISTINCT FROM null as \"no\") AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 43, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'triggers' (example 249).", "schema": null, "sql": "create trigger parted_trigger after update on parted_trigger\n for each row when (new.a % 2 = 1 and length(old.b) >= 2) execute procedure trigger_notice_ab();", "explanation": "DDL from PostgreSQL core regression test for Triggers.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "dml_update", "is_postgresql_specific": true, "sql_length": 158, "num_statements": 1} {"question": "PL/pgSQL test: Pltcl Subxact (example 2).", "schema": null, "sql": "--\n-- We use this wrapper to catch errors and return errormsg only,\n-- because values of $::errorinfo variable contain procedure name which\n-- includes OID, so it's not stable\n--\nCREATE FUNCTION pltcl_wrapper(statement text) RETURNS text\nAS $$\n if [catch {spi_exec $1} msg] {\n return \"ERROR: $msg\"\n } else {\n return \"SUCCESS: $msg\"\n }\n$$ LANGUAGE pltcl;", "explanation": "PL/pgSQL example from PostgreSQL source test for Pltcl Subxact.", "validation_query": null, "source": "plpgsql_source", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 376, "num_statements": 1} {"question": "PostgreSQL regression test 'float8': Write the SELECT query (example 89).", "schema": null, "sql": "SELECT power(float8 '-inf', float8 '-3');", "explanation": "Regression test for Float8 in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT power(float8 '-inf', float8 '-3')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 41, "num_statements": 1} {"question": "PostgreSQL regression test 'jsonpath': Write the SELECT query (example 51).", "schema": null, "sql": "select '$.a[$a + 1, ($b[*]) to -($[0] * 2)]'::jsonpath;", "explanation": "Regression test for Jsonpath in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select '$.a[$a + 1, ($b[*]) to -($[0] * 2)]'::jsonpath) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 55, "num_statements": 1} {"question": "Write the SQL definition from PostgreSQL contrib extension 'contrib' (item 19).", "schema": null, "sql": "CREATE FUNCTION dblink_close (text, text)\nRETURNS text\nAS 'MODULE_PATHNAME','dblink_close'\nLANGUAGE C STRICT PARALLEL RESTRICTED;", "explanation": "SQL definition from the 'contrib' PostgreSQL contrib extension.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "other", "is_postgresql_specific": false, "sql_length": 129, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'tablesample' (example 52).", "schema": null, "sql": "create table parted_sample_1 partition of parted_sample for values in (1);", "explanation": "DDL from PostgreSQL core regression test for Tablesample.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_table", "is_postgresql_specific": false, "sql_length": 74, "num_statements": 1} {"question": "PostgreSQL regression test 'jsonb': Write the SELECT query (example 122).", "schema": null, "sql": "SELECT jsonb '{\"a\":null, \"b\":\"qq\"}' ? 'c';", "explanation": "Regression test for Jsonb in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT jsonb '{\"a\":null, \"b\":\"qq\"}' ? 'c') AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": true, "sql_length": 42, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'arrays' (example 107).", "schema": null, "sql": "insert into arrtest_f values(2,'cat1',1.24);", "explanation": "DML from PostgreSQL core regression test for Arrays.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 44, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'btree_gist' (example 14).", "schema": null, "sql": "SELECT count(*) FROM int8tmp WHERE a >= 464571291354841::int8;", "explanation": "Example query from the 'btree_gist' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 62, "num_statements": 1} {"question": "pgTAP test for Pgtap--Unpackaged--0.91.0 (assertion 371).", "schema": null, "sql": "ALTER EXTENSION pgtap ADD FUNCTION has_cast ( NAME, NAME, NAME, NAME, TEXT );", "explanation": "SQL assertion from pgTAP test suite for Pgtap--Unpackaged--0.91.0.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 77, "num_statements": 1} {"question": "PostgreSQL regression test 'create_misc': Write the SELECT query (example 67).", "schema": null, "sql": "SELECT class, c FROM e_star* x WHERE x.c NOTNULL;", "explanation": "Regression test for Create Misc in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT class, c FROM e_star* x WHERE x.c NOTNULL) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 49, "num_statements": 1} {"question": "PostgreSQL regression test 'expressions': Write the SELECT query (example 66).", "schema": null, "sql": "select * from inttest where a not in (1::myint,2::myint,3::myint,4::myint,5::myint,6::myint,7::myint,8::myint,9::myint, null);", "explanation": "Regression test for Expressions in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select * from inttest where a not in (1::myint,2::myint,3::myint,4::myint,5::myint,6::myint,7::myint,8::myint,9::myint, null)) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 126, "num_statements": 1} {"question": "Show a SQL definition from the citus project (multi_agg_approximate_distinct, item 22).", "schema": null, "sql": "CREATE TABLE test_count_distinct_schema.nation_hash(\n n_nationkey integer not null,\n n_name char(25) not null,\n n_regionkey integer not null,\n n_comment varchar(152)\n);", "explanation": "SQL definition from the open-source citus PostgreSQL project.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_table", "is_postgresql_specific": false, "sql_length": 180, "num_statements": 1} {"question": "PostgreSQL regression test 'window': Write the SELECT query (example 7).", "schema": null, "sql": "SELECT depname, empno, salary, rank() OVER w FROM empsalary WINDOW w AS (PARTITION BY depname ORDER BY salary) ORDER BY rank() OVER w;", "explanation": "Regression test for Window in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT depname, empno, salary, rank() OVER w FROM empsalary WINDOW w AS (PARTITION BY depname ORDER BY salary) ORDER BY rank() OVER w) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "advanced", "category": "query_select", "is_postgresql_specific": true, "sql_length": 134, "num_statements": 1} {"question": "pgTAP test for Index (assertion 70).", "schema": null, "sql": "SELECT * FROM check_test(\n index_is_primary( 'sometab', 'sometab_pkey' ),\n true,\n 'index_is_primary() no schema',\n 'Index sometab_pkey should be on a primary key',\n ''\n);", "explanation": "SQL assertion from pgTAP test suite for Index.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 185, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'int4' (example 6).", "schema": null, "sql": "INSERT INTO INT4_TBL(f1) VALUES ('- 1234');", "explanation": "DML from PostgreSQL core regression test for Int4.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 43, "num_statements": 1} {"question": "Write the SQL definition from PostgreSQL contrib extension 'contrib' (item 220).", "schema": null, "sql": "-- Create the operator class (intentionally not DEFAULT)\nCREATE OPERATOR CLASS gist_cidr_ops\nFOR TYPE cidr USING gist\nAS\n\tOPERATOR\t1\t< (inet, inet) ,\n\tOPERATOR\t2\t<= (inet, inet) ,\n\tOPERATOR\t3\t= (inet, inet) ,\n\tOPERATOR\t4\t>= (inet, inet) ,\n\tOPERATOR\t5\t> (inet, inet) ,\n\tOPERATOR\t6\t<> (inet, inet) ,\n\tFUNCTION\t1\tgbt_inet_consistent (internal, inet, int2, oid, internal),\n\tFUNCTION\t2\tgbt_inet_union (internal, internal),\n\tFUNCTION\t3\tgbt_inet_compress (internal),\n\tFUNCTION\t4\tgbt_decompress (internal),\n\tFUNCTION\t5\tgbt_inet_penalty (internal, internal, internal),\n\tFUNCTION\t6\tgbt_inet_picksplit (internal, internal),\n\tFUNCTION\t7\tgbt_inet_same (gbtreekey16, gbtreekey16, internal),\n\t-- no fetch support, the compress function is lossy\n\tFUNCTION\t11\tgbt_inet_sortsupport (internal),\n\tFUNCTION\t12 (\"any\", \"any\") gist_translate_cmptype_btree (int),\n\tSTORAGE\t\tgbtreekey16;", "explanation": "SQL definition from the 'contrib' PostgreSQL contrib extension.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "ddl_advanced", "is_postgresql_specific": true, "sql_length": 865, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'create_view' (example 47).", "schema": null, "sql": "CREATE VIEW v6_temp AS SELECT * FROM base_table WHERE id IN (SELECT id FROM temp_table);", "explanation": "DDL from PostgreSQL core regression test for Create View.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "ddl_view", "is_postgresql_specific": false, "sql_length": 88, "num_statements": 1} {"question": "PL/pgSQL test: Plpgsql Domain (example 75).", "schema": null, "sql": "SELECT * FROM test_assign_ordered_named_pairs(1,2,0); -- should fail someday\n\nCREATE FUNCTION test_null_ordered_named_pair()\n RETURNS ordered_named_pair AS $$\ndeclare v ordered_named_pair;\nbegin\nreturn v;\nend\n$$ LANGUAGE plpgsql;", "explanation": "PL/pgSQL example from PostgreSQL source test for Plpgsql Domain.", "validation_query": null, "source": "plpgsql_source", "difficulty": "basic", "category": "plpgsql_function", "is_postgresql_specific": true, "sql_length": 231, "num_statements": 4} {"question": "Write the DDL statement from PostgreSQL regression test 'partition_prune' (example 292).", "schema": null, "sql": "create table ab_a2_b2 partition of ab_a2 for values in (2);", "explanation": "DDL from PostgreSQL core regression test for Partition Prune.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_table", "is_postgresql_specific": false, "sql_length": 59, "num_statements": 1} {"question": "pgTAP test for Pgtap--Unpackaged--0.91.0 (assertion 352).", "schema": null, "sql": "ALTER EXTENSION pgtap ADD FUNCTION _is_super( NAME );", "explanation": "SQL assertion from pgTAP test suite for Pgtap--Unpackaged--0.91.0.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 53, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'rowtypes' (example 149).", "schema": null, "sql": "CREATE FUNCTION price_key_from_table(price) RETURNS price_key AS $$\n SELECT $1.id\n$$ LANGUAGE SQL;", "explanation": "DDL from PostgreSQL core regression test for Rowtypes.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 101, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'rangetypes' (example 45).", "schema": null, "sql": "INSERT INTO numrange_test VALUES('[3,]');", "explanation": "DML from PostgreSQL core regression test for Rangetypes.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 41, "num_statements": 1} {"question": "PostgreSQL regression test 'sqljson_jsontable': Write the SELECT query (example 80).", "schema": null, "sql": "SELECT * FROM JSON_TABLE(jsonb '\"world\"', '$' COLUMNS (item text FORMAT JSON PATH '$' WITH WRAPPER KEEP QUOTES));", "explanation": "Regression test for Sqljson Jsontable in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT * FROM JSON_TABLE(jsonb '\"world\"', '$' COLUMNS (item text FORMAT JSON PATH '$' WITH WRAPPER KEEP QUOTES))) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": true, "sql_length": 113, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'fast_default' (example 3).", "schema": null, "sql": "CREATE TABLE m(id OID);", "explanation": "DDL from PostgreSQL core regression test for Fast Default.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_table", "is_postgresql_specific": false, "sql_length": 23, "num_statements": 1} {"question": "Show a SQL definition from the pg_partman project (pg_partman--2.3.2--2.3.3, item 14).", "schema": null, "sql": "/*\n * Function to create partitioning trigger on parent table\n */\nCREATE OR REPLACE FUNCTION create_trigger(p_parent_table text) RETURNS void\n LANGUAGE plpgsql SECURITY DEFINER\n AS $$\nDECLARE\n\nv_function_name text;\nv_new_length int;\nv_parent_schema text;\nv_parent_tablename text;\nv_trig_name text;\nv_trig_sql text;\n\nBEGIN\n\nSELECT schemaname, tablename INTO v_parent_schema, v_parent_tablename\nFROM pg_catalog.pg_tables\nWHERE schemaname = split_part(p_parent_table, '.', 1)::name\nAND tablename = split_part(p_parent_table, '.', 2)::name;\nv_trig_name := @extschema@.check_name_length(p_object_name := v_parent_tablename, p_suffix := '_part_trig');\n-- Ensure function name matches the naming pattern\nv_function_name := @extschema@.check_name_length(v_parent_tablename, '_part_trig_func', FALSE);\nv_trig_sql := format('CREATE TRIGGER %I BEFORE INSERT ON %I.%I FOR EACH ROW EXECUTE PROCEDURE %I.%I()'\n , v_trig_name\n , v_parent_schema\n , v_parent_tablename\n , v_parent_schema\n , v_function_name);\n\nEXECUTE v_trig_sql;\n\nEND\n$$;", "explanation": "SQL definition from the open-source pg_partman PostgreSQL project.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "plpgsql_trigger", "is_postgresql_specific": true, "sql_length": 1099, "num_statements": 12} {"question": "PostgreSQL regression test 'partition_aggregate': Write the SELECT query (example 102).", "schema": null, "sql": "SELECT a, sum(b), count(*) FROM pagg_tab_ml GROUP BY a HAVING avg(b) < 3 ORDER BY 1, 2, 3;", "explanation": "Regression test for Partition Aggregate in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT a, sum(b), count(*) FROM pagg_tab_ml GROUP BY a HAVING avg(b) < 3 ORDER BY 1, 2, 3) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 90, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'foreign_key' (example 707).", "schema": null, "sql": "CREATE TABLE fk_partitioned_fk_2 (b int, a int,\n\tCONSTRAINT fk_part_con FOREIGN KEY (a, b) REFERENCES fk_notpartitioned_pk ON UPDATE CASCADE ON DELETE CASCADE NOT ENFORCED);", "explanation": "DDL from PostgreSQL core regression test for Foreign Key.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "ddl_table", "is_postgresql_specific": false, "sql_length": 173, "num_statements": 1} {"question": "pgTAP test for Todotap (assertion 7).", "schema": null, "sql": "SELECT is(\n fail('This is a todo test' ) || '\n'\n || pass('This is a todo test that unexpectedly passes' ),\n 'not ok 6 - This is a todo test # TODO \n# Failed (TODO) test 6: \"This is a todo test\"\nok 7 - This is a todo test that unexpectedly passes # TODO ',\n 'TODO tests should display properly'\n);", "explanation": "SQL assertion from pgTAP test suite for Todotap.", "validation_query": null, "source": "pgtap_tests", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 309, "num_statements": 1} {"question": "PostgreSQL regression test 'polymorphism': Write the SELECT query (example 182).", "schema": null, "sql": "select first_el_agg_f8(x::float8) from generate_series(1,10) x;", "explanation": "Regression test for Polymorphism in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select first_el_agg_f8(x::float8) from generate_series(1,10) x) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 63, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'pg_visibility' (example 22).", "schema": null, "sql": "select pg_visibility_map_summary('test_index');", "explanation": "Example query from the 'pg_visibility' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 47, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'partition_aggregate' (example 40).", "schema": null, "sql": "CREATE TABLE pagg_tab1_p2 PARTITION OF pagg_tab1 FOR VALUES FROM (10) TO (20);", "explanation": "DDL from PostgreSQL core regression test for Partition Aggregate.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_table", "is_postgresql_specific": false, "sql_length": 78, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'numeric' (example 221).", "schema": null, "sql": "INSERT INTO num_exp_sub VALUES (5,2,'34354889.253888047');", "explanation": "DML from PostgreSQL core regression test for Numeric.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 58, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'lock' (example 10).", "schema": null, "sql": "CREATE VIEW lock_view6 AS SELECT * from (select * from lock_tbl1) sub;", "explanation": "DDL from PostgreSQL core regression test for Lock.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "ddl_view", "is_postgresql_specific": false, "sql_length": 70, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'cube' (example 131).", "schema": null, "sql": "SELECT '(-1),(1)'::cube @> '(-1),(1)'::cube AS bool;", "explanation": "Example query from the 'cube' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "query_select", "is_postgresql_specific": true, "sql_length": 77, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'aggregates' (example 299).", "schema": null, "sql": "create or replace view agg_view1 as\n select aggfns(a,b,c order by b+1)\n from (values (1,3,'foo'),(0,null,null),(2,2,'bar'),(3,1,'baz')) v(a,b,c);", "explanation": "DDL from PostgreSQL core regression test for Aggregates.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 149, "num_statements": 1} {"question": "Show a SQL definition from the timescaledb project (truncate, item 13).", "schema": null, "sql": "CREATE VIEW dependent_view AS SELECT * FROM _timescaledb_internal._hyper_1_5_chunk;", "explanation": "SQL definition from the open-source timescaledb PostgreSQL project.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "ddl_view", "is_postgresql_specific": false, "sql_length": 83, "num_statements": 1} {"question": "PostgreSQL regression test 'tidscan': Write the SELECT query (example 33).", "schema": null, "sql": "SELECT * FROM tidscan;", "explanation": "Regression test for Tidscan in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT * FROM tidscan) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 22, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'strings' (example 335).", "schema": null, "sql": "INSERT INTO toasttest values (repeat('1234567890',300));", "explanation": "DML from PostgreSQL core regression test for Strings.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 56, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'publication' (example 655).", "schema": null, "sql": "CREATE TABLE sch1.tbl1_part1 PARTITION OF sch1.tbl1 FOR VALUES FROM (1) to (10);", "explanation": "DDL from PostgreSQL core regression test for Publication.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_table", "is_postgresql_specific": false, "sql_length": 80, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'citext' (example 214).", "schema": null, "sql": "INSERT INTO caster (text) VALUES ('12'::int2);", "explanation": "Example query from the 'citext' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 55, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'test_decoding' (example 18).", "schema": null, "sql": "SELECT * FROM pg_stat_get_replication_slot('do-not-exist');", "explanation": "Example query from the 'test_decoding' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 59, "num_statements": 1} {"question": "PostgreSQL regression test 'name': Write the SELECT query (example 33).", "schema": null, "sql": "SELECT parse_ident(E'\"c\".X XXXX\\002XXXXXX');", "explanation": "Regression test for Name in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT parse_ident(E'\"c\".X XXXX\\002XXXXXX')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 44, "num_statements": 1} {"question": "Write the PL/pgSQL object from PostgreSQL regression test 'create_operator' (example 45).", "schema": null, "sql": "-- Should fail. Procedure should be mandatorily specified\nCREATE OPERATOR #@%# (\n rightarg = int8\n);", "explanation": "PL/pgSQL object from PostgreSQL core test for Create Operator.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "ddl_advanced", "is_postgresql_specific": true, "sql_length": 102, "num_statements": 1} {"question": "PostgreSQL regression test 'interval': Write the SELECT query (example 178).", "schema": null, "sql": "select interval 'PT2562047788:00:54.775807';", "explanation": "Regression test for Interval in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select interval 'PT2562047788:00:54.775807') AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 44, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'create_role' (example 46).", "schema": null, "sql": "CREATE DATABASE regress_nosuch_db;", "explanation": "DDL from PostgreSQL core regression test for Create Role.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 34, "num_statements": 1} {"question": "PostgreSQL regression test 'jsonb_jsonpath': Write the SELECT query (example 176).", "schema": null, "sql": "select jsonb '[1,2,3]' @? '$ ? (+@[*] > +2)';", "explanation": "Regression test for Jsonb Jsonpath in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select jsonb '[1,2,3]' @? '$ ? (+@[*] > +2)') AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": true, "sql_length": 45, "num_statements": 1} {"question": "Write the SQL definition from PostgreSQL contrib extension 'contrib' (item 183).", "schema": null, "sql": "CREATE FUNCTION isnne(issn, issn13)\n\tRETURNS boolean\n\tAS 'int8ne'\n\tLANGUAGE 'internal'\n\tIMMUTABLE STRICT\n\tPARALLEL SAFE;", "explanation": "SQL definition from the 'contrib' PostgreSQL contrib extension.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 120, "num_statements": 1} {"question": "Show a SQL definition from the zombodb project (issue-864, item 8).", "schema": null, "sql": "CREATE OR REPLACE VIEW cake.cake_summary AS\nSELECT brand.pk_brand_id,\n brand.brand_name,\n (SELECT json_agg(row_to_json(cj.*)) AS json_agg\n FROM (SELECT flavor.pk_flavor,\n flavor.flv_name,\n flavor.flv_color\n FROM cake.flavor\n WHERE (brand.pk_brand_id = flavor.fk_flv_to_bd)) cj)::json AS flavor_data,\n cake.zdb_cake_bd_to_flavor(brand.*) AS zdb\nFROM cake.brand;", "explanation": "SQL definition from the open-source zombodb PostgreSQL project.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": true, "sql_length": 481, "num_statements": 1} {"question": "PostgreSQL regression test 'window': Write the SELECT query (example 69).", "schema": null, "sql": "SELECT sum(unique1) over (w range between unbounded preceding and current row exclude current row),\n\tunique1, four\nFROM tenk1 WHERE unique1 < 10 WINDOW w AS (order by four);", "explanation": "Regression test for Window in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT sum(unique1) over (w range between unbounded preceding and current row exclude current row),\n\tunique1, four\nFROM tenk1 WHERE unique1 < 10 WINDOW w AS (order by four)) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "advanced", "category": "query_window_function", "is_postgresql_specific": true, "sql_length": 173, "num_statements": 1} {"question": "Show a SQL definition from the citus project (multi_alter_table_add_foreign_key_without_name, item 10).", "schema": null, "sql": "CREATE TABLE self_referencing_table(id int, ref_id int, PRIMARY KEY (id, ref_id));", "explanation": "SQL definition from the open-source citus PostgreSQL project.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_table", "is_postgresql_specific": false, "sql_length": 82, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'equivclass' (example 7).", "schema": null, "sql": "create function int8alias2out(int8alias2) returns cstring\n strict immutable language internal as 'int8out';", "explanation": "DDL from PostgreSQL core regression test for Equivclass.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 108, "num_statements": 1} {"question": "Show a SQL definition from the pg_partman project (pg_partman--1.8.0--1.8.1, item 1).", "schema": null, "sql": "/*\n * Apply constraints managed by partman extension\n */\nCREATE OR REPLACE FUNCTION apply_constraints(p_parent_table text, p_child_table text DEFAULT NULL, p_analyze boolean DEFAULT FALSE, p_debug boolean DEFAULT FALSE) RETURNS void\n LANGUAGE plpgsql\n AS $$\nDECLARE\n\nv_child_table text;\nv_child_tablename text;\nv_col text;\nv_constraint_cols text[];\nv_constraint_col_type text;\nv_constraint_name text;\nv_datetime_string text;\nv_existing_constraint_name text;\nv_job_id bigint;\nv_jobmon boolean;\nv_jobmon_schema text;\nv_last_partition text;\nv_last_partition_id int;\nv_last_partition_timestamp timestamp;\nv_constraint_values record;\nv_old_search_path text;\nv_parent_schema text;\nv_parent_tablename text;\nv_part_interval text;\nv_partition_suffix text;\nv_premake int;\nv_sql text;\nv_step_id bigint;\nv_suffix_position int;\nv_type text;\n\nBEGIN\n\nSELECT type\n , part_interval\n , premake\n , datetime_string\n , constraint_cols\n , jobmon\nINTO v_type\n , v_part_interval\n , v_premake\n , v_datetime_string\n , v_constraint_cols\n , v_jobmon\nFROM @extschema@.part_config\nWHERE parent_table = p_parent_table;\n\nIF v_constraint_cols IS NULL THEN\n IF p_debug THEN\n RAISE NOTICE 'Given parent table (%) not set up for constraint management (constraint_cols is NULL)', p_parent_table;\n END IF;\n -- Returns silently to allow this function to be simply called by maintenance processes without having to check if config options are set.\n RETURN;\nEND IF;\n\nSELECT show_partitions INTO v_last_partition FROM @extschema@.show_partitions(p_parent_table, 'DESC') LIMIT 1;\n\nIF v_jobmon THEN\n SELECT nspname INTO v_jobmon_schema FROM pg_catalog.pg_namespace n, pg_catalog.pg_extension e WHERE e.extname = 'pg_jobmon' AND e.extnamespace = n.oid;\n IF v_jobmon_schema IS NOT NULL THEN\n SELECT current_setting('search_path') INTO v_old_search_path;\n EXECUTE 'SELECT set_config(''search_path'',''@extschema@,'||v_jobmon_schema||''',''false'')';\n END IF;\nEND IF;\n\nIF v_jobmon_schema IS NOT NULL THEN\n v_job_id := add_job('PARTMAN CREATE CONSTRAINT: '||p_parent_table);\nEND IF;\n\nSELECT schemaname, tablename INTO v_parent_schema, v_parent_tablename FROM pg_tables WHERE schemaname ||'.'|| tablename = p_parent_table;\n\n-- If p_child_table is null, figure out the partition that is the one right before the premake value backwards.\nIF p_child_table IS NULL THEN\n\n IF v_jobmon_schema IS NOT NULL THEN\n v_step_id := add_step(v_job_id, 'Automatically determining most recent child on which to apply constraints');\n END IF;\n\n v_suffix_position := (length(v_last_partition) - position('p_' in reverse(v_last_partition))) + 2;\n\n IF v_type IN ('time-static', 'time-dynamic') THEN\n v_last_partition_timestamp := to_timestamp(substring(v_last_partition from v_suffix_position), v_datetime_string);\n v_partition_suffix := to_char(v_last_partition_timestamp - (v_part_interval::interval * ((v_premake * 2)+1) ), v_datetime_string);\n ELSIF v_type IN ('id-static', 'id-dynamic') THEN\n v_last_partition_id := substring(v_last_partition from v_suffix_position)::int;\n v_partition_suffix := (v_last_partition_id - (v_part_interval::int * ((v_premake * 2)+1) ))::text;\n END IF;\n\n v_child_table := @extschema@.check_name_length(v_parent_tablename, v_parent_schema, v_partition_suffix, TRUE);\n\n IF v_jobmon_schema IS NOT NULL THEN\n PERFORM update_step(v_step_id, 'OK', 'Target child table: '||v_child_table);\n END IF;\nELSE\n v_child_table := p_child_table;\nEND IF;\n\nIF v_jobmon_schema IS NOT NULL THEN\n v_step_id := add_step(v_job_id, 'Checking if target child table exists');\nEND IF;\n\nSELECT tablename INTO v_child_tablename FROM pg_catalog.pg_tables WHERE schemaname ||'.'|| tablename = v_child_table;\nIF v_child_tablename IS NULL THEN\n IF v_jobmon_schema IS NOT NULL THEN\n PERFORM update_step(v_step_id, 'NOTICE', 'Target child table ('||v_child_table||') does not exist. Skipping constraint creation.');\n PERFORM close_job(v_job_id);\n EXECUTE 'SELECT set_config(''search_path'','''||v_old_search_path||''',''false'')';\n END IF;\n IF p_debug THEN\n RAISE NOTICE 'Target child table (%) does not exist. Skipping constraint creation.', v_child_table;\n END IF;\n RETURN;\nELSE\n IF v_jobmon_schema IS NOT NULL THEN\n PERFORM update_step(v_step_id, 'OK', 'Done');\n END IF;\nEND IF;\n\nFOREACH v_col IN ARRAY v_constraint_cols\nLOOP\n SELECT c.conname\n INTO v_existing_constraint_name\n FROM pg_catalog.pg_constraint c\n JOIN pg_catalog.pg_attribute a ON c.conrelid = a.attrelid\n WHERE conrelid = v_child_table::regclass\n AND c.conname LIKE 'partmanconstr_%'\n AND c.contype = 'c'\n AND a.attname = v_col\n AND ARRAY[a.attnum] <@ c.conkey\n AND a.attisdropped = false;\n\n IF v_jobmon_schema IS NOT NULL THEN\n v_step_id := add_step(v_job_id, 'Applying new constraint on column: '||v_col);\n END IF;\n\n IF v_existing_constraint_name IS NOT NULL THEN\n IF v_jobmon_schema IS NOT NULL THEN\n PERFORM update_step(v_step_id, 'NOTICE', 'Partman managed constraint already exists on this table ('||v_child_table||') and column ('||v_col||'). Skipping creation.');\n END IF;\n RAISE WARNING 'Partman managed constraint already exists on this table (%) and column (%). Skipping creation.', v_child_table, v_col ;\n CONTINUE;\n END IF;\n\n -- Ensure column name gets put on end of constraint name to help avoid naming conflicts\n v_constraint_name := @extschema@.check_name_length('partmanconstr_'||v_child_tablename, p_suffix := '_'||v_col);\n\n EXECUTE 'SELECT min('||v_col||')::text AS min, max('||v_col||')::text AS max FROM '||v_child_table INTO v_constraint_values;\n\n IF v_constraint_values IS NOT NULL THEN\n v_sql := concat('ALTER TABLE ', v_child_table, ' ADD CONSTRAINT ', v_constraint_name\n , ' CHECK (', v_col, ' >= ', quote_literal(v_constraint_values.min), ' AND '\n , v_col, ' <= ', quote_literal(v_constraint_values.max), ')' );\n IF p_debug THEN\n RAISE NOTICE 'Constraint creation query: %', v_sql;\n END IF;\n EXECUTE v_sql;\n\n IF v_jobmon_schema IS NOT NULL THEN\n PERFORM update_step(v_step_id, 'OK', 'New constraint created: '||v_sql);\n END IF;\n ELSE\n IF p_debug THEN\n RAISE NOTICE 'Given column (%) contains all NULLs. No constraint created', v_col;\n END IF;\n IF v_jobmon_schema IS NOT NULL THEN\n PERFORM update_step(v_step_id, 'NOTICE', 'Given column ('||v_col||') contains all NULLs. No constraint created');\n END IF;\n END IF;\n\nEND LOOP;\n\nIF p_analyze THEN\n IF v_jobmon_schema IS NOT NULL THEN\n v_step_id := add_step(v_job_id, 'Running analyze on partition set: '||p_parent_table);\n END IF;\n IF p_debug THEN\n RAISE NOTICE 'Running analyze on partition set: %', p_parent_table;\n END IF;\n\n EXECUTE 'ANALYZE '||p_parent_table;\n\n IF v_jobmon_schema IS NOT NULL THEN\n PERFORM update_step(v_step_id, 'OK', 'Done');\n END IF;\nEND IF;\n\nIF v_jobmon_schema IS NOT NULL THEN\n PERFORM close_job(v_job_id);\n EXECUTE 'SELECT set_config(''search_path'','''||v_old_search_path||''',''false'')';\nEND IF;\n\nEXCEPTION\n WHEN OTHERS THEN\n IF v_jobmon_schema IS NOT NULL THEN\n IF v_job_id IS NULL THEN\n EXECUTE 'SELECT '||v_jobmon_schema||'.add_job(''PARTMAN CREATE CONSTRAINT: '||p_parent_table||''')' INTO v_job_id;\n EXECUTE 'SELECT '||v_jobmon_schema||'.add_step('||v_job_id||', ''EXCEPTION before job logging started'')' INTO v_step_id;\n ELSIF v_step_id IS NULL THEN\n EXECUTE 'SELECT '||v_jobmon_schema||'.add_step('||v_job_id||', ''EXCEPTION before first step logged'')' INTO v_step_id;\n END IF;\n EXECUTE 'SELECT '||v_jobmon_schema||'.update_step('||v_step_id||', ''CRITICAL'', ''ERROR: '||coalesce(SQLERRM,'unknown')||''')';\n EXECUTE 'SELECT '||v_jobmon_schema||'.fail_job('||v_job_id||')';\n END IF;\n RAISE EXCEPTION '%', SQLERRM;\nEND\n$$;", "explanation": "SQL definition from the open-source pg_partman PostgreSQL project.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "plpgsql_function", "is_postgresql_specific": true, "sql_length": 8579, "num_statements": 107} {"question": "Write the DML statement from PostgreSQL regression test 'truncate' (example 3).", "schema": null, "sql": "INSERT INTO truncate_a VALUES (2);", "explanation": "DML from PostgreSQL core regression test for Truncate.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 34, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'window' (example 89).", "schema": null, "sql": "CREATE OR REPLACE TEMP VIEW v_window AS\n\tSELECT i, sum(i) over (order by i groups between 1 preceding and 1 following) as sum_rows FROM generate_series(1, 10) i;", "explanation": "DDL from PostgreSQL core regression test for Window.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_window_function", "is_postgresql_specific": true, "sql_length": 161, "num_statements": 1} {"question": "PostgreSQL regression test 'unicode': Write the SELECT query (example 7).", "schema": null, "sql": "SELECT normalize(U&'\\0061\\0308\\24D1c', NFC) = U&'\\00E4\\24D1c' COLLATE \"C\" AS test_nfc;", "explanation": "Regression test for Unicode in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT normalize(U&'\\0061\\0308\\24D1c', NFC) = U&'\\00E4\\24D1c' COLLATE \"C\" AS test_nfc) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 86, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'btree_gist' (example 14).", "schema": null, "sql": "SELECT count(*) FROM int4tmp WHERE a >= 237::int4;", "explanation": "Example query from the 'btree_gist' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 50, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'equivclass' (example 44).", "schema": null, "sql": "create unique index ec1_expr1 on ec1((ff + 1));", "explanation": "DDL from PostgreSQL core regression test for Equivclass.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_index", "is_postgresql_specific": false, "sql_length": 47, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'domain' (example 155).", "schema": null, "sql": "create domain dcomptype as comptype check ((value).cf1 > 0);", "explanation": "DDL from PostgreSQL core regression test for Domain.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_advanced", "is_postgresql_specific": true, "sql_length": 60, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'foreign_key' (example 147).", "schema": null, "sql": "CREATE TABLE FKTABLE ( ftest1 int, ftest2 int, ftest3 int, ftest4 int, CONSTRAINT constrname3\n\t\t\tFOREIGN KEY(ftest1, ftest2, ftest3) REFERENCES PKTABLE (ptest1, ptest2, ptest3));", "explanation": "DDL from PostgreSQL core regression test for Foreign Key.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "ddl_table", "is_postgresql_specific": false, "sql_length": 179, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'create_schema' (example 15).", "schema": null, "sql": "CREATE SCHEMA regress_schema_1 AUTHORIZATION CURRENT_ROLE\n CREATE VIEW schema_not_existing.view AS SELECT 1;", "explanation": "DDL from PostgreSQL core regression test for Create Schema.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "ddl_view", "is_postgresql_specific": false, "sql_length": 109, "num_statements": 1} {"question": "PostgreSQL regression test 'tablespace': Write the SELECT query (example 68).", "schema": null, "sql": "SELECT relname, spcname FROM pg_catalog.pg_tablespace t, pg_catalog.pg_class c\n where c.reltablespace = t.oid AND c.relname = 'foo_idx';", "explanation": "Regression test for Tablespace in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT relname, spcname FROM pg_catalog.pg_tablespace t, pg_catalog.pg_class c\n where c.reltablespace = t.oid AND c.relname = 'foo_idx') AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": true, "sql_length": 139, "num_statements": 1} {"question": "PostgreSQL regression test 'temp': Write the SELECT query (example 101).", "schema": null, "sql": "select relname from pg_class where relname ~ '^temp_parted_oncommit_test';", "explanation": "Regression test for Temp in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select relname from pg_class where relname ~ '^temp_parted_oncommit_test') AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": true, "sql_length": 74, "num_statements": 1} {"question": "Show a SQL definition from the timescaledb project (partitioning, item 1).", "schema": null, "sql": "-- Should expect an error when creating a hypertable from a partition\n\\set ON_ERROR_STOP 0\nCREATE TABLE partitioned_ht_create(time timestamptz, temp float, device int) PARTITION BY RANGE (time);", "explanation": "SQL definition from the open-source timescaledb PostgreSQL project.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "ddl_table", "is_postgresql_specific": true, "sql_length": 194, "num_statements": 1} {"question": "PostgreSQL Ddl: show example 81.", "schema": null, "sql": "REVOKE CREATE ON SCHEMA public FROM PUBLIC;", "explanation": "Example from PostgreSQL documentation on Ddl.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "dcl_security", "is_postgresql_specific": false, "sql_length": 43, "num_statements": 1} {"question": "Write the PL/pgSQL object from PostgreSQL regression test 'window' (example 123).", "schema": null, "sql": "-- There is a syntactic ambiguity in the SQL standard. Since\n-- UNBOUNDED is a non-reserved word, it could be the name of a\n-- function parameter and be used as an expression. There is a\n-- grammar hack to resolve such cases as the keyword. The following\n-- tests record this behavior.\n\nCREATE FUNCTION unbounded_syntax_test1a(x int) RETURNS TABLE (a int, b int, c int)\nLANGUAGE SQL\nBEGIN ATOMIC\n SELECT sum(unique1) over (rows between x preceding and x following),\n unique1, four\n FROM tenk1 WHERE unique1 < 10;", "explanation": "PL/pgSQL object from PostgreSQL core test for Window.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "plpgsql_function", "is_postgresql_specific": true, "sql_length": 525, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'numeric' (example 392).", "schema": null, "sql": "INSERT INTO num_exp_add VALUES (9,5,'-24910407.006556420');", "explanation": "DML from PostgreSQL core regression test for Numeric.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 59, "num_statements": 1} {"question": "PostgreSQL regression test 'vacuum': Write the SELECT query (example 58).", "schema": null, "sql": "SELECT reltuples, relhassubclass\n FROM pg_class WHERE oid = 'past_inh_parent'::regclass;", "explanation": "Regression test for Vacuum in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT reltuples, relhassubclass\n FROM pg_class WHERE oid = 'past_inh_parent'::regclass) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": true, "sql_length": 89, "num_statements": 1} {"question": "PL/pgSQL test: Plpython Populate (example 19).", "schema": null, "sql": "INSERT INTO sequences (sequence, eid, product) VALUES ('ABCDEF', 5, 'env') ;", "explanation": "PL/pgSQL example from PostgreSQL source test for Plpython Populate.", "validation_query": null, "source": "plpgsql_source", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 76, "num_statements": 1} {"question": "Show a SQL definition from the timescaledb project (compat, item 2).", "schema": null, "sql": "CREATE OR REPLACE FUNCTION _timescaledb_internal.attach_osm_table_chunk(hypertable regclass, chunk regclass) RETURNS boolean LANGUAGE PLPGSQL AS $$\nBEGIN\n IF current_setting('timescaledb.enable_deprecation_warnings', true)::bool THEN\n RAISE WARNING 'function _timescaledb_internal.attach_osm_table_chunk(regclass,regclass) is deprecated and has been moved to _timescaledb_functions schema. this compatibility function will be removed in a future version.';\n END IF;\n RETURN _timescaledb_functions.attach_osm_table_chunk($1,$2);\nEND$$\nSET search_path TO pg_catalog,pg_temp;", "explanation": "SQL definition from the open-source timescaledb PostgreSQL project.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "plpgsql_function", "is_postgresql_specific": true, "sql_length": 578, "num_statements": 4} {"question": "pgTAP test for Functap (assertion 284).", "schema": null, "sql": "SELECT * FROM check_test(\n is_window( 'someschema', 'huh'::name, 'whatever' ),\n false,\n 'is_window(schema, func, desc)',\n 'whatever',\n ''\n);", "explanation": "SQL assertion from pgTAP test suite for Functap.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 155, "num_statements": 1} {"question": "PL/pgSQL test: Plpython Populate (example 13).", "schema": null, "sql": "INSERT INTO entry (accession, txid) VALUES ('A00006', '3') ;", "explanation": "PL/pgSQL example from PostgreSQL source test for Plpython Populate.", "validation_query": null, "source": "plpgsql_source", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 60, "num_statements": 1} {"question": "PostgreSQL regression test 'jsonb': Write the SELECT query (example 925).", "schema": null, "sql": "select jsonb_to_tsvector('english', '{\"a\": \"aaa in bbb\", \"b\": 123, \"c\": 456, \"d\": true, \"f\": false, \"g\": null}'::jsonb, '\"boolean\"');", "explanation": "Regression test for Jsonb in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select jsonb_to_tsvector('english', '{\"a\": \"aaa in bbb\", \"b\": 123, \"c\": 456, \"d\": true, \"f\": false, \"g\": null}'::jsonb, '\"boolean\"')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "advanced", "category": "query_select", "is_postgresql_specific": true, "sql_length": 133, "num_statements": 1} {"question": "PostgreSQL regression test 'constraints': Write the SELECT query (example 47).", "schema": null, "sql": "SELECT * FROM INSERT_TBL;", "explanation": "Regression test for Constraints in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT * FROM INSERT_TBL) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 25, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'partition_prune' (example 260).", "schema": null, "sql": "create table hp1 partition of hp for values with (modulus 4, remainder 1);", "explanation": "DDL from PostgreSQL core regression test for Partition Prune.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_table", "is_postgresql_specific": false, "sql_length": 74, "num_statements": 1} {"question": "pgTAP test for Pgtap--Unpackaged--0.91.0 (assertion 619).", "schema": null, "sql": "ALTER EXTENSION pgtap ADD FUNCTION set_hasnt( TEXT, TEXT, TEXT );", "explanation": "SQL assertion from pgTAP test suite for Pgtap--Unpackaged--0.91.0.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 65, "num_statements": 1} {"question": "PostgreSQL regression test 'jsonb': Write the SELECT query (example 465).", "schema": null, "sql": "SELECT count(*) FROM testjsonb WHERE j @? '$.public';", "explanation": "Regression test for Jsonb in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT count(*) FROM testjsonb WHERE j @? '$.public') AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 53, "num_statements": 1} {"question": "PostgreSQL regression test 'horology': Write the SELECT query (example 55).", "schema": null, "sql": "SELECT time without time zone 'T040506.07';", "explanation": "Regression test for Horology in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT time without time zone 'T040506.07') AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 43, "num_statements": 1} {"question": "Write the PL/pgSQL object from PostgreSQL regression test 'triggers' (example 581).", "schema": null, "sql": "$$;\nalter function whoami() owner to regress_fn_owner;\n\ncreate table defer_trig (id integer);\ngrant insert on defer_trig to public;\ncreate constraint trigger whoami after insert on defer_trig\n deferrable initially deferred\n for each row\n execute function whoami();\n\n-- deferred triggers must run as the user that queued the trigger\nbegin;\nset role regress_caller;\ninsert into defer_trig values (1);\nreset role;\nset role regress_fn_owner;\ninsert into defer_trig values (2);\nreset role;\ncommit;\n\n-- security definer functions override the user who queued the trigger\nalter function whoami() security definer;\nbegin;\nset role regress_caller;\ninsert into defer_trig values (3);\nreset role;\ncommit;\nalter function whoami() security invoker;\n\n-- make sure the current user is restored after error\ncreate or replace function whoami() returns trigger language plpgsql\nas $$\nbegin\n raise notice 'I am %', current_user;", "explanation": "PL/pgSQL object from PostgreSQL core test for Triggers.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "plpgsql_trigger", "is_postgresql_specific": true, "sql_length": 913, "num_statements": 21} {"question": "Write the DML statement from PostgreSQL regression test 'subselect' (example 144).", "schema": null, "sql": "insert into tb values(4,2);", "explanation": "DML from PostgreSQL core regression test for Subselect.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 27, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'jsonb' (example 431).", "schema": null, "sql": "INSERT INTO jsbpoptest\nSELECT '{\n\t\"jsa\": [1, \"2\", null, 4],\n\t\"rec\": {\"a\": \"abc\", \"c\": \"01.02.2003\", \"x\": 43.2},\n\t\"reca\": [{\"a\": \"abc\", \"b\": 456}, null, {\"c\": \"01.02.2003\", \"x\": 43.2}]\n}'::jsonb\nFROM generate_series(1, 3);", "explanation": "DML from PostgreSQL core regression test for Jsonb.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": true, "sql_length": 221, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'numeric' (example 279).", "schema": null, "sql": "INSERT INTO num_exp_div VALUES (6,6,'1.00000000000000000000');", "explanation": "DML from PostgreSQL core regression test for Numeric.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 62, "num_statements": 1} {"question": "PostgreSQL Queries: show example 30.", "schema": null, "sql": "SELECT a + b AS sum, c FROM table1 ORDER BY sum; SELECT a, max(b) FROM table1 GROUP BY a ORDER BY 1;", "explanation": "Example from PostgreSQL documentation on Queries.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_aggregation", "is_postgresql_specific": false, "sql_length": 100, "num_statements": 2} {"question": "Show a SQL definition from the postgrest project (schema, item 7).", "schema": null, "sql": "CREATE TABLE items AS SELECT x AS id FROM generate_series(1,5) x;", "explanation": "SQL definition from the open-source postgrest PostgreSQL project.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_table", "is_postgresql_specific": false, "sql_length": 65, "num_statements": 1} {"question": "PostgreSQL regression test 'join': Write the SELECT query (example 727).", "schema": null, "sql": "select * from\n int8_tbl x left join (select q1,coalesce(q2,0) q2 from int8_tbl) y on x.q2 = y.q1,\n lateral (select x.q1,y.q1,y.q2) v(xq1,yq1,yq2);", "explanation": "Regression test for Join in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select * from\n int8_tbl x left join (select q1,coalesce(q2,0) q2 from int8_tbl) y on x.q2 = y.q1,\n lateral (select x.q1,y.q1,y.q2) v(xq1,yq1,yq2)) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": true, "sql_length": 148, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'rowsecurity' (example 352).", "schema": null, "sql": "UPDATE t2 t2_1 SET b = t2_2.b FROM t2 t2_2\nWHERE t2_1.a = 3 AND t2_2.a = t2_1.a AND t2_2.b = t2_1.b\nAND f_leak(t2_1.b) AND f_leak(t2_2.b) RETURNING *, t2_1, t2_2;", "explanation": "DML from PostgreSQL core regression test for Rowsecurity.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_update", "is_postgresql_specific": true, "sql_length": 162, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'postgres_fdw' (example 1006).", "schema": null, "sql": "SELECT count(*)\nFROM pg_foreign_server\nWHERE srvname = 'fetch101'\nAND srvoptions @> array['fetch_size=202'];", "explanation": "Example query from the 'postgres_fdw' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 108, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'cube' (example 13).", "schema": null, "sql": "SELECT '1e-300'::cube AS cube;", "explanation": "Example query from the 'cube' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "query_select", "is_postgresql_specific": true, "sql_length": 30, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'insert_conflict' (example 130).", "schema": null, "sql": "insert into insertconflictv values (1,'foo')\n on conflict (f1) do update set f2 = excluded.f2;", "explanation": "DML from PostgreSQL core regression test for Insert Conflict.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "dml_insert", "is_postgresql_specific": true, "sql_length": 95, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'btree_gist' (example 11).", "schema": null, "sql": "SELECT count(*) FROM inettmp WHERE a <= '89.225.196.191'::inet;", "explanation": "Example query from the 'btree_gist' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 63, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'foreign_key' (example 1044).", "schema": null, "sql": "CREATE TABLE fk (id int, a int DEFAULT 50) PARTITION BY RANGE (a);", "explanation": "DDL from PostgreSQL core regression test for Foreign Key.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "ddl_table", "is_postgresql_specific": true, "sql_length": 66, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'foreign_key' (example 943).", "schema": null, "sql": "INSERT INTO fk VALUES (1);\t\t-- should fail\nBEGIN;", "explanation": "DML from PostgreSQL core regression test for Foreign Key.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "plpgsql", "is_postgresql_specific": false, "sql_length": 49, "num_statements": 2} {"question": "PostgreSQL regression test 'stats': Write the SELECT query (example 297).", "schema": null, "sql": "SELECT pg_stat_have_stats('database', :dboid, 0);", "explanation": "Regression test for Stats in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT pg_stat_have_stats('database', :dboid, 0)) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 49, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'rules' (example 166).", "schema": null, "sql": "insert into rtest_t9 values (28, 'Record should go to rtest_t4 and t8');", "explanation": "DML from PostgreSQL core regression test for Rules.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 72, "num_statements": 1} {"question": "pgTAP test for Pgtap--Unpackaged--0.91.0 (assertion 267).", "schema": null, "sql": "ALTER EXTENSION pgtap ADD FUNCTION has_index ( NAME, NAME, text );", "explanation": "SQL assertion from pgTAP test suite for Pgtap--Unpackaged--0.91.0.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'sqljson_queryfuncs' (example 240).", "schema": null, "sql": "CREATE INDEX ON test_jsonb_mutability (JSON_QUERY(js, '$.datetime(\"YY-MM-DD\") ? (@ == $x)' PASSING '2020-07-14'::date AS x));", "explanation": "DDL from PostgreSQL core regression test for Sqljson Queryfuncs.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "ddl_index", "is_postgresql_specific": false, "sql_length": 125, "num_statements": 1} {"question": "Write the PL/pgSQL object from PostgreSQL regression test 'plpgsql' (example 498).", "schema": null, "sql": "$$ language plpgsql;\n\nselect raise_test();\n\ncreate or replace function raise_test() returns void as $$\nbegin\n raise division_by_zero;", "explanation": "PL/pgSQL object from PostgreSQL core test for Plpgsql.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "plpgsql_function", "is_postgresql_specific": true, "sql_length": 134, "num_statements": 3} {"question": "Show a SQL definition from the pg_partman project (pg_partman--2.3.1--2.3.2, item 14).", "schema": null, "sql": "/*\n * Check if parent table is a subpartition of an already existing partition set managed by pg_partman\n * If so, return the limits of what child tables can be created under the given parent table based on its own suffix\n */\nCREATE OR REPLACE FUNCTION check_subpartition_limits(p_parent_table text, p_type text, OUT sub_min text, OUT sub_max text) RETURNS record\n LANGUAGE plpgsql\n AS $$\nDECLARE\n\nv_datetime_string text;\nv_id_position int;\nv_parent_schema text;\nv_parent_tablename text;\nv_partition_interval interval;\nv_quarter text;\nv_sub_id_max bigint;\nv_sub_id_min bigint;\nv_sub_timestamp_max timestamp;\nv_sub_timestamp_min timestamp;\nv_time_position int;\nv_top_datetime_string text;\nv_top_interval text;\nv_top_parent text;\nv_top_type text;\nv_year text;\n\nBEGIN\n\nSELECT schemaname, tablename INTO v_parent_schema, v_parent_tablename\nFROM pg_catalog.pg_tables\nWHERE schemaname = split_part(p_parent_table, '.', 1)\nAND tablename = split_part(p_parent_table, '.', 2);\n\n-- CTE query is done individually for each type (time, id) because it should return NULL if the top parent is not the same type in a subpartition set (id->time or time->id)\n\nIF p_type = 'id' THEN\n\n WITH top_oid AS (\n SELECT i.inhparent AS top_parent_oid\n FROM pg_catalog.pg_class c\n JOIN pg_catalog.pg_inherits i ON c.oid = i.inhrelid\n JOIN pg_catalog.pg_namespace n ON n.oid = c.relnamespace\n WHERE n.nspname = v_parent_schema\n AND c.relname = v_parent_tablename\n ) SELECT n.nspname||'.'||c.relname, p.datetime_string, p.partition_interval, p.partition_type\n INTO v_top_parent, v_top_datetime_string, v_top_interval, v_top_type\n FROM pg_catalog.pg_class c\n JOIN top_oid t ON c.oid = t.top_parent_oid\n JOIN pg_catalog.pg_namespace n ON n.oid = c.relnamespace\n JOIN @extschema@.part_config p ON p.parent_table = n.nspname||'.'||c.relname\n WHERE c.oid = t.top_parent_oid\n AND p.partition_type = 'id';\n\n IF v_top_parent IS NOT NULL THEN\n SELECT child_start_id::text, child_end_id::text\n INTO sub_min, sub_max\n FROM @extschema@.show_partition_info(p_parent_table, v_top_interval, v_top_parent);\n END IF;\n\nELSIF p_type = 'time' THEN\n\n WITH top_oid AS (\n SELECT i.inhparent AS top_parent_oid\n FROM pg_catalog.pg_class c\n JOIN pg_catalog.pg_inherits i ON c.oid = i.inhrelid\n JOIN pg_catalog.pg_namespace n ON n.oid = c.relnamespace\n WHERE n.nspname = v_parent_schema\n AND c.relname = v_parent_tablename\n ) SELECT n.nspname||'.'||c.relname, p.datetime_string, p.partition_interval, p.partition_type\n INTO v_top_parent, v_top_datetime_string, v_top_interval, v_top_type\n FROM pg_catalog.pg_class c\n JOIN top_oid t ON c.oid = t.top_parent_oid\n JOIN pg_catalog.pg_namespace n ON n.oid = c.relnamespace\n JOIN @extschema@.part_config p ON p.parent_table = n.nspname||'.'||c.relname\n WHERE c.oid = t.top_parent_oid\n AND p.partition_type = 'time' OR p.partition_type = 'time-custom';\n\n IF v_top_parent IS NOT NULL THEN\n SELECT child_start_time::text, child_end_time::text\n INTO sub_min, sub_max\n FROM @extschema@.show_partition_info(p_parent_table, v_top_interval, v_top_parent);\n END IF;\n\nELSE\n RAISE EXCEPTION 'Invalid type given as parameter to check_subpartition_limits()';\nEND IF;\n\nRETURN;\n\nEND\n$$;", "explanation": "SQL definition from the open-source pg_partman PostgreSQL project.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "plpgsql_function", "is_postgresql_specific": true, "sql_length": 3523, "num_statements": 27} {"question": "pgTAP test for Hastap (assertion 121).", "schema": null, "sql": "/****************************************************************************/\n-- Test hasnt_domain().\nSELECT * FROM check_test(\n hasnt_domain( '__foobarbaz__' ),\n true,\n 'hasnt_domain(domain)',\n 'Domain __foobarbaz__ should not exist',\n ''\n);", "explanation": "SQL assertion from pgTAP test suite for Hastap.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 258, "num_statements": 1} {"question": "Write the SQL definition from PostgreSQL contrib extension 'contrib' (item 28).", "schema": null, "sql": "CREATE FUNCTION gin_extract_query_oid(oid, internal, int2, internal, internal)\nRETURNS internal\nAS 'MODULE_PATHNAME'\nLANGUAGE C STRICT IMMUTABLE;", "explanation": "SQL definition from the 'contrib' PostgreSQL contrib extension.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 145, "num_statements": 1} {"question": "pgTAP test for Pgtap--1.2.0--1.3.0 (assertion 1).", "schema": null, "sql": "CREATE OR REPLACE FUNCTION _runner( text[], text[], text[], text[], text[] )\nRETURNS SETOF TEXT AS $$\nDECLARE\n startup ALIAS FOR $1;\n shutdown ALIAS FOR $2;\n setup ALIAS FOR $3;\n teardown ALIAS FOR $4;\n tests ALIAS FOR $5;\n tap TEXT;\n tfaild INTEGER := 0;\n ffaild INTEGER := 0;\n tnumb INTEGER := 0;\n fnumb INTEGER := 0;\n tok BOOLEAN := TRUE;\nBEGIN\n BEGIN\n -- No plan support.\n PERFORM * FROM no_plan();\n FOR tap IN SELECT * FROM _runem(startup, false) LOOP RETURN NEXT tap; END LOOP;\n EXCEPTION\n -- Catch all exceptions and simply rethrow custom exceptions. This\n -- will roll back everything in the above block.\n WHEN raise_exception THEN RAISE EXCEPTION '%', SQLERRM;\n END;\n\n -- Record how startup tests have failed.\n tfaild := num_failed();\n\n FOR i IN 1..COALESCE(array_upper(tests, 1), 0) LOOP\n\n -- What subtest are we running?\n RETURN NEXT diag_test_name('Subtest: ' || tests[i]);\n\n -- Reset the results.\n tok := TRUE;\n tnumb := COALESCE(_get('curr_test'), 0);\n\n IF tnumb > 0 THEN\n EXECUTE 'ALTER SEQUENCE __tresults___numb_seq RESTART WITH 1';\n PERFORM _set('curr_test', 0);\n PERFORM _set('failed', 0);\n END IF;\n\n DECLARE\n errstate text;\n errmsg text;\n detail text;\n hint text;\n context text;\n schname text;\n tabname text;\n colname text;\n chkname text;\n typname text;\n BEGIN\n BEGIN\n -- Run the setup functions.\n FOR tap IN SELECT * FROM _runem(setup, false) LOOP\n RETURN NEXT regexp_replace(tap, '^', ' ', 'gn');\n END LOOP;\n\n -- Run the actual test function.\n FOR tap IN EXECUTE 'SELECT * FROM ' || tests[i] || '()' LOOP\n RETURN NEXT regexp_replace(tap, '^', ' ', 'gn');\n END LOOP;\n\n -- Run the teardown functions.\n FOR tap IN SELECT * FROM _runem(teardown, false) LOOP\n RETURN NEXT regexp_replace(tap, '^', ' ', 'gn');\n END LOOP;\n\n -- Emit the plan.\n fnumb := COALESCE(_get('curr_test'), 0);\n RETURN NEXT ' 1..' || fnumb;\n\n -- Emit any error messages.\n IF fnumb = 0 THEN\n RETURN NEXT ' # No tests run!';\n tok = false;\n ELSE\n -- Report failures.\n ffaild := num_failed();\n IF ffaild > 0 THEN\n tok := FALSE;\n RETURN NEXT ' ' || diag(\n 'Looks like you failed ' || ffaild || ' test' ||\n CASE ffaild WHEN 1 THEN '' ELSE 's' END\n || ' of ' || fnumb\n );\n END IF;\n END IF;\n\n EXCEPTION WHEN OTHERS THEN\n -- Something went wrong. Record that fact.\n errstate := SQLSTATE;\n errmsg := SQLERRM;\n GET STACKED DIAGNOSTICS\n detail = PG_EXCEPTION_DETAIL,\n hint = PG_EXCEPTION_HINT,\n context = PG_EXCEPTION_CONTEXT,\n schname = SCHEMA_NAME,\n tabname = TABLE_NAME,\n colname = COLUMN_NAME,\n chkname = CONSTRAINT_NAME,\n typname = PG_DATATYPE_NAME;\n END;\n\n -- Always raise an exception to rollback any changes.\n RAISE EXCEPTION '__TAP_ROLLBACK__';\n\n EXCEPTION WHEN raise_exception THEN\n IF errmsg IS NOT NULL THEN\n -- Something went wrong. Emit the error message.\n tok := FALSE;\n RETURN NEXT regexp_replace( diag('Test died: ' || _error_diag(\n errstate, errmsg, detail, hint, context, schname, tabname, colname, chkname, typname\n )), '^', ' ', 'gn');\n errmsg := NULL;\n END IF;\n END;\n\n -- Restore the sequence.\n EXECUTE 'ALTER SEQUENCE __tresults___numb_seq RESTART WITH ' || tnumb + 1;\n PERFORM _set('curr_test', tnumb);\n PERFORM _set('failed', tfaild);\n\n -- Record this test.\n RETURN NEXT ok(tok, tests[i]);\n IF NOT tok THEN tfaild := tfaild + 1; END IF;\n\n END LOOP;\n\n -- Run the shutdown functions.\n FOR tap IN SELECT * FROM _runem(shutdown, false) LOOP RETURN NEXT tap; END LOOP;\n\n -- Finish up.\n FOR tap IN SELECT * FROM _finish( COALESCE(_get('curr_test'), 0), 0, tfaild ) LOOP\n RETURN NEXT tap;\n END LOOP;\n\n -- Clean up and return.\n PERFORM _cleanup();\n RETURN;\nEND;\n$$ LANGUAGE plpgsql;", "explanation": "SQL assertion from pgTAP test suite for Pgtap--1.2.0--1.3.0.", "validation_query": null, "source": "pgtap_tests", "difficulty": "advanced", "category": "plpgsql_function", "is_postgresql_specific": true, "sql_length": 4980, "num_statements": 74} {"question": "pgTAP test for Policy (assertion 62).", "schema": null, "sql": "SELECT * FROM check_test(\n policy_cmd_is( 'passwd', 'user_mod', 'all' ),\n false,\n 'policy_cmd_is(table, policy, command) for UPDATE should fail',\n 'Policy user_mod for table passwd should apply to ALL command',\n ' have: UPDATE\n want: ALL'\n);", "explanation": "SQL assertion from pgTAP test suite for Policy.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": true, "sql_length": 270, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'btree_gist' (example 7).", "schema": null, "sql": "SELECT count(*) FROM numerictmp WHERE a > -1890.0;", "explanation": "Example query from the 'btree_gist' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 51, "num_statements": 1} {"question": "Write the SQL definition from PostgreSQL contrib extension 'contrib' (item 7).", "schema": null, "sql": "CREATE FUNCTION gbt_enum_same(gbtreekey8, gbtreekey8, internal)\nRETURNS internal\nAS 'MODULE_PATHNAME'\nLANGUAGE C IMMUTABLE STRICT;", "explanation": "SQL definition from the 'contrib' PostgreSQL contrib extension.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 130, "num_statements": 1} {"question": "PostgreSQL regression test 'sqljson_queryfuncs': Write the SELECT query (example 159).", "schema": null, "sql": "SELECT JSON_QUERY(jsonb '\"[3,4]\"', '$[*]' RETURNING bigint[] EMPTY OBJECT ON ERROR);", "explanation": "Regression test for Sqljson Queryfuncs in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT JSON_QUERY(jsonb '\"[3,4]\"', '$[*]' RETURNING bigint[] EMPTY OBJECT ON ERROR)) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": true, "sql_length": 84, "num_statements": 1} {"question": "Show a SQL definition from the pg_partman project (pg_partman--2.3.2--2.3.3, item 4).", "schema": null, "sql": "/*\n * Apply constraints managed by partman extension\n */\nCREATE OR REPLACE FUNCTION apply_constraints(p_parent_table text, p_child_table text DEFAULT NULL, p_analyze boolean DEFAULT FALSE, p_job_id bigint DEFAULT NULL, p_debug boolean DEFAULT FALSE) RETURNS void\n LANGUAGE plpgsql\n AS $$\nDECLARE\n\nex_context text;\nex_detail text;\nex_hint text;\nex_message text;\nv_child_exists text;\nv_child_tablename text;\nv_col text;\nv_constraint_cols text[];\nv_constraint_col_type text;\nv_constraint_name text;\nv_constraint_values record;\nv_control text;\nv_datetime_string text;\nv_existing_constraint_name text;\nv_job_id bigint;\nv_jobmon boolean;\nv_jobmon_schema text;\nv_last_partition text;\nv_last_partition_id bigint;\nv_last_partition_timestamp timestamp;\nv_max_id bigint;\nv_max_timestamp timestamp;\nv_old_search_path text;\nv_optimize_constraint int;\nv_parent_schema text;\nv_parent_table text;\nv_parent_tablename text;\nv_partition_interval text;\nv_partition_suffix text;\nv_premake int;\nv_sql text;\nv_step_id bigint;\nv_suffix_position int;\nv_type text;\n\nBEGIN\n\nSELECT parent_table\n , partition_type\n , control\n , premake\n , partition_interval\n , optimize_constraint\n , datetime_string\n , constraint_cols\n , jobmon\nINTO v_parent_table\n , v_type\n , v_control\n , v_premake\n , v_partition_interval\n , v_optimize_constraint\n , v_datetime_string\n , v_constraint_cols\n , v_jobmon\nFROM @extschema@.part_config\nWHERE parent_table = p_parent_table\nAND constraint_cols IS NOT NULL;\n\nIF v_constraint_cols IS NULL THEN\n IF p_debug THEN\n RAISE NOTICE 'Given parent table (%) not set up for constraint management (constraint_cols is NULL)', p_parent_table;\n END IF;\n -- Returns silently to allow this function to be simply called by maintenance processes without having to check if config options are set.\n RETURN;\nEND IF;\n\nSELECT schemaname, tablename\nINTO v_parent_schema, v_parent_tablename\nFROM pg_catalog.pg_tables\nWHERE schemaname = split_part(v_parent_table, '.', 1)::name\nAND tablename = split_part(v_parent_table, '.', 2)::name;\n\nIF v_jobmon THEN\n SELECT nspname INTO v_jobmon_schema FROM pg_catalog.pg_namespace n, pg_catalog.pg_extension e WHERE e.extname = 'pg_jobmon' AND e.extnamespace = n.oid;\n IF v_jobmon_schema IS NOT NULL THEN\n SELECT current_setting('search_path') INTO v_old_search_path;\n EXECUTE format('SELECT set_config(%L, %L, %L)', 'search_path', '@extschema@,'||v_jobmon_schema, 'false');\n END IF;\nEND IF;\n\nIF v_jobmon_schema IS NOT NULL THEN\n IF p_job_id IS NULL THEN\n v_job_id := add_job(format('PARTMAN CREATE CONSTRAINT: %s', v_parent_table));\n ELSE\n v_job_id = p_job_id;\n END IF;\nEND IF;\n\n-- If p_child_table is null, figure out the partition that is the one right before the optimize_constraint value backwards.\nIF p_child_table IS NULL THEN\n IF v_jobmon_schema IS NOT NULL THEN\n v_step_id := add_step(v_job_id, 'Applying additional constraints: Automatically determining most recent child on which to apply constraints');\n END IF;\n\n SELECT partition_tablename INTO v_last_partition FROM @extschema@.show_partitions(v_parent_table, 'DESC') LIMIT 1;\n\n IF v_type IN ('time', 'time-custom') THEN\n SELECT child_start_time INTO v_last_partition_timestamp FROM @extschema@.show_partition_info(v_parent_schema||'.'||v_last_partition, v_partition_interval, v_parent_table);\n v_partition_suffix := to_char(v_last_partition_timestamp - (v_partition_interval::interval * (v_optimize_constraint + v_premake + 1) ), v_datetime_string);\n ELSIF v_type = 'id' THEN\n SELECT child_start_id INTO v_last_partition_id FROM @extschema@.show_partition_info(v_parent_schema||'.'||v_last_partition, v_partition_interval, v_parent_table);\n v_partition_suffix := (v_last_partition_id - (v_partition_interval::int * (v_optimize_constraint + v_premake + 1) ))::text;\n END IF;\n\n v_child_tablename := @extschema@.check_name_length(v_parent_tablename, v_partition_suffix, TRUE);\n\n IF p_debug THEN\n RAISE NOTICE 'apply_constraint: v_parent_tablename: % , v_partition_suffix: %', v_parent_tablename, v_partition_suffix;\n END IF;\n\n IF v_jobmon_schema IS NOT NULL THEN\n PERFORM update_step(v_step_id, 'OK', format('Target child table: %s.%s', v_parent_schema, v_child_tablename));\n END IF;\nELSE\n v_child_tablename = split_part(p_child_table, '.', 2);\nEND IF;\n\nIF v_jobmon_schema IS NOT NULL THEN\n v_step_id := add_step(v_job_id, 'Applying additional constraints: Checking if target child table exists');\nEND IF;\n\nSELECT tablename FROM pg_catalog.pg_tables INTO v_child_exists WHERE schemaname = v_parent_schema::name AND tablename = v_child_tablename::name;\nIF v_child_exists IS NULL THEN\n IF v_jobmon_schema IS NOT NULL THEN\n PERFORM update_step(v_step_id, 'NOTICE', format('Target child table (%s) does not exist. Skipping constraint creation.', v_child_tablename));\n IF p_job_id IS NULL THEN\n PERFORM close_job(v_job_id);\n END IF;\n EXECUTE format('SELECT set_config(%L, %L, %L)', 'search_path', v_old_search_path, 'false');\n END IF;\n IF p_debug THEN\n RAISE NOTICE 'Target child table (%) does not exist. Skipping constraint creation.', v_child_tablename;\n END IF;\n RETURN;\nELSE\n IF v_jobmon_schema IS NOT NULL THEN\n PERFORM update_step(v_step_id, 'OK', 'Done');\n END IF;\nEND IF;\n\nFOREACH v_col IN ARRAY v_constraint_cols\nLOOP\n SELECT con.conname\n INTO v_existing_constraint_name\n FROM pg_catalog.pg_constraint con\n JOIN pg_class c ON c.oid = con.conrelid\n JOIN pg_catalog.pg_namespace n ON c.relnamespace = n.oid\n JOIN pg_catalog.pg_attribute a ON con.conrelid = a.attrelid\n WHERE c.relname = v_child_tablename\n AND n.nspname = v_parent_schema\n AND con.conname LIKE 'partmanconstr_%'\n AND con.contype = 'c'\n AND a.attname = v_col\n AND ARRAY[a.attnum] OPERATOR(pg_catalog.<@) con.conkey\n AND a.attisdropped = false;\n\n IF v_jobmon_schema IS NOT NULL THEN\n v_step_id := add_step(v_job_id, format('Applying additional constraints: Applying new constraint on column: %s', v_col));\n END IF;\n\n IF v_existing_constraint_name IS NOT NULL THEN\n IF v_jobmon_schema IS NOT NULL THEN\n PERFORM update_step(v_step_id, 'NOTICE', format('Partman managed constraint already exists on this table (%s) and column (%s). Skipping creation.', v_child_tablename, v_col));\n END IF;\n IF p_debug THEN\n RAISE NOTICE 'Partman managed constraint already exists on this table (%) and column (%). Skipping creation.', v_child_tablename, v_col ;\n END IF;\n CONTINUE;\n END IF;\n\n -- Ensure column name gets put on end of constraint name to help avoid naming conflicts\n v_constraint_name := @extschema@.check_name_length('partmanconstr_'||v_child_tablename, p_suffix := '_'||v_col);\n\n EXECUTE format('SELECT min(%I)::text AS min, max(%I)::text AS max FROM %I.%I', v_col, v_col, v_parent_schema, v_child_tablename) INTO v_constraint_values;\n\n IF v_constraint_values IS NOT NULL THEN\n v_sql := format('ALTER TABLE %I.%I ADD CONSTRAINT %I CHECK (%I >= %L AND %I <= %L)'\n , v_parent_schema\n , v_child_tablename\n , v_constraint_name\n , v_col\n , v_constraint_values.min\n , v_col\n , v_constraint_values.max);\n IF p_debug THEN\n RAISE NOTICE 'Constraint creation query: %', v_sql;\n END IF;\n EXECUTE v_sql;\n\n IF v_jobmon_schema IS NOT NULL THEN\n PERFORM update_step(v_step_id, 'OK', format('New constraint created: %s', v_sql));\n END IF;\n ELSE\n IF p_debug THEN\n RAISE NOTICE 'Given column (%) contains all NULLs. No constraint created', v_col;\n END IF;\n IF v_jobmon_schema IS NOT NULL THEN\n PERFORM update_step(v_step_id, 'NOTICE', format('Given column (%s) contains all NULLs. No constraint created', v_col));\n END IF;\n END IF;\n\nEND LOOP;\n\nIF p_analyze THEN\n IF v_jobmon_schema IS NOT NULL THEN\n v_step_id := add_step(v_job_id, format('Applying additional constraints: Running analyze on partition set: %s', v_parent_table));\n END IF;\n IF p_debug THEN\n RAISE NOTICE 'Running analyze on partition set: %', v_parent_table;\n END IF;\n\n EXECUTE format('ANALYZE %I.%I', v_parent_schema, v_parent_tablename);\n\n IF v_jobmon_schema IS NOT NULL THEN\n PERFORM update_step(v_step_id, 'OK', 'Done');\n END IF;\nEND IF;\n\nIF v_jobmon_schema IS NOT NULL THEN\n PERFORM close_job(v_job_id);\n EXECUTE format('SELECT set_config(%L, %L, %L)', 'search_path', v_old_search_path, 'false');\nEND IF;\n\nEXCEPTION\n WHEN OTHERS THEN\n GET STACKED DIAGNOSTICS ex_message = MESSAGE_TEXT,\n ex_context = PG_EXCEPTION_CONTEXT,\n ex_detail = PG_EXCEPTION_DETAIL,\n ex_hint = PG_EXCEPTION_HINT;\n IF v_jobmon_schema IS NOT NULL THEN\n IF v_job_id IS NULL THEN\n EXECUTE format('SELECT %I.add_job(''PARTMAN CREATE CONSTRAINT: %s'')', v_jobmon_schema, p_parent_table) INTO v_job_id;\n EXECUTE format('SELECT %I.add_step(%s, ''EXCEPTION before job logging started'')', v_jobmon_schema, v_job_id, p_parent_table) INTO v_step_id;\n ELSIF v_step_id IS NULL THEN\n EXECUTE format('SELECT %I.add_step(%s, ''EXCEPTION before first step logged'')', v_jobmon_schema, v_job_id) INTO v_step_id;\n END IF;\n EXECUTE format('SELECT %I.update_step(%s, ''CRITICAL'', %L)', v_jobmon_schema, v_step_id, 'ERROR: '||coalesce(SQLERRM,'unknown'));\n EXECUTE format('SELECT %I.fail_job(%s)', v_jobmon_schema, v_job_id);\n END IF;\n RAISE EXCEPTION '%\nCONTEXT: %\nDETAIL: %\nHINT: %', ex_message, ex_context, ex_detail, ex_hint;\nEND\n$$;", "explanation": "SQL definition from the open-source pg_partman PostgreSQL project.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "plpgsql_function", "is_postgresql_specific": true, "sql_length": 10666, "num_statements": 122} {"question": "PostgreSQL regression test 'create_view': Write the SELECT query (example 235).", "schema": null, "sql": "select f1, f4 from tt14v;", "explanation": "Regression test for Create View in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select f1, f4 from tt14v) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 25, "num_statements": 1} {"question": "pgTAP test for Pgtap--1.1.0--1.2.0 (assertion 54).", "schema": null, "sql": "DROP FUNCTION _agg ( NAME, NAME );", "explanation": "SQL assertion from pgTAP test suite for Pgtap--1.1.0--1.2.0.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 34, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'cluster' (example 13).", "schema": null, "sql": "CREATE TABLE clstr_tst_inh () INHERITS (clstr_tst);", "explanation": "DDL from PostgreSQL core regression test for Cluster.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_table", "is_postgresql_specific": true, "sql_length": 51, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'ltree' (example 210).", "schema": null, "sql": "SELECT 'ltree.awdfg'::ltree @ '!tree & aWdf@*'::ltxtquery;", "explanation": "Example query from the 'ltree' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 58, "num_statements": 1} {"question": "PostgreSQL regression test 'xml': Write the SELECT query (example 83).", "schema": null, "sql": "SELECT xmlserialize(document 'bad' as text);", "explanation": "Regression test for Xml in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT xmlserialize(document 'bad' as text)) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 44, "num_statements": 1} {"question": "pgTAP test for Hastap (assertion 204).", "schema": null, "sql": "SELECT * FROM check_test(\n has_leftop( '+', 'int8' ),\n true,\n 'has_leftop( name, right )',\n 'Left operator +(NONE,int8) should exist',\n ''\n);", "explanation": "SQL assertion from pgTAP test suite for Hastap.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 146, "num_statements": 1} {"question": "PostgreSQL regression test 'jsonb': Write the SELECT query (example 954).", "schema": null, "sql": "select 'null'::jsonb::bool;", "explanation": "Regression test for Jsonb in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select 'null'::jsonb::bool) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": true, "sql_length": 27, "num_statements": 1} {"question": "PostgreSQL Xfunc: show example 1.", "schema": null, "sql": "INSERT INTO mytable VALUES ($1);", "explanation": "Example from PostgreSQL documentation on Xfunc.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 32, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'btree_gin' (example 30).", "schema": null, "sql": "SELECT * FROM test_int2 WHERE i<=32768::int4 ORDER BY i;", "explanation": "Example query from the 'btree_gin' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 56, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'postgres_fdw' (example 209).", "schema": null, "sql": "SELECT t1.c1, t2.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c3, t1.c1 OFFSET 100 LIMIT 10 FOR UPDATE;", "explanation": "Example query from the 'postgres_fdw' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": true, "sql_length": 116, "num_statements": 1} {"question": "PL/pgSQL test: Plperl Trigger (example 4).", "schema": null, "sql": "CREATE TABLE trigger_test_generated (\n i int,\n j int GENERATED ALWAYS AS (i * 2) STORED,\n k int GENERATED ALWAYS AS (i * 3) VIRTUAL\n);", "explanation": "PL/pgSQL example from PostgreSQL source test for Plperl Trigger.", "validation_query": null, "source": "plpgsql_source", "difficulty": "basic", "category": "ddl_table", "is_postgresql_specific": true, "sql_length": 143, "num_statements": 1} {"question": "PostgreSQL regression test 'xml': Write the SELECT query (example 42).", "schema": null, "sql": "SELECT xmlelement(name foo, xmlattributes('<>&\"''' as funny, xml 'br' as funnier));", "explanation": "Regression test for Xml in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT xmlelement(name foo, xmlattributes('<>&\"''' as funny, xml 'br' as funnier))) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 87, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'pageinspect' (example 40).", "schema": null, "sql": "insert into test9s values (131584);", "explanation": "Example query from the 'pageinspect' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 35, "num_statements": 1} {"question": "PostgreSQL regression test 'foreign_data': Write the SELECT query (example 243).", "schema": null, "sql": "SELECT * FROM information_schema.user_mapping_options ORDER BY lower(authorization_identifier), 2, 3, 4;", "explanation": "Regression test for Foreign Data in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT * FROM information_schema.user_mapping_options ORDER BY lower(authorization_identifier), 2, 3, 4) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": true, "sql_length": 104, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'lock' (example 58).", "schema": null, "sql": "CREATE OR REPLACE VIEW lock_view2 AS SELECT * from lock_view3;", "explanation": "DDL from PostgreSQL core regression test for Lock.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 62, "num_statements": 1} {"question": "PostgreSQL regression test 'multirangetypes': Write the SELECT query (example 23).", "schema": null, "sql": "select '{[a,]}'::textmultirange;", "explanation": "Regression test for Multirangetypes in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select '{[a,]}'::textmultirange) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 32, "num_statements": 1} {"question": "Write the PL/pgSQL object from PostgreSQL regression test 'opr_sanity' (example 93).", "schema": null, "sql": "-- Check that all combine functions have signature\n-- combine(transtype, transtype) returns transtype\n\nSELECT a.aggfnoid, p.proname\nFROM pg_aggregate as a, pg_proc as p\nWHERE a.aggcombinefn = p.oid AND\n (p.pronargs != 2 OR\n p.prorettype != p.proargtypes[0] OR\n p.prorettype != p.proargtypes[1] OR\n NOT binary_coercible(a.aggtranstype, p.proargtypes[0]));", "explanation": "PL/pgSQL object from PostgreSQL core test for Opr Sanity.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 369, "num_statements": 1} {"question": "Show an example of PostgreSQL CREATE TYPE (example 4).", "schema": null, "sql": "CREATE TYPE box; CREATE FUNCTION my_box_in_function(cstring) RETURNS box AS ... ; CREATE FUNCTION my_box_out_function(box) RETURNS cstring AS ... ; CREATE TYPE box ( INTERNALLENGTH = 16, INPUT = my_box_in_function, OUTPUT = my_box_out_function ); CREATE TABLE myboxes ( id integer, description box );", "explanation": "PostgreSQL CREATE TYPE command.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_table", "is_postgresql_specific": true, "sql_length": 300, "num_statements": 5} {"question": "Write the DDL statement from PostgreSQL regression test 'rowsecurity' (example 680).", "schema": null, "sql": "CREATE POLICY blog_1 ON blog USING (id % 2 = 0);", "explanation": "DDL from PostgreSQL core regression test for Rowsecurity.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 48, "num_statements": 1} {"question": "PostgreSQL regression test 'plpgsql': Write the SELECT query (example 312).", "schema": null, "sql": "select 1,2 into x, y;", "explanation": "Regression test for Plpgsql in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select 1,2 into x, y) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 21, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'sequence' (example 35).", "schema": null, "sql": "INSERT INTO serialTest2 (f1, f3)\n VALUES ('bogus', 32768);", "explanation": "DML from PostgreSQL core regression test for Sequence.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 59, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'citext' (example 39).", "schema": null, "sql": "SELECT citext_larger( 'ab'::citext, 'ac'::citext ) = 'ac' AS t;", "explanation": "Example query from the 'citext' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 63, "num_statements": 1} {"question": "PostgreSQL regression test 'rangetypes': Write the SELECT query (example 39).", "schema": null, "sql": "select * from pg_input_error_info('[1,2147483647]', 'int4range');", "explanation": "Regression test for Rangetypes in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select * from pg_input_error_info('[1,2147483647]', 'int4range')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 65, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'identity' (example 35).", "schema": null, "sql": "INSERT INTO itest5 OVERRIDING USER VALUE VALUES (DEFAULT, 'ddd'), (-4, 'eee');", "explanation": "DML from PostgreSQL core regression test for Identity.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 78, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'numeric' (example 404).", "schema": null, "sql": "INSERT INTO num_exp_add VALUES (9,8,'-24851923.045047420');", "explanation": "DML from PostgreSQL core regression test for Numeric.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 59, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'update' (example 87).", "schema": null, "sql": "INSERT into mintab VALUES (120);", "explanation": "DML from PostgreSQL core regression test for Update.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 32, "num_statements": 1} {"question": "pgTAP test for Resultset (assertion 432).", "schema": null, "sql": "-- Now when the last row is missing.\nSELECT * FROM check_test(\n results_ne(\n 'SELECT id, name FROM annames WHERE name <> ''Antonio''',\n 'nenames_ord'\n ),\n true,\n 'results_ne(select, prepared) missing last row',\n '',\n ''\n);", "explanation": "SQL assertion from pgTAP test suite for Resultset.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 254, "num_statements": 1} {"question": "PL/pgSQL test: Plpgsql Trap (example 35).", "schema": null, "sql": "select trap_foreign_key_2(); -- detects FK violation\ncommit;\t\t\t\t-- still fails\n\ndrop function trap_foreign_key(int);", "explanation": "PL/pgSQL example from PostgreSQL source test for Plpgsql Trap.", "validation_query": null, "source": "plpgsql_source", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 117, "num_statements": 3} {"question": "Show a query using PostgreSQL contrib extension 'citext' (example 360).", "schema": null, "sql": "SELECT regexp_matches('foobarbequebaz'::citext, '(BAR)(BEQUE)'::citext, ''::citext) = ARRAY[ 'bar', 'beque' ] AS t;", "explanation": "Example query from the 'citext' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 115, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'alter_table' (example 266).", "schema": null, "sql": "insert into alter1.t1(f2) values(12);", "explanation": "DML from PostgreSQL core regression test for Alter Table.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 37, "num_statements": 1} {"question": "Write the PL/pgSQL object from PostgreSQL regression test 'plpgsql' (example 681).", "schema": null, "sql": "-- access to call stack from exception\ncreate function inner_func(int)\nreturns int as $$\ndeclare\n _context text;\n sx int := 5;\nbegin\n begin\n perform sx / 0;\n exception\n when division_by_zero then\n get diagnostics _context = pg_context;\n raise notice '***%***', _context;\n end;\n\n -- lets do it again, just for fun..\n get diagnostics _context = pg_context;\n raise notice '***%***', _context;\n raise notice 'lets make sure we didnt break anything';\n return 2 * $1;\nend;\n$$ language plpgsql;", "explanation": "PL/pgSQL object from PostgreSQL core test for Plpgsql.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "plpgsql_function", "is_postgresql_specific": true, "sql_length": 513, "num_statements": 12} {"question": "Write the DML statement from PostgreSQL regression test 'timestamptz' (example 96).", "schema": null, "sql": "INSERT INTO TIMESTAMPTZ_TBL VALUES ('Feb 16 17:32:01 1697');", "explanation": "DML from PostgreSQL core regression test for Timestamptz.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 60, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'compression_lz4' (example 39).", "schema": null, "sql": "INSERT INTO cmpart VALUES (repeat('123456789', 1004));", "explanation": "DML from PostgreSQL core regression test for Compression Lz4.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 54, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'cluster' (example 72).", "schema": null, "sql": "INSERT INTO clstr_2 VALUES (1);", "explanation": "DML from PostgreSQL core regression test for Cluster.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 31, "num_statements": 1} {"question": "pgTAP test for Pktap (assertion 37).", "schema": null, "sql": "SELECT * FROM check_test(\n col_isnt_pk( 'sometab', 'id' ),\n false,\n 'col_isnt_pk( table, column )',\n 'Column sometab(id) should not be a primary key',\n ' have: {id}\n want: anything else'\n);", "explanation": "SQL assertion from pgTAP test suite for Pktap.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 218, "num_statements": 1} {"question": "Show a SQL definition from the timescaledb project (util_time, item 8).", "schema": null, "sql": "-- Time can be represented in a hypertable as an int* (bigint/integer/smallint) or as a timestamp type (\n-- with or without timezones). In metatables and other internal systems all time values are stored as bigint.\n-- Converting from int* columns to internal representation is a cast to bigint.\n-- Converting from timestamps to internal representation is conversion to epoch (in microseconds).\n\nCREATE OR REPLACE FUNCTION _timescaledb_functions.interval_to_usec(\n chunk_interval INTERVAL\n)\nRETURNS BIGINT LANGUAGE SQL IMMUTABLE PARALLEL SAFE AS\n$BODY$\n SELECT (int_sec * 1000000)::bigint from extract(epoch from chunk_interval) as int_sec;", "explanation": "SQL definition from the open-source timescaledb PostgreSQL project.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "plpgsql_function", "is_postgresql_specific": true, "sql_length": 648, "num_statements": 1} {"question": "pgTAP test for Pgtap--Unpackaged--0.91.0 (assertion 331).", "schema": null, "sql": "ALTER EXTENSION pgtap ADD FUNCTION has_enum( NAME, NAME );", "explanation": "SQL assertion from pgTAP test suite for Pgtap--Unpackaged--0.91.0.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 58, "num_statements": 1} {"question": "PostgreSQL regression test 'sqljson_jsontable': Write the SELECT query (example 58).", "schema": null, "sql": "SELECT * FROM JSON_TABLE(jsonb '\"a\"', '$' COLUMNS (a float4 EXISTS PATH '$.a'));", "explanation": "Regression test for Sqljson Jsontable in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT * FROM JSON_TABLE(jsonb '\"a\"', '$' COLUMNS (a float4 EXISTS PATH '$.a'))) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": true, "sql_length": 80, "num_statements": 1} {"question": "Show an example of PostgreSQL DROP TSTEMPLATE (example 1).", "schema": null, "sql": "DROP TEXT SEARCH TEMPLATE thesaurus;", "explanation": "PostgreSQL DROP TSTEMPLATE command.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 36, "num_statements": 1} {"question": "PostgreSQL regression test 'multirangetypes': Write the SELECT query (example 397).", "schema": null, "sql": "select count(*) from test_multirange_gist where mr = '{}'::int4multirange;", "explanation": "Regression test for Multirangetypes in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select count(*) from test_multirange_gist where mr = '{}'::int4multirange) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 74, "num_statements": 1} {"question": "PostgreSQL regression test 'largeobject': Write the SELECT query (example 108).", "schema": null, "sql": "SELECT lo_import(:'filename');", "explanation": "Regression test for Largeobject in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT lo_import(:'filename')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 30, "num_statements": 1} {"question": "pgTAP test for Index (assertion 27).", "schema": null, "sql": "SELECT * FROM check_test(\n has_index( 'public', 'sometab', 'idx_baz'::name ),\n true,\n 'has_index() no cols no desc',\n 'Index idx_baz should exist',\n ''\n);", "explanation": "SQL assertion from pgTAP test suite for Index.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 169, "num_statements": 1} {"question": "PostgreSQL regression test 'jsonpath': Write the SELECT query (example 163).", "schema": null, "sql": "select '0.0010'::jsonpath;", "explanation": "Regression test for Jsonpath in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select '0.0010'::jsonpath) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 26, "num_statements": 1} {"question": "Show an example of PostgreSQL ALTER ROLE (example 1).", "schema": null, "sql": "ALTER ROLE davide WITH PASSWORD 'hu8jmn3';", "explanation": "PostgreSQL ALTER ROLE command.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 42, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'hash_part' (example 24).", "schema": null, "sql": "create table text_hashp1 partition of text_hashp for values with (modulus 2, remainder 1);", "explanation": "DDL from PostgreSQL core regression test for Hash Part.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_table", "is_postgresql_specific": false, "sql_length": 90, "num_statements": 1} {"question": "Write the PL/pgSQL object from PostgreSQL regression test 'opr_sanity' (example 4).", "schema": null, "sql": "-- proretset should only be set for normal functions\nSELECT p1.oid, p1.proname\nFROM pg_proc AS p1\nWHERE proretset AND prokind != 'f';", "explanation": "PL/pgSQL object from PostgreSQL core test for Opr Sanity.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 133, "num_statements": 1} {"question": "PostgreSQL regression test 'tuplesort': Write the SELECT query (example 12).", "schema": null, "sql": "SELECT noabort_increasing, noabort_decreasing FROM abbrev_abort_uuids ORDER BY noabort_decreasing NULLS FIRST OFFSET 20000 - 4;", "explanation": "Regression test for Tuplesort in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT noabort_increasing, noabort_decreasing FROM abbrev_abort_uuids ORDER BY noabort_decreasing NULLS FIRST OFFSET 20000 - 4) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 127, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'fuzzystrmatch' (example 27).", "schema": null, "sql": "SELECT daitch_mokotoff('Golubitsa');", "explanation": "Example query from the 'fuzzystrmatch' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 36, "num_statements": 1} {"question": "PostgreSQL regression test 'sqljson_jsontable': Write the SELECT query (example 73).", "schema": null, "sql": "SELECT * FROM JSON_TABLE(jsonb '\"world\"', '$' COLUMNS (item text PATH '$' OMIT QUOTES ON SCALAR STRING));", "explanation": "Regression test for Sqljson Jsontable in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT * FROM JSON_TABLE(jsonb '\"world\"', '$' COLUMNS (item text PATH '$' OMIT QUOTES ON SCALAR STRING))) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": true, "sql_length": 105, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'ltree' (example 157).", "schema": null, "sql": "SELECT 'a.b.c.d.e'::ltree ~ '*.a.*.!f.*';", "explanation": "Example query from the 'ltree' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 41, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'insert_conflict' (example 1).", "schema": null, "sql": "create table insertconflicttest(key int4, fruit text);", "explanation": "DDL from PostgreSQL core regression test for Insert Conflict.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_table", "is_postgresql_specific": false, "sql_length": 54, "num_statements": 1} {"question": "PostgreSQL regression test 'numeric': Write the SELECT query (example 888).", "schema": null, "sql": "select (-1.0) ^ 1000000000000000;", "explanation": "Regression test for Numeric in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select (-1.0) ^ 1000000000000000) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 33, "num_statements": 1} {"question": "Show an example of PostgreSQL CREATE TRIGGER (example 5).", "schema": null, "sql": "CREATE TRIGGER view_insert INSTEAD OF INSERT ON my_view FOR EACH ROW EXECUTE FUNCTION view_insert_row();", "explanation": "PostgreSQL CREATE TRIGGER command.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "dml_insert", "is_postgresql_specific": true, "sql_length": 104, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'timestamp' (example 48).", "schema": null, "sql": "INSERT INTO TIMESTAMP_TBL VALUES ('Feb 10 17:32:01 1997');", "explanation": "DML from PostgreSQL core regression test for Timestamp.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 58, "num_statements": 1} {"question": "pgTAP test for Throwtap (assertion 35).", "schema": null, "sql": "/****************************************************************************/\n-- Test ASSERTs\nSELECT lives_ok(\n CASE WHEN pg_version_num() < 90500 THEN $exec$\nCREATE FUNCTION check_assert(b boolean) RETURNS void LANGUAGE plpgsql AS $body$\nBEGIN\n RAISE EXCEPTION 'this code should never be called!';", "explanation": "SQL assertion from pgTAP test suite for Throwtap.", "validation_query": null, "source": "pgtap_tests", "difficulty": "advanced", "category": "plpgsql_function", "is_postgresql_specific": true, "sql_length": 305, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'create_misc' (example 48).", "schema": null, "sql": "INSERT INTO f_star (class, a, f)\n VALUES ('f', 26, '(11111,33333),(22222,44444)'::polygon);", "explanation": "DML from PostgreSQL core regression test for Create Misc.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 93, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'gist' (example 4).", "schema": null, "sql": "create index gist_pointidx3 on gist_point_tbl using gist(p) with (buffering = off);", "explanation": "DDL from PostgreSQL core regression test for Gist.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "ddl_index", "is_postgresql_specific": false, "sql_length": 83, "num_statements": 1} {"question": "PostgreSQL regression test 'tstypes': Write the SELECT query (example 37).", "schema": null, "sql": "SELECT 'foo & bar'::tsquery || 'asd & fg';", "explanation": "Regression test for Tstypes in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT 'foo & bar'::tsquery || 'asd & fg') AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": true, "sql_length": 42, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'partition_join' (example 136).", "schema": null, "sql": "CREATE TABLE plt1_e_p2 PARTITION OF plt1_e FOR VALUES IN ('0001', '0005', '0002', '0009');", "explanation": "DDL from PostgreSQL core regression test for Partition Join.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_table", "is_postgresql_specific": false, "sql_length": 90, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'rowsecurity' (example 596).", "schema": null, "sql": "CREATE POLICY p2 ON x1 FOR INSERT WITH CHECK (a % 2 = 1);", "explanation": "DDL from PostgreSQL core regression test for Rowsecurity.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 57, "num_statements": 1} {"question": "Write the PL/pgSQL object from PostgreSQL regression test 'tuplesort' (example 11).", "schema": null, "sql": "-- plain sort not triggering abbreviated abort\nSELECT noabort_increasing, noabort_decreasing FROM abbrev_abort_uuids ORDER BY noabort_increasing OFFSET 20000 - 4;", "explanation": "PL/pgSQL object from PostgreSQL core test for Tuplesort.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 162, "num_statements": 1} {"question": "PostgreSQL regression test 'float8': Write the SELECT query (example 66).", "schema": null, "sql": "SELECT power(float8 '1', float8 'NaN');", "explanation": "Regression test for Float8 in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT power(float8 '1', float8 'NaN')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 39, "num_statements": 1} {"question": "PostgreSQL regression test 'numeric': Write the SELECT query (example 629).", "schema": null, "sql": "SELECT round(5e-16383, 16382) = 1e-16382;", "explanation": "Regression test for Numeric in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT round(5e-16383, 16382) = 1e-16382) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 41, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'pg_logicalinspect' (example 3).", "schema": null, "sql": "SELECT pg_get_logical_snapshot_info('0--40796E18.snap');", "explanation": "Example query from the 'pg_logicalinspect' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 56, "num_statements": 1} {"question": "Show a SQL definition from the citus project (coordinator_evaluation, item 7).", "schema": null, "sql": "CREATE TABLE coordinator_evaluation_table (key int, value int);", "explanation": "SQL definition from the open-source citus PostgreSQL project.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_table", "is_postgresql_specific": false, "sql_length": 63, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'tid' (example 24).", "schema": null, "sql": "CREATE VIEW tid_view_with_ctid AS SELECT ctid, a FROM tid_tab;", "explanation": "DDL from PostgreSQL core regression test for Tid.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "ddl_view", "is_postgresql_specific": false, "sql_length": 62, "num_statements": 1} {"question": "PostgreSQL regression test 'jsonb_jsonpath': Write the SELECT query (example 201).", "schema": null, "sql": "select jsonb_path_query('{\"a\": [1, 2]}', 'lax $.a * 3');", "explanation": "Regression test for Jsonb Jsonpath in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select jsonb_path_query('{\"a\": [1, 2]}', 'lax $.a * 3')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "advanced", "category": "query_select", "is_postgresql_specific": false, "sql_length": 56, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'arrays' (example 291).", "schema": null, "sql": "INSERT INTO arraggtest (f1, f2, f3) VALUES\n('{1,2,3,4}','{{grey,red},{blue,blue}}','{1.6, 0.0}');", "explanation": "DML from PostgreSQL core regression test for Arrays.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 97, "num_statements": 1} {"question": "PostgreSQL regression test 'interval': Write the SELECT query (example 200).", "schema": null, "sql": "select interval 'PT2562047789';", "explanation": "Regression test for Interval in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select interval 'PT2562047789') AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 31, "num_statements": 1} {"question": "PostgreSQL regression test 'multirangetypes': Write the SELECT query (example 250).", "schema": null, "sql": "SELECT nummultirange() &> nummultirange(numrange(1,2));", "explanation": "Regression test for Multirangetypes in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT nummultirange() &> nummultirange(numrange(1,2))) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 55, "num_statements": 1} {"question": "PostgreSQL regression test 'tsdicts': Write the SELECT query (example 4).", "schema": null, "sql": "SELECT ts_lexize('ispell', 'booking');", "explanation": "Regression test for Tsdicts in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT ts_lexize('ispell', 'booking')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 38, "num_statements": 1} {"question": "PostgreSQL regression test 'arrays': Write the SELECT query (example 363).", "schema": null, "sql": "select v, v is null as \"is null\" from string_to_table('1,2,3,4,,6', ',', '') g(v);", "explanation": "Regression test for Arrays in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select v, v is null as \"is null\" from string_to_table('1,2,3,4,,6', ',', '') g(v)) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 82, "num_statements": 1} {"question": "PostgreSQL regression test 'aggregates': Write the SELECT query (example 162).", "schema": null, "sql": "select max(unique2) from tenk1 order by 1;", "explanation": "Regression test for Aggregates in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select max(unique2) from tenk1 order by 1) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 42, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'transactions' (example 54).", "schema": null, "sql": "CREATE TABLE trans_barbaz (a int);", "explanation": "DDL from PostgreSQL core regression test for Transactions.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_table", "is_postgresql_specific": false, "sql_length": 34, "num_statements": 1} {"question": "PostgreSQL regression test 'jsonb_jsonpath': Write the SELECT query (example 637).", "schema": null, "sql": "select jsonb_path_query('\"12:34\"', '$.datetime(\"HH24:MI\")');", "explanation": "Regression test for Jsonb Jsonpath in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select jsonb_path_query('\"12:34\"', '$.datetime(\"HH24:MI\")')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "advanced", "category": "query_select", "is_postgresql_specific": false, "sql_length": 60, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'btree_gist' (example 26).", "schema": null, "sql": "SELECT count(*) FROM timestamptztmp WHERE a < '2018-12-18 10:59:54 GMT+2'::timestamptz;", "explanation": "Example query from the 'btree_gist' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 88, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'rules' (example 57).", "schema": null, "sql": "insert into rtest_t3 values (3, 33);", "explanation": "DML from PostgreSQL core regression test for Rules.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 36, "num_statements": 1} {"question": "PostgreSQL regression test 'timetz': Write the SELECT query (example 19).", "schema": null, "sql": "SELECT f1 AS \"Seven\" FROM TIMETZ_TBL WHERE f1 > '05:06:07-07';", "explanation": "Regression test for Timetz in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT f1 AS \"Seven\" FROM TIMETZ_TBL WHERE f1 > '05:06:07-07') AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 62, "num_statements": 1} {"question": "Write the SQL definition from PostgreSQL contrib extension 'contrib' (item 9).", "schema": null, "sql": "CREATE FUNCTION uuid_generate_v4()\nRETURNS uuid\nAS 'MODULE_PATHNAME', 'uuid_generate_v4'\nVOLATILE STRICT LANGUAGE C PARALLEL SAFE;", "explanation": "SQL definition from the 'contrib' PostgreSQL contrib extension.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 130, "num_statements": 1} {"question": "PL/pgSQL test: Plpython Record (example 27).", "schema": null, "sql": "SELECT * FROM test_table_record_as('obj', null, null, true);", "explanation": "PL/pgSQL example from PostgreSQL source test for Plpython Record.", "validation_query": null, "source": "plpgsql_source", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 60, "num_statements": 1} {"question": "PostgreSQL regression test 'join': Write the SELECT query (example 316).", "schema": null, "sql": "SELECT qq, unique1\n FROM\n ( SELECT COALESCE(q1, 0) AS qq FROM int8_tbl a ) AS ss1\n FULL OUTER JOIN\n ( SELECT COALESCE(q2, -1) AS qq FROM int8_tbl b ) AS ss2\n USING (qq)\n INNER JOIN tenk1 c ON qq = unique2;", "explanation": "Regression test for Join in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT qq, unique1\n FROM\n ( SELECT COALESCE(q1, 0) AS qq FROM int8_tbl a ) AS ss1\n FULL OUTER JOIN\n ( SELECT COALESCE(q2, -1) AS qq FROM int8_tbl b ) AS ss2\n USING (qq)\n INNER JOIN tenk1 c ON qq = unique2) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 211, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'hash_index' (example 36).", "schema": null, "sql": "UPDATE hash_i4_heap\n SET seqno = 20000\n WHERE hash_i4_heap.random = 1492795354;", "explanation": "DML from PostgreSQL core regression test for Hash Index.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 83, "num_statements": 1} {"question": "PostgreSQL regression test 'horology': Write the SELECT query (example 210).", "schema": null, "sql": "SELECT '4714-11-24 BC'::date::timestamptz; -- fail\n\nSELECT '4714-11-24 BC'::date < '2020-10-05'::timestamptz as t;", "explanation": "Regression test for Horology in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT '4714-11-24 BC'::date::timestamptz; -- fail\n\nSELECT '4714-11-24 BC'::date < '2020-10-05'::timestamptz as t) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 115, "num_statements": 2} {"question": "Write the DML statement from PostgreSQL regression test 'float8' (example 4).", "schema": null, "sql": "INSERT INTO FLOAT8_TBL(f1) VALUES (' -34.84');", "explanation": "DML from PostgreSQL core regression test for Float8.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 48, "num_statements": 1} {"question": "PostgreSQL regression test 'pg_dependencies': Write the SELECT query (example 82).", "schema": null, "sql": "SELECT * FROM pg_input_error_info('[{\"attributes\" : [2,3], \"dependency\": 4, \"degree\": 1.000, \"degree\": 1.000}]', 'pg_dependencies');", "explanation": "Regression test for Pg Dependencies in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT * FROM pg_input_error_info('[{\"attributes\" : [2,3], \"dependency\": 4, \"degree\": 1.000, \"degree\": 1.000}]', 'pg_dependencies')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 132, "num_statements": 1} {"question": "pgTAP test for Resultset (assertion 411).", "schema": null, "sql": "INSERT INTO nord (id, name ) VALUES(183, 'Antonio');", "explanation": "SQL assertion from pgTAP test suite for Resultset.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 52, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'pgcrypto' (example 33).", "schema": null, "sql": "select encode(decrypt(encrypt('Lets try a longer message.', '0123456789', 'aes-cfb'), '0123456789', 'aes-cfb'), 'escape');", "explanation": "Example query from the 'pgcrypto' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 122, "num_statements": 1} {"question": "Show a SQL definition from the citus project (multi_null_minmax_value_pruning, item 9).", "schema": null, "sql": "SELECT master_create_empty_shard('lineitem') as lineitem_shardid1 \\gset\nSELECT master_create_empty_shard('lineitem') as lineitem_shardid2 \\gset\n\nCREATE TABLE orders (LIKE public.orders);", "explanation": "SQL definition from the open-source citus PostgreSQL project.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_table", "is_postgresql_specific": false, "sql_length": 186, "num_statements": 1} {"question": "Write the PL/pgSQL object from PostgreSQL regression test 'create_procedure' (example 49).", "schema": null, "sql": "DROP PROCEDURE ptest2;", "explanation": "PL/pgSQL object from PostgreSQL core test for Create Procedure.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 22, "num_statements": 1} {"question": "PostgreSQL regression test 'rangetypes': Write the SELECT query (example 24).", "schema": null, "sql": "select '([,z)'::textrange;", "explanation": "Regression test for Rangetypes in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select '([,z)'::textrange) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 26, "num_statements": 1} {"question": "Write the SQL definition from PostgreSQL contrib extension 'contrib' (item 8).", "schema": null, "sql": "CREATE FUNCTION _int_contains_joinsel(internal, oid, internal, smallint, internal)\nRETURNS float8\nAS 'MODULE_PATHNAME'\nLANGUAGE C STRICT STABLE;", "explanation": "SQL definition from the 'contrib' PostgreSQL contrib extension.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "other", "is_postgresql_specific": false, "sql_length": 144, "num_statements": 1} {"question": "pgTAP test for Pgtap--1.0.0--1.1.0 (assertion 11).", "schema": null, "sql": "-- _keys( schema, table, constraint_type )\nCREATE OR REPLACE FUNCTION _keys ( NAME, NAME, CHAR )\nRETURNS SETOF NAME[] AS $$\n SELECT _pg_sv_column_array(x.conrelid,x.conkey) -- name[] doesn't support collation\n FROM pg_catalog.pg_namespace n\n JOIN pg_catalog.pg_class c ON n.oid = c.relnamespace\n JOIN pg_catalog.pg_constraint x ON c.oid = x.conrelid\n WHERE n.nspname = $1\n AND c.relname = $2\n AND x.contype = $3\n ORDER BY 1\n$$ LANGUAGE sql;", "explanation": "SQL assertion from pgTAP test suite for Pgtap--1.0.0--1.1.0.", "validation_query": null, "source": "pgtap_tests", "difficulty": "advanced", "category": "plpgsql_function", "is_postgresql_specific": true, "sql_length": 482, "num_statements": 1} {"question": "PostgreSQL regression test 'time': Write the SELECT query (example 19).", "schema": null, "sql": "SELECT '23:59:59.9999999'::time; -- rounds up\nSELECT '23:59:60'::time; -- rounds up\nSELECT '24:00:00'::time; -- allowed\nSELECT '24:00:00.01'::time; -- not allowed\nSELECT '23:59:60.01'::time; -- not allowed\nSELECT '24:01:00'::time; -- not allowed\nSELECT '25:00:00'::time; -- not allowed\n\n-- Test non-error-throwing API\nSELECT pg_input_is_valid('12:00:00', 'time');", "explanation": "Regression test for Time in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT '23:59:59.9999999'::time; -- rounds up\nSELECT '23:59:60'::time; -- rounds up\nSELECT '24:00:00'::time; -- allowed\nSELECT '24:00:00.01'::time; -- not allowed\nSELECT '23:59:60.01'::time; -- not allowed\nSELECT '24:01:00'::time; -- not allowed\nSELECT '25:00:00'::time; -- not allowed\n\n-- Test non-error-throwing API\nSELECT pg_input_is_valid('12:00:00', 'time')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 370, "num_statements": 8} {"question": "Show a query using PostgreSQL contrib extension 'pgstattuple' (example 35).", "schema": null, "sql": "select pgstattuple_approx('test_partitioned');", "explanation": "Example query from the 'pgstattuple' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 46, "num_statements": 1} {"question": "PostgreSQL Indices: show example 39.", "schema": null, "sql": "SELECT * FROM test1c WHERE content > constant ;", "explanation": "Example from PostgreSQL documentation on Indices.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 47, "num_statements": 1} {"question": "Write the PL/pgSQL object from PostgreSQL regression test 'plpgsql' (example 737).", "schema": null, "sql": "-- behave reasonably if someone tries to modify a transition table\nCREATE FUNCTION transition_table_level2_bad_usage_func()\n RETURNS TRIGGER\n LANGUAGE plpgsql\nAS $$\n BEGIN\n INSERT INTO dx VALUES (1000000, 1000000, 'x');\n RETURN NULL;\n END;\n$$;", "explanation": "PL/pgSQL object from PostgreSQL core test for Plpgsql.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "plpgsql_trigger", "is_postgresql_specific": true, "sql_length": 253, "num_statements": 4} {"question": "Write the SQL definition from PostgreSQL contrib extension 'contrib' (item 7).", "schema": null, "sql": "--\n-- bt_page_items_bytea()\n--\nCREATE FUNCTION bt_page_items(IN page bytea,\n OUT itemoffset smallint,\n OUT ctid tid,\n OUT itemlen smallint,\n OUT nulls bool,\n OUT vars bool,\n OUT data text)\nRETURNS SETOF record\nAS 'MODULE_PATHNAME', 'bt_page_items_bytea'\nLANGUAGE C STRICT PARALLEL SAFE;", "explanation": "SQL definition from the 'contrib' PostgreSQL contrib extension.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 304, "num_statements": 1} {"question": "pgTAP test for Policy (assertion 73).", "schema": null, "sql": "SELECT * FROM check_test(\n policy_cmd_is( 'passwd', 'root_all', 'all', 'whatever' ),\n true,\n 'policy_cmd_is(table, policy, command, desc) for ALL',\n 'whatever',\n ''\n);", "explanation": "SQL assertion from pgTAP test suite for Policy.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 182, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'partition_join' (example 579).", "schema": null, "sql": "CREATE TABLE beta_neg_p1 PARTITION OF beta_neg FOR VALUES FROM (100) TO (150);", "explanation": "DDL from PostgreSQL core regression test for Partition Join.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_table", "is_postgresql_specific": false, "sql_length": 78, "num_statements": 1} {"question": "PL/pgSQL test: Plpython Composite (example 28).", "schema": null, "sql": "SELECT * FROM multiout_record_as('obj', null, null, false);", "explanation": "PL/pgSQL example from PostgreSQL source test for Plpython Composite.", "validation_query": null, "source": "plpgsql_source", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 59, "num_statements": 1} {"question": "Show a SQL definition from the timescaledb project (time_bucket, item 4).", "schema": null, "sql": "CREATE OR REPLACE FUNCTION @extschema@.time_bucket(bucket_width INTERVAL, ts UUID) RETURNS TIMESTAMPTZ\n\tAS '@MODULE_PATHNAME@', 'ts_uuid_bucket' LANGUAGE C IMMUTABLE PARALLEL SAFE STRICT;", "explanation": "SQL definition from the open-source timescaledb PostgreSQL project.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "plpgsql_function", "is_postgresql_specific": true, "sql_length": 187, "num_statements": 1} {"question": "Show a SQL definition from the citus project (columnar_data_types, item 4).", "schema": null, "sql": "-- Test array types\nCREATE TABLE test_array_types (int_array int[], bigint_array bigint[],\n\ttext_array text[]) USING columnar;", "explanation": "SQL definition from the open-source citus PostgreSQL project.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_table", "is_postgresql_specific": false, "sql_length": 126, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'foreign_key' (example 32).", "schema": null, "sql": "INSERT INTO PKTABLE VALUES (3, 6, 'Test3');", "explanation": "DML from PostgreSQL core regression test for Foreign Key.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 43, "num_statements": 1} {"question": "Write the SQL definition from PostgreSQL contrib extension 'contrib' (item 8).", "schema": null, "sql": "-- Create the operator class\nCREATE OPERATOR CLASS gist_enum_ops\nDEFAULT FOR TYPE anyenum USING gist\nAS\n\tOPERATOR\t1\t< ,\n\tOPERATOR\t2\t<= ,\n\tOPERATOR\t3\t= ,\n\tOPERATOR\t4\t>= ,\n\tOPERATOR\t5\t> ,\n\tFUNCTION\t1\tgbt_enum_consistent (internal, anyenum, int2, oid, internal),\n\tFUNCTION\t2\tgbt_enum_union (internal, internal),\n\tFUNCTION\t3\tgbt_enum_compress (internal),\n\tFUNCTION\t4\tgbt_decompress (internal),\n\tFUNCTION\t5\tgbt_enum_penalty (internal, internal, internal),\n\tFUNCTION\t6\tgbt_enum_picksplit (internal, internal),\n\tFUNCTION\t7\tgbt_enum_same (gbtreekey8, gbtreekey8, internal),\n\tSTORAGE\t\tgbtreekey8;", "explanation": "SQL definition from the 'contrib' PostgreSQL contrib extension.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "ddl_advanced", "is_postgresql_specific": true, "sql_length": 590, "num_statements": 1} {"question": "PostgreSQL regression test 'join': Write the SELECT query (example 517).", "schema": null, "sql": "SELECT b.* FROM b LEFT JOIN a ON (b.a_id = a.id) WHERE (a.id IS NULL OR a.id > 0);", "explanation": "Regression test for Join in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT b.* FROM b LEFT JOIN a ON (b.a_id = a.id) WHERE (a.id IS NULL OR a.id > 0)) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 82, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'without_overlaps' (example 14).", "schema": null, "sql": "CREATE TABLE temporal_rng2 (\n CONSTRAINT temporal_rng_pk PRIMARY KEY (id, valid_at WITHOUT OVERLAPS)\n) INHERITS (temporal_rng);", "explanation": "DDL from PostgreSQL core regression test for Without Overlaps.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_table", "is_postgresql_specific": true, "sql_length": 128, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'foreign_key' (example 460).", "schema": null, "sql": "update selfref set a = 123 where a = 0;", "explanation": "DML from PostgreSQL core regression test for Foreign Key.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 39, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'citext' (example 5).", "schema": null, "sql": "SELECT 'a'::citext = 'A'::text AS f; -- text wins the discussion\nSELECT 'a'::citext = 'b'::citext AS f;", "explanation": "Example query from the 'citext' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 110, "num_statements": 2} {"question": "Write the DDL statement from PostgreSQL regression test 'bit' (example 30).", "schema": null, "sql": "CREATE TABLE varbit_table (a BIT VARYING(16), b BIT VARYING(16));", "explanation": "DDL from PostgreSQL core regression test for Bit.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_table", "is_postgresql_specific": false, "sql_length": 65, "num_statements": 1} {"question": "PostgreSQL regression test 'alter_table': Write the SELECT query (example 648).", "schema": null, "sql": "SELECT attinhcount, attislocal FROM pg_attribute WHERE attrelid = 'part_3_4'::regclass AND attnum > 0;", "explanation": "Regression test for Alter Table in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT attinhcount, attislocal FROM pg_attribute WHERE attrelid = 'part_3_4'::regclass AND attnum > 0) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 102, "num_statements": 1} {"question": "PostgreSQL regression test 'strings': Write the SELECT query (example 522).", "schema": null, "sql": "SELECT unistr('wrong: \\U0000db99\\U00000061');", "explanation": "Regression test for Strings in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT unistr('wrong: \\U0000db99\\U00000061')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 45, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'rules' (example 262).", "schema": null, "sql": "insert into rtest_view3 select * from rtest_vview1 where a < 7;", "explanation": "DML from PostgreSQL core regression test for Rules.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 63, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'drop_if_exists' (example 75).", "schema": null, "sql": "CREATE OPERATOR @#@\n (leftarg = int8, rightarg = int8, procedure = int8xor);", "explanation": "DDL from PostgreSQL core regression test for Drop If Exists.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "ddl_advanced", "is_postgresql_specific": true, "sql_length": 83, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'pg_stat_statements' (example 7).", "schema": null, "sql": "SELECT * FROM test_squash WHERE id IN (1, 2, 3);", "explanation": "Example query from the 'pg_stat_statements' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 48, "num_statements": 1} {"question": "PostgreSQL regression test 'partition_split': Write the SELECT query (example 206).", "schema": null, "sql": "SELECT * FROM sales_central WHERE sales_state = 'Warsaw';", "explanation": "Regression test for Partition Split in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT * FROM sales_central WHERE sales_state = 'Warsaw') AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 57, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'memoize' (example 43).", "schema": null, "sql": "CREATE TABLE prt (a int) PARTITION BY RANGE(a);", "explanation": "DDL from PostgreSQL core regression test for Memoize.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "ddl_table", "is_postgresql_specific": true, "sql_length": 47, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'numeric_big' (example 208).", "schema": null, "sql": "INSERT INTO num_exp_mul VALUES (4,6,'250145412892811547138949.592621291590152419206270097656346630226508074074623894951308487425470437268130465956063593951784820669318897182831355375451719125809800516979013437732298382708070979871283132689492336823087794373113039154669229889503700598930220858275174342776478898670277868700384853696009897221747924643343353942154528501454689084608965009561564638167714973711022212547096732831847202912862290958304510651828842182545311077713664465815992616213663619529378061133917572474298028065850515876361609671565914027186063801852554353160801534696062207299890867876199323530337336273950892723090754719547285920090419070001019943385293110663922226230169381423410428577990604776655422105400452217085311617728003688836185608912367677734364834577573255789160419371322775733777518997638403409000055707558465286469808848200141192627396502735');", "explanation": "DML from PostgreSQL core regression test for Numeric Big.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 875, "num_statements": 1} {"question": "PostgreSQL Plpgsql: show example 23.", "schema": null, "sql": "PERFORM create_mv('cs_session_page_requests_mv', my_query);", "explanation": "Example from PostgreSQL documentation on Plpgsql.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "plpgsql", "is_postgresql_specific": true, "sql_length": 59, "num_statements": 1} {"question": "PostgreSQL regression test 'create_index': Write the SELECT query (example 347).", "schema": null, "sql": "SELECT unique1, unique2 FROM onek_with_null\n ORDER BY unique2 DESC LIMIT 5;", "explanation": "Regression test for Create Index in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT unique1, unique2 FROM onek_with_null\n ORDER BY unique2 DESC LIMIT 5) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 76, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'sequence' (example 13).", "schema": null, "sql": "CREATE SEQUENCE sequence_test9 AS integer INCREMENT BY -1;", "explanation": "DDL from PostgreSQL core regression test for Sequence.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 58, "num_statements": 1} {"question": "pgTAP test for Pgtap--Unpackaged--0.91.0 (assertion 53).", "schema": null, "sql": "ALTER EXTENSION pgtap ADD FUNCTION fail ( text );", "explanation": "SQL assertion from pgTAP test suite for Pgtap--Unpackaged--0.91.0.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 49, "num_statements": 1} {"question": "PostgreSQL regression test 'arrays': Write the SELECT query (example 430).", "schema": null, "sql": "select pg_typeof(array['11 22 33'::oidvector]);", "explanation": "Regression test for Arrays in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select pg_typeof(array['11 22 33'::oidvector])) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 47, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'vacuum' (example 129).", "schema": null, "sql": "INSERT INTO vac_truncate_test VALUES (1, NULL), (NULL, NULL);", "explanation": "DML from PostgreSQL core regression test for Vacuum.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 61, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'rowsecurity' (example 1068).", "schema": null, "sql": "CREATE POLICY p3 ON rls_part USING (a < 0);", "explanation": "DDL from PostgreSQL core regression test for Rowsecurity.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 43, "num_statements": 1} {"question": "PostgreSQL Ddl: show example 53.", "schema": null, "sql": "ALTER TABLE products ALTER COLUMN product_no DROP NOT NULL;", "explanation": "Example from PostgreSQL documentation on Ddl.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "ddl_alter", "is_postgresql_specific": false, "sql_length": 59, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'citext' (example 278).", "schema": null, "sql": "INSERT INTO caster (text) VALUES ('((0,0),2)'::circle);", "explanation": "Example query from the 'citext' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 64, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'transactions' (example 127).", "schema": null, "sql": "INSERT INTO savepoints VALUES (13);", "explanation": "DML from PostgreSQL core regression test for Transactions.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 35, "num_statements": 1} {"question": "PostgreSQL regression test 'sqljson': Write the SELECT query (example 45).", "schema": null, "sql": "SELECT JSON_SERIALIZE('{ \"a\" : 1 } ' RETURNING varchar);", "explanation": "Regression test for Sqljson in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT JSON_SERIALIZE('{ \"a\" : 1 } ' RETURNING varchar)) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": true, "sql_length": 56, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'numeric_big' (example 95).", "schema": null, "sql": "INSERT INTO num_exp_sub VALUES (1,8,'-8496900980.24523699375539429928140707116805167695126380524350074691312247557192264420150419818976723729812860582476663647913254442686555191453722107164485675679551050629376558940966195135841284978096687306110481009743118940565957556492470398904849289222365256698601073536111216152709126800604695001949246634784573028721762079936564434050796321975774729383704426321489070979168993853338252728216162346796960170352897972568238870481118474064783391570102958474141459619245240874849766946530000977144965');", "explanation": "DML from PostgreSQL core regression test for Numeric Big.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 540, "num_statements": 1} {"question": "Write the PL/pgSQL object from PostgreSQL regression test 'stats' (example 104).", "schema": null, "sql": "-- check that function dropped in a subtransaction leaves no stats behind\nBEGIN;", "explanation": "PL/pgSQL object from PostgreSQL core test for Stats.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "plpgsql_function", "is_postgresql_specific": false, "sql_length": 80, "num_statements": 1} {"question": "PostgreSQL regression test 'pg_dependencies': Write the SELECT query (example 88).", "schema": null, "sql": "SELECT * FROM pg_input_error_info('[{\"attributes\" : [2,2], \"dependency\" : 4, \"degree\": 0.500}]', 'pg_dependencies');", "explanation": "Regression test for Pg Dependencies in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT * FROM pg_input_error_info('[{\"attributes\" : [2,2], \"dependency\" : 4, \"degree\": 0.500}]', 'pg_dependencies')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 116, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'postgres_fdw' (example 242).", "schema": null, "sql": "SELECT ft5, ft5.c1, ft5.c2, ft5.c3, ft4.c1, ft4.c2 FROM ft5 left join ft4 on ft5.c1 = ft4.c1 WHERE ft4.c1 BETWEEN 10 and 30 ORDER BY ft5.c1, ft4.c1;", "explanation": "Example query from the 'postgres_fdw' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 148, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'spgist' (example 7).", "schema": null, "sql": "insert into spgist_point_tbl (id, p)\nselect g+100000, point(g*10+1, g*10+1) from generate_series(1, 10000) g;", "explanation": "DML from PostgreSQL core regression test for Spgist.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 109, "num_statements": 1} {"question": "PostgreSQL regression test 'jsonb': Write the SELECT query (example 739).", "schema": null, "sql": "select '\"a\"'::jsonb #- '{a}'; -- error\nselect '{}'::jsonb #- '{a}';", "explanation": "Regression test for Jsonb in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select '\"a\"'::jsonb #- '{a}'; -- error\nselect '{}'::jsonb #- '{a}') AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": true, "sql_length": 67, "num_statements": 2} {"question": "pgTAP test for Pgtap--Unpackaged--0.91.0 (assertion 663).", "schema": null, "sql": "ALTER EXTENSION pgtap ADD FUNCTION roles_are( NAME[] );", "explanation": "SQL assertion from pgTAP test suite for Pgtap--Unpackaged--0.91.0.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 55, "num_statements": 1} {"question": "PL/pgSQL test: Pltcl Queries (example 79).", "schema": null, "sql": "}\nspi_exec {select replaceme('foe') as inner}\nreturn \"fee $1 $inner\"\n$p$ language pltcl;", "explanation": "PL/pgSQL example from PostgreSQL source test for Pltcl Queries.", "validation_query": null, "source": "plpgsql_source", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 88, "num_statements": 1} {"question": "PostgreSQL regression test 'rowsecurity': Write the SELECT query (example 246).", "schema": null, "sql": "SELECT * FROM part_document_satire ORDER by did;", "explanation": "Regression test for Rowsecurity in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT * FROM part_document_satire ORDER by did) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 48, "num_statements": 1} {"question": "Write the SQL definition from PostgreSQL contrib extension 'contrib' (item 37).", "schema": null, "sql": "CREATE FUNCTION seg_upper(seg)\nRETURNS float4\nAS 'MODULE_PATHNAME'\nLANGUAGE C STRICT IMMUTABLE PARALLEL SAFE;", "explanation": "SQL definition from the 'contrib' PostgreSQL contrib extension.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 109, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'domain' (example 53).", "schema": null, "sql": "INSERT INTO domarrtest values ('{{2,2},{2,2}}', '{{\"a\",\"b\"}}');", "explanation": "DML from PostgreSQL core regression test for Domain.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 63, "num_statements": 1} {"question": "PostgreSQL regression test 'regproc': Write the SELECT query (example 20).", "schema": null, "sql": "SELECT regclass('pg_catalog.pg_class');", "explanation": "Regression test for Regproc in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT regclass('pg_catalog.pg_class')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": true, "sql_length": 39, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'insert' (example 121).", "schema": null, "sql": "create table part_xx_yy partition of list_parted for values in ('xx', 'yy') partition by list (a);", "explanation": "DDL from PostgreSQL core regression test for Insert.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "ddl_table", "is_postgresql_specific": true, "sql_length": 98, "num_statements": 1} {"question": "PostgreSQL regression test 'create_index_spgist': Write the SELECT query (example 22).", "schema": null, "sql": "SELECT count(*) FROM quad_point_tbl WHERE p <<| '(5000, 4000)';", "explanation": "Regression test for Create Index Spgist in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT count(*) FROM quad_point_tbl WHERE p <<| '(5000, 4000)') AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 63, "num_statements": 1} {"question": "PostgreSQL regression test 'multirangetypes': Write the SELECT query (example 115).", "schema": null, "sql": "SELECT * FROM nummultirange_test WHERE multirange_contains_range(nmr, numrange(4.0, 4.2));", "explanation": "Regression test for Multirangetypes in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT * FROM nummultirange_test WHERE multirange_contains_range(nmr, numrange(4.0, 4.2))) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 90, "num_statements": 1} {"question": "PostgreSQL regression test 'partition_merge': Write the SELECT query (example 227).", "schema": null, "sql": "SELECT tablename, indexname, tablespace FROM pg_indexes\n WHERE tablename IN ('t', 'tp_0_2') AND schemaname = 'partitions_merge_schema'\n ORDER BY tablename COLLATE \"C\", indexname COLLATE \"C\", tablespace COLLATE \"C\";", "explanation": "Regression test for Partition Merge in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT tablename, indexname, tablespace FROM pg_indexes\n WHERE tablename IN ('t', 'tp_0_2') AND schemaname = 'partitions_merge_schema'\n ORDER BY tablename COLLATE \"C\", indexname COLLATE \"C\", tablespace COLLATE \"C\") AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": true, "sql_length": 216, "num_statements": 1} {"question": "PL/pgSQL test: Plpython Types (example 22).", "schema": null, "sql": "SELECT * FROM test_type_conversion_int4(null);", "explanation": "PL/pgSQL example from PostgreSQL source test for Plpython Types.", "validation_query": null, "source": "plpgsql_source", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 46, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'aggregates' (example 175).", "schema": null, "sql": "create table minmaxtest2() inherits (minmaxtest);", "explanation": "DDL from PostgreSQL core regression test for Aggregates.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_table", "is_postgresql_specific": true, "sql_length": 49, "num_statements": 1} {"question": "Write the SQL definition from PostgreSQL contrib extension 'contrib' (item 18).", "schema": null, "sql": "CREATE TYPE gbtreekey_var (\n\tINTERNALLENGTH = VARIABLE,\n\tINPUT = gbtreekey_var_in,\n\tOUTPUT = gbtreekey_var_out,\n\tSTORAGE = EXTENDED\n);", "explanation": "SQL definition from the 'contrib' PostgreSQL contrib extension.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_advanced", "is_postgresql_specific": true, "sql_length": 135, "num_statements": 1} {"question": "PostgreSQL regression test 'expressions': Write the SELECT query (example 69).", "schema": null, "sql": "select * from inttest where a not in (1::myint,2::myint,3::myint,4::myint,5::myint, null);", "explanation": "Regression test for Expressions in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select * from inttest where a not in (1::myint,2::myint,3::myint,4::myint,5::myint, null)) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 90, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'collate' (example 101).", "schema": null, "sql": "CREATE TABLE collate_test23 (f1 text collate mycoll2);", "explanation": "DDL from PostgreSQL core regression test for Collate.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_table", "is_postgresql_specific": false, "sql_length": 54, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'lock' (example 8).", "schema": null, "sql": "CREATE VIEW lock_view4 AS SELECT (select a from lock_tbl1a limit 1) from lock_tbl1;", "explanation": "DDL from PostgreSQL core regression test for Lock.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "ddl_view", "is_postgresql_specific": false, "sql_length": 83, "num_statements": 1} {"question": "Show a SQL definition from the citus project (citus_local_table_triggers, item 37).", "schema": null, "sql": "CREATE TABLE local_table(value int);", "explanation": "SQL definition from the open-source citus PostgreSQL project.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_table", "is_postgresql_specific": false, "sql_length": 36, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'numeric' (example 454).", "schema": null, "sql": "INSERT INTO num_exp_power_10_ln VALUES (4,'7405685069594999.07733999469386277636');", "explanation": "DML from PostgreSQL core regression test for Numeric.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 83, "num_statements": 1} {"question": "PostgreSQL regression test 'jsonb_jsonpath': Write the SELECT query (example 225).", "schema": null, "sql": "select jsonb_path_match('[[1, true], [2, false]]', 'strict $[*] ? (@[0] < $x) [1]', '{\"x\": 2}');", "explanation": "Regression test for Jsonb Jsonpath in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select jsonb_path_match('[[1, true], [2, false]]', 'strict $[*] ? (@[0] < $x) [1]', '{\"x\": 2}')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 96, "num_statements": 1} {"question": "PostgreSQL regression test 'jsonb': Write the SELECT query (example 951).", "schema": null, "sql": "select ts_headline('{}'::jsonb, tsquery('aaa & bbb'));", "explanation": "Regression test for Jsonb in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select ts_headline('{}'::jsonb, tsquery('aaa & bbb'))) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": true, "sql_length": 54, "num_statements": 1} {"question": "Show a SQL definition from the citus project (multi_partitioning, item 33).", "schema": null, "sql": "-- 3-) Attaching non distributed table to a distributed table\nCREATE TABLE partitioning_test_2012(id int, time date);", "explanation": "SQL definition from the open-source citus PostgreSQL project.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_table", "is_postgresql_specific": false, "sql_length": 117, "num_statements": 1} {"question": "pgTAP test for Coltap (assertion 24).", "schema": null, "sql": "SELECT * FROM check_test(\n col_type_is( 'public', 'sometab', 'ctstz', 'pg_catalog'::name, 'timestamptz' ),\n true,\n 'col_type_is( sch, tab, col, sch, type, desc )',\n 'Column public.sometab.ctstz should be type pg_catalog.timestamptz',\n ''\n);", "explanation": "SQL assertion from pgTAP test suite for Coltap.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": true, "sql_length": 255, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'constraints' (example 329).", "schema": null, "sql": "CREATE TABLE ATACC1 (a int, NOT NULL a NO INHERIT) PARTITION BY LIST (a);", "explanation": "DDL from PostgreSQL core regression test for Constraints.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "ddl_table", "is_postgresql_specific": true, "sql_length": 73, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'copy2' (example 193).", "schema": null, "sql": "CREATE ROLE regress_rls_copy_user_colperms;", "explanation": "DDL from PostgreSQL core regression test for Copy2.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 43, "num_statements": 1} {"question": "PostgreSQL regression test 'sqljson_queryfuncs': Write the SELECT query (example 300).", "schema": null, "sql": "SELECT JSON_VALUE(jsonb '1234', '$' RETURNING queryfuncs_d_varbit3 DEFAULT '111111' ON ERROR);", "explanation": "Regression test for Sqljson Queryfuncs in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT JSON_VALUE(jsonb '1234', '$' RETURNING queryfuncs_d_varbit3 DEFAULT '111111' ON ERROR)) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": true, "sql_length": 95, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'btree_gin' (example 21).", "schema": null, "sql": "SELECT * FROM test_int2 WHERE i>1::int8 ORDER BY i;", "explanation": "Example query from the 'btree_gin' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 51, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'test_decoding' (example 60).", "schema": null, "sql": "INSERT INTO origin_tbl(data) VALUES ('no_lsn, commit');", "explanation": "Example query from the 'test_decoding' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 55, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'partition_join' (example 433).", "schema": null, "sql": "INSERT INTO plt2_adv SELECT i, i, to_char(i % 10, 'FM0000') FROM generate_series(1, 299) i WHERE i % 10 IN (7, 9);", "explanation": "DML from PostgreSQL core regression test for Partition Join.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 114, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'amcheck' (example 11).", "schema": null, "sql": "INSERT INTO bttest_multi SELECT i, i%2 FROM generate_series(1, 100000) as i;", "explanation": "Example query from the 'amcheck' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 77, "num_statements": 1} {"question": "Write the SQL definition from PostgreSQL contrib extension 'contrib' (item 15).", "schema": null, "sql": "-- gist key\nCREATE FUNCTION gtrgm_in(cstring)\nRETURNS gtrgm\nAS 'MODULE_PATHNAME'\nLANGUAGE C STRICT IMMUTABLE PARALLEL SAFE;", "explanation": "SQL definition from the 'contrib' PostgreSQL contrib extension.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 123, "num_statements": 1} {"question": "PostgreSQL regression test 'date': Write the SELECT query (example 163).", "schema": null, "sql": "SELECT * FROM pg_input_error_info('6874898-01-01', 'date');", "explanation": "Regression test for Date in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT * FROM pg_input_error_info('6874898-01-01', 'date')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 59, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'create_index' (example 202).", "schema": null, "sql": "INSERT INTO func_index_heap VALUES('AB','CDEFG');", "explanation": "DML from PostgreSQL core regression test for Create Index.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 49, "num_statements": 1} {"question": "Write the PL/pgSQL object from PostgreSQL regression test 'psql' (example 233).", "schema": null, "sql": "$$ LANGUAGE plpgsql;\n\n\\set ON_ERROR_ROLLBACK on\n\\echo '# ON_ERROR_ROLLBACK:' :ON_ERROR_ROLLBACK\n\\echo '# AUTOCOMMIT:' :AUTOCOMMIT\n\nBEGIN;\nCREATE TABLE bla(s NO_SUCH_TYPE); -- fails\nCREATE TABLE bla(s TEXT); -- succeeds\nSELECT psql_error('oops!'); -- fails\nINSERT INTO bla VALUES ('Calvin'), ('Hobbes');\nCOMMIT;\n\nSELECT * FROM bla ORDER BY 1;\n\nBEGIN;\nINSERT INTO bla VALUES ('Susie'); -- succeeds\n-- now with combined queries\nINSERT INTO bla VALUES ('Rosalyn') \\; -- will rollback\nSELECT 'before error' AS show \\; -- will show nevertheless!\n SELECT psql_error('boum!') \\; -- failure\n SELECT 'after error' AS noshow; -- hidden by preceding error\nINSERT INTO bla(s) VALUES ('Moe') \\; -- will rollback\n SELECT psql_error('bam!');\nINSERT INTO bla VALUES ('Miss Wormwood'); -- succeeds\nCOMMIT;\nSELECT * FROM bla ORDER BY 1;\n\n-- some with autocommit off\n\\set AUTOCOMMIT off\n\\echo '# AUTOCOMMIT:' :AUTOCOMMIT\n\n-- implicit BEGIN\nINSERT INTO bla VALUES ('Dad'); -- succeeds\nSELECT psql_error('bad!'); -- implicit partial rollback\n\nINSERT INTO bla VALUES ('Mum') \\; -- will rollback\nSELECT COUNT(*) AS \"#mum\"\nFROM bla WHERE s = 'Mum' \\; -- but be counted here\nSELECT psql_error('bad!'); -- implicit partial rollback\nCOMMIT;\n\nSELECT COUNT(*) AS \"#mum\"\nFROM bla WHERE s = 'Mum' \\; -- no mum here\nSELECT * FROM bla ORDER BY 1;\nCOMMIT;\n\n-- reset all\n\\set AUTOCOMMIT on\n\\set ON_ERROR_ROLLBACK off\n\\echo '# final ON_ERROR_ROLLBACK:' :ON_ERROR_ROLLBACK\nDROP TABLE bla;\nDROP FUNCTION psql_error;\n\n-- check describing invalid multipart names\n\\dA regression.heap\n\\dA nonesuch.heap\n\\dt host.regression.pg_catalog.pg_class\n\\dt |.pg_catalog.pg_class\n\\dt nonesuch.pg_catalog.pg_class\n\\da host.regression.pg_catalog.sum\n\\da +.pg_catalog.sum\n\\da nonesuch.pg_catalog.sum\n\\dAc nonesuch.brin\n\\dAc regression.brin\n\\dAf nonesuch.brin\n\\dAf regression.brin\n\\dAo nonesuch.brin\n\\dAo regression.brin\n\\dAp nonesuch.brin\n\\dAp regression.brin\n\\db nonesuch.pg_default\n\\db regression.pg_default\n\\dc host.regression.public.conversion\n\\dc (.public.conversion\n\\dc nonesuch.public.conversion\n\\dC host.regression.pg_catalog.int8\n\\dC ).pg_catalog.int8\n\\dC nonesuch.pg_catalog.int8\n\\dd host.regression.pg_catalog.pg_class\n\\dd [.pg_catalog.pg_class\n\\dd nonesuch.pg_catalog.pg_class\n\\dD host.regression.public.gtestdomain1\n\\dD ].public.gtestdomain1\n\\dD nonesuch.public.gtestdomain1\n\\ddp host.regression.pg_catalog.pg_class\n\\ddp {.pg_catalog.pg_class\n\\ddp nonesuch.pg_catalog.pg_class\n\\dE host.regression.public.ft\n\\dE }.public.ft\n\\dE nonesuch.public.ft\n\\di host.regression.public.tenk1_hundred\n\\di ..public.tenk1_hundred\n\\di nonesuch.public.tenk1_hundred\n\\dm host.regression.public.mvtest_bb\n\\dm ^.public.mvtest_bb\n\\dm nonesuch.public.mvtest_bb\n\\ds host.regression.public.check_seq\n\\ds regression|mydb.public.check_seq\n\\ds nonesuch.public.check_seq\n\\dt host.regression.public.b_star\n\\dt regres+ion.public.b_star\n\\dt nonesuch.public.b_star\n\\dv host.regression.public.shoe\n\\dv regress(ion).public.shoe\n\\dv nonesuch.public.shoe\n\\des nonesuch.server\n\\des regression.server\n\\des nonesuch.server\n\\des regression.server\n\\des nonesuch.username\n\\des regression.username\n\\dew nonesuch.fdw\n\\dew regression.fdw\n\\df host.regression.public.namelen\n\\df regres[qrstuv]ion.public.namelen\n\\df nonesuch.public.namelen\n\\dF host.regression.pg_catalog.arabic\n\\dF regres{1,2}ion.pg_catalog.arabic\n\\dF nonesuch.pg_catalog.arabic\n\\dFd host.regression.pg_catalog.arabic_stem\n\\dFd regres?ion.pg_catalog.arabic_stem\n\\dFd nonesuch.pg_catalog.arabic_stem\n\\dFp host.regression.pg_catalog.default\n\\dFp ^regression.pg_catalog.default\n\\dFp nonesuch.pg_catalog.default\n\\dFt host.regression.pg_catalog.ispell\n\\dFt regression$.pg_catalog.ispell\n\\dFt nonesuch.pg_catalog.ispell\n\\dg nonesuch.pg_database_owner\n\\dg regression.pg_database_owner\n\\dL host.regression.plpgsql\n\\dL *.plpgsql\n\\dL nonesuch.plpgsql\n\\dn host.regression.public\n\\dn \"\"\"\".public\n\\dn nonesuch.public\n\\do host.regression.public.!=-\n\\do \"regression|mydb\".public.!=-\n\\do nonesuch.public.!=-\n\\dO host.regression.pg_catalog.POSIX\n\\dO .pg_catalog.POSIX\n\\dO nonesuch.pg_catalog.POSIX\n\\dp host.regression.public.a_star\n\\dp \"regres+ion\".public.a_star\n\\dp nonesuch.public.a_star\n\\dP host.regression.public.mlparted\n\\dP \"regres(sion)\".public.mlparted\n\\dP nonesuch.public.mlparted\n\\drds nonesuch.lc_messages\n\\drds regression.lc_messages\n\\dRp public.mypub\n\\dRp regression.mypub\n\\dRs public.mysub\n\\dRs regression.mysub\n\\dT host.regression.public.widget\n\\dT \"regression{1,2}\".public.widget\n\\dT nonesuch.public.widget\n\\dx regression.plpgsql\n\\dx nonesuch.plpgsql\n\\dX host.regression.public.func_deps_stat\n\\dX \"^regression$\".public.func_deps_stat\n\\dX nonesuch.public.func_deps_stat\n\\dy regression.myevt\n\\dy nonesuch.myevt\n\n-- check that dots within quoted name segments are not counted\n\\dA \"no.such.access.method\"\n\\dt \"no.such.table.relation\"\n\\da \"no.such.aggregate.function\"\n\\dAc \"no.such.operator.class\"\n\\dAf \"no.such.operator.family\"\n\\dAo \"no.such.operator.of.operator.family\"\n\\dAp \"no.such.operator.support.function.of.operator.family\"\n\\db \"no.such.tablespace\"\n\\dc \"no.such.conversion\"\n\\dC \"no.such.cast\"\n\\dd \"no.such.object.description\"\n\\dD \"no.such.domain\"\n\\ddp \"no.such.default.access.privilege\"\n\\di \"no.such.index.relation\"\n\\dm \"no.such.materialized.view\"\n\\ds \"no.such.relation\"\n\\dt \"no.such.relation\"\n\\dv \"no.such.relation\"\n\\des \"no.such.foreign.server\"\n\\dew \"no.such.foreign.data.wrapper\"\n\\df \"no.such.function\"\n\\dF \"no.such.text.search.configuration\"\n\\dFd \"no.such.text.search.dictionary\"\n\\dFp \"no.such.text.search.parser\"\n\\dFt \"no.such.text.search.template\"\n\\dg \"no.such.role\"\n\\dL \"no.such.language\"\n\\dn \"no.such.schema\"\n\\do \"no.such.operator\"\n\\dO \"no.such.collation\"\n\\dp \"no.such.access.privilege\"\n\\dP \"no.such.partitioned.relation\"\n\\drds \"no.such.setting\"\n\\dRp \"no.such.publication\"\n\\dRs \"no.such.subscription\"\n\\dT \"no.such.data.type\"\n\\dx \"no.such.installed.extension\"\n\\dX \"no.such.extended.statistics\"\n\\dy \"no.such.event.trigger\"\n\n-- again, but with dotted schema qualifications.\n\\dA \"no.such.schema\".\"no.such.access.method\"\n\\dt \"no.such.schema\".\"no.such.table.relation\"\n\\da \"no.such.schema\".\"no.such.aggregate.function\"\n\\dAc \"no.such.schema\".\"no.such.operator.class\"\n\\dAf \"no.such.schema\".\"no.such.operator.family\"\n\\dAo \"no.such.schema\".\"no.such.operator.of.operator.family\"\n\\dAp \"no.such.schema\".\"no.such.operator.support.function.of.operator.family\"\n\\db \"no.such.schema\".\"no.such.tablespace\"\n\\dc \"no.such.schema\".\"no.such.conversion\"\n\\dC \"no.such.schema\".\"no.such.cast\"\n\\dd \"no.such.schema\".\"no.such.object.description\"\n\\dD \"no.such.schema\".\"no.such.domain\"\n\\ddp \"no.such.schema\".\"no.such.default.access.privilege\"\n\\di \"no.such.schema\".\"no.such.index.relation\"\n\\dm \"no.such.schema\".\"no.such.materialized.view\"\n\\ds \"no.such.schema\".\"no.such.relation\"\n\\dt \"no.such.schema\".\"no.such.relation\"\n\\dv \"no.such.schema\".\"no.such.relation\"\n\\des \"no.such.schema\".\"no.such.foreign.server\"\n\\dew \"no.such.schema\".\"no.such.foreign.data.wrapper\"\n\\df \"no.such.schema\".\"no.such.function\"\n\\dF \"no.such.schema\".\"no.such.text.search.configuration\"\n\\dFd \"no.such.schema\".\"no.such.text.search.dictionary\"\n\\dFp \"no.such.schema\".\"no.such.text.search.parser\"\n\\dFt \"no.such.schema\".\"no.such.text.search.template\"\n\\dg \"no.such.schema\".\"no.such.role\"\n\\dL \"no.such.schema\".\"no.such.language\"\n\\do \"no.such.schema\".\"no.such.operator\"\n\\dO \"no.such.schema\".\"no.such.collation\"\n\\dp \"no.such.schema\".\"no.such.access.privilege\"\n\\dP \"no.such.schema\".\"no.such.partitioned.relation\"\n\\drds \"no.such.schema\".\"no.such.setting\"\n\\dRp \"no.such.schema\".\"no.such.publication\"\n\\dRs \"no.such.schema\".\"no.such.subscription\"\n\\dT \"no.such.schema\".\"no.such.data.type\"\n\\dx \"no.such.schema\".\"no.such.installed.extension\"\n\\dX \"no.such.schema\".\"no.such.extended.statistics\"\n\\dy \"no.such.schema\".\"no.such.event.trigger\"\n\n-- again, but with current database and dotted schema qualifications.\n\\dt regression.\"no.such.schema\".\"no.such.table.relation\"\n\\da regression.\"no.such.schema\".\"no.such.aggregate.function\"\n\\dc regression.\"no.such.schema\".\"no.such.conversion\"\n\\dC regression.\"no.such.schema\".\"no.such.cast\"\n\\dd regression.\"no.such.schema\".\"no.such.object.description\"\n\\dD regression.\"no.such.schema\".\"no.such.domain\"\n\\di regression.\"no.such.schema\".\"no.such.index.relation\"\n\\dm regression.\"no.such.schema\".\"no.such.materialized.view\"\n\\ds regression.\"no.such.schema\".\"no.such.relation\"\n\\dt regression.\"no.such.schema\".\"no.such.relation\"\n\\dv regression.\"no.such.schema\".\"no.such.relation\"\n\\df regression.\"no.such.schema\".\"no.such.function\"\n\\dF regression.\"no.such.schema\".\"no.such.text.search.configuration\"\n\\dFd regression.\"no.such.schema\".\"no.such.text.search.dictionary\"\n\\dFp regression.\"no.such.schema\".\"no.such.text.search.parser\"\n\\dFt regression.\"no.such.schema\".\"no.such.text.search.template\"\n\\do regression.\"no.such.schema\".\"no.such.operator\"\n\\dO regression.\"no.such.schema\".\"no.such.collation\"\n\\dp regression.\"no.such.schema\".\"no.such.access.privilege\"\n\\dP regression.\"no.such.schema\".\"no.such.partitioned.relation\"\n\\dT regression.\"no.such.schema\".\"no.such.data.type\"\n\\dX regression.\"no.such.schema\".\"no.such.extended.statistics\"\n\n-- again, but with dotted database and dotted schema qualifications.\n\\dt \"no.such.database\".\"no.such.schema\".\"no.such.table.relation\"\n\\da \"no.such.database\".\"no.such.schema\".\"no.such.aggregate.function\"\n\\dc \"no.such.database\".\"no.such.schema\".\"no.such.conversion\"\n\\dC \"no.such.database\".\"no.such.schema\".\"no.such.cast\"\n\\dd \"no.such.database\".\"no.such.schema\".\"no.such.object.description\"\n\\dD \"no.such.database\".\"no.such.schema\".\"no.such.domain\"\n\\ddp \"no.such.database\".\"no.such.schema\".\"no.such.default.access.privilege\"\n\\di \"no.such.database\".\"no.such.schema\".\"no.such.index.relation\"\n\\dm \"no.such.database\".\"no.such.schema\".\"no.such.materialized.view\"\n\\ds \"no.such.database\".\"no.such.schema\".\"no.such.relation\"\n\\dt \"no.such.database\".\"no.such.schema\".\"no.such.relation\"\n\\dv \"no.such.database\".\"no.such.schema\".\"no.such.relation\"\n\\df \"no.such.database\".\"no.such.schema\".\"no.such.function\"\n\\dF \"no.such.database\".\"no.such.schema\".\"no.such.text.search.configuration\"\n\\dFd \"no.such.database\".\"no.such.schema\".\"no.such.text.search.dictionary\"\n\\dFp \"no.such.database\".\"no.such.schema\".\"no.such.text.search.parser\"\n\\dFt \"no.such.database\".\"no.such.schema\".\"no.such.text.search.template\"\n\\do \"no.such.database\".\"no.such.schema\".\"no.such.operator\"\n\\dO \"no.such.database\".\"no.such.schema\".\"no.such.collation\"\n\\dp \"no.such.database\".\"no.such.schema\".\"no.such.access.privilege\"\n\\dP \"no.such.database\".\"no.such.schema\".\"no.such.partitioned.relation\"\n\\dT \"no.such.database\".\"no.such.schema\".\"no.such.data.type\"\n\\dX \"no.such.database\".\"no.such.schema\".\"no.such.extended.statistics\"\n\n-- check \\drg and \\du\nCREATE ROLE regress_du_role0;\nCREATE ROLE regress_du_role1;\nCREATE ROLE regress_du_role2;\nCREATE ROLE regress_du_admin;\n\nGRANT regress_du_role0 TO regress_du_admin WITH ADMIN TRUE;\nGRANT regress_du_role1 TO regress_du_admin WITH ADMIN TRUE;\nGRANT regress_du_role2 TO regress_du_admin WITH ADMIN TRUE;\n\nGRANT regress_du_role0 TO regress_du_role1 WITH ADMIN TRUE, INHERIT TRUE, SET TRUE GRANTED BY regress_du_admin;\nGRANT regress_du_role0 TO regress_du_role2 WITH ADMIN TRUE, INHERIT FALSE, SET FALSE GRANTED BY regress_du_admin;\nGRANT regress_du_role1 TO regress_du_role2 WITH ADMIN TRUE , INHERIT FALSE, SET TRUE GRANTED BY regress_du_admin;\nGRANT regress_du_role0 TO regress_du_role1 WITH ADMIN FALSE, INHERIT TRUE, SET FALSE GRANTED BY regress_du_role1;\nGRANT regress_du_role0 TO regress_du_role2 WITH ADMIN FALSE, INHERIT TRUE , SET TRUE GRANTED BY regress_du_role1;\nGRANT regress_du_role0 TO regress_du_role1 WITH ADMIN FALSE, INHERIT FALSE, SET TRUE GRANTED BY regress_du_role2;\nGRANT regress_du_role0 TO regress_du_role2 WITH ADMIN FALSE, INHERIT FALSE, SET FALSE GRANTED BY regress_du_role2;\n\n\\drg regress_du_role*\n\\du regress_du_role*\n\nDROP ROLE regress_du_role0;\nDROP ROLE regress_du_role1;\nDROP ROLE regress_du_role2;\nDROP ROLE regress_du_admin;\n\n-- Test display of empty privileges.\nBEGIN;\n-- Create an owner for tested objects because output contains owner name.\nCREATE ROLE regress_zeropriv_owner;\nSET LOCAL ROLE regress_zeropriv_owner;\n\nCREATE DOMAIN regress_zeropriv_domain AS int;\nREVOKE ALL ON DOMAIN regress_zeropriv_domain FROM CURRENT_USER, PUBLIC;\n\\dD+ regress_zeropriv_domain\n\nCREATE PROCEDURE regress_zeropriv_proc() LANGUAGE sql AS '';\nREVOKE ALL ON PROCEDURE regress_zeropriv_proc() FROM CURRENT_USER, PUBLIC;\n\\df+ regress_zeropriv_proc\n\nCREATE TABLE regress_zeropriv_tbl (a int);\nREVOKE ALL ON TABLE regress_zeropriv_tbl FROM CURRENT_USER;\n\\dp regress_zeropriv_tbl\n\nCREATE TYPE regress_zeropriv_type AS (a int);\nREVOKE ALL ON TYPE regress_zeropriv_type FROM CURRENT_USER, PUBLIC;\n\\dT+ regress_zeropriv_type\n\nROLLBACK;\n\n-- Test display of default privileges with \\pset null.\nCREATE TABLE defprivs (a int);\n\\pset null '(default)'\n\\z defprivs\n\\zx defprivs\n\\pset null ''\nDROP TABLE defprivs;", "explanation": "PL/pgSQL object from PostgreSQL core test for Psql.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "plpgsql_trigger", "is_postgresql_specific": true, "sql_length": 13052, "num_statements": 62} {"question": "PostgreSQL regression test 'timestamptz': Write the SELECT query (example 175).", "schema": null, "sql": "SELECT to_char(d1, 'YYYY A.D. YYYY a.d. YYYY bc HH:MI:SS P.M. HH:MI:SS p.m. HH:MI:SS pm')\n FROM TIMESTAMPTZ_TBL;", "explanation": "Regression test for Timestamptz in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT to_char(d1, 'YYYY A.D. YYYY a.d. YYYY bc HH:MI:SS P.M. HH:MI:SS p.m. HH:MI:SS pm')\n FROM TIMESTAMPTZ_TBL) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 114, "num_statements": 1} {"question": "Write the PL/pgSQL object from PostgreSQL regression test 'triggers' (example 126).", "schema": null, "sql": "$$;\n\ndelete from parent where aid = 1;\nselect * from parent; select * from child;\n\ndrop table parent, child;\n\ndrop function parent_upd_func();\ndrop function parent_del_func();\ndrop function child_ins_func();\ndrop function child_del_func();\n\n-- similar case, but with a self-referencing FK so that parent and child\n-- rows can be affected by a single operation\n\ncreate temp table self_ref_trigger (\n id int primary key,\n parent int references self_ref_trigger,\n data text,\n nchildren int not null default 0\n);\n\ncreate function self_ref_trigger_ins_func()\n returns trigger language plpgsql as\n$$\nbegin\n if new.parent is not null then\n update self_ref_trigger set nchildren = nchildren + 1\n where id = new.parent;", "explanation": "PL/pgSQL object from PostgreSQL core test for Triggers.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "plpgsql_trigger", "is_postgresql_specific": true, "sql_length": 732, "num_statements": 11} {"question": "Write the SQL definition from PostgreSQL contrib extension 'contrib' (item 59).", "schema": null, "sql": "-- these are obsolete/deprecated:\nCREATE OPERATOR @ (\n\tLEFTARG = cube, RIGHTARG = cube, PROCEDURE = cube_contains,\n\tCOMMUTATOR = '~',\n\tRESTRICT = contsel, JOIN = contjoinsel\n);", "explanation": "SQL definition from the 'contrib' PostgreSQL contrib extension.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "ddl_advanced", "is_postgresql_specific": true, "sql_length": 176, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'brin' (example 84).", "schema": null, "sql": "CREATE INDEX brin_test_toast_idx ON brintest_3 USING brin (b, c);", "explanation": "DDL from PostgreSQL core regression test for Brin.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "ddl_index", "is_postgresql_specific": false, "sql_length": 65, "num_statements": 1} {"question": "pgTAP test for Functap (assertion 80).", "schema": null, "sql": "SELECT * FROM check_test(\n function_lang_is( 'someproc', 'sql', 'whatever' ),\n true,\n 'function_lang_is(schema, proc, desc)',\n 'whatever',\n ''\n);", "explanation": "SQL assertion from pgTAP test suite for Functap.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 160, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'partition_join' (example 6).", "schema": null, "sql": "INSERT INTO prt1 SELECT i, i % 25, to_char(i, 'FM0000') FROM generate_series(0, 599) i WHERE i % 2 = 0;", "explanation": "DML from PostgreSQL core regression test for Partition Join.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 103, "num_statements": 1} {"question": "pgTAP test for Pgtap--Unpackaged--0.91.0 (assertion 23).", "schema": null, "sql": "ALTER EXTENSION pgtap ADD FUNCTION diag( VARIADIC text[] );", "explanation": "SQL assertion from pgTAP test suite for Pgtap--Unpackaged--0.91.0.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 59, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'domain' (example 3).", "schema": null, "sql": "create domain dependenttypetest domaindroptest;", "explanation": "DDL from PostgreSQL core regression test for Domain.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_advanced", "is_postgresql_specific": true, "sql_length": 47, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'stats_ext' (example 11).", "schema": null, "sql": "CREATE STATISTICS tst ON (x || 'x'), (x || 'x'), (y + 1), (x || 'x'), (x || 'x'), (y + 1), (x || 'x'), (x || 'x'), (y + 1) FROM ext_stats_test;", "explanation": "DDL from PostgreSQL core regression test for Stats Ext.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 143, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'partition_info' (example 12).", "schema": null, "sql": "CREATE TABLE ptif_test2 PARTITION OF ptif_test\n FOR VALUES FROM (100) TO (200);", "explanation": "DDL from PostgreSQL core regression test for Partition Info.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_table", "is_postgresql_specific": false, "sql_length": 80, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'rangefuncs' (example 18).", "schema": null, "sql": "create temporary view vw_ord as select * from (values (1)) v(n) join rows from(rngfunct(1),rngfunct(2)) with ordinality as z(a,b,c,d,ord) on (n=ord);", "explanation": "DDL from PostgreSQL core regression test for Rangefuncs.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "query_join", "is_postgresql_specific": false, "sql_length": 149, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'cube' (example 204).", "schema": null, "sql": "SELECT distance_taxicab('(1,1)'::cube, '(4,5)'::cube);", "explanation": "Example query from the 'cube' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "query_select", "is_postgresql_specific": true, "sql_length": 54, "num_statements": 1} {"question": "pgTAP test for Aretap (assertion 129).", "schema": null, "sql": "SELECT * FROM check_test(\n rules_are( 'fou', ARRAY['ins_me', 'upd_me', 'del_me'] ),\n false,\n 'rules_are(table, rules) + missing',\n 'Relation fou should have the correct rules',\n ' Missing rules:\n del_me'\n);", "explanation": "SQL assertion from pgTAP test suite for Aretap.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 231, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'matview' (example 82).", "schema": null, "sql": "INSERT INTO mvtest_foo VALUES(3, 4, 5);", "explanation": "DML from PostgreSQL core regression test for Matview.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 39, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'inet' (example 16).", "schema": null, "sql": "INSERT INTO INET_TBL (c, i) VALUES ('10', '9.1.2.3/8');", "explanation": "DML from PostgreSQL core regression test for Inet.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 55, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'rules' (example 157).", "schema": null, "sql": "delete from rtest_t7;", "explanation": "DML from PostgreSQL core regression test for Rules.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_delete", "is_postgresql_specific": false, "sql_length": 21, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'domain' (example 208).", "schema": null, "sql": "update domnotnull set col1 = null;\t\t-- fails\nselect conname, pg_get_constraintdef(oid) from pg_constraint\n where contypid = 'dnotnulltest'::regtype;", "explanation": "DML from PostgreSQL core regression test for Domain.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_update", "is_postgresql_specific": false, "sql_length": 148, "num_statements": 2} {"question": "PostgreSQL regression test 'int2': Write the SELECT query (example 25).", "schema": null, "sql": "SELECT i.* FROM INT2_TBL i WHERE i.f1 <= int2 '0';", "explanation": "Regression test for Int2 in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT i.* FROM INT2_TBL i WHERE i.f1 <= int2 '0') AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 50, "num_statements": 1} {"question": "PL/pgSQL test: Plpython Trigger (example 5).", "schema": null, "sql": "CREATE TRIGGER users_update_trig BEFORE UPDATE ON users FOR EACH ROW\n\tEXECUTE PROCEDURE users_update ('willem');", "explanation": "PL/pgSQL example from PostgreSQL source test for Plpython Trigger.", "validation_query": null, "source": "plpgsql_source", "difficulty": "advanced", "category": "dml_update", "is_postgresql_specific": true, "sql_length": 112, "num_statements": 1} {"question": "PostgreSQL regression test 'foreign_data': Write the SELECT query (example 268).", "schema": null, "sql": "SELECT has_server_privilege(\n (SELECT oid FROM pg_roles WHERE rolname='regress_test_role'), 's8', 'USAGE');", "explanation": "Regression test for Foreign Data in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT has_server_privilege(\n (SELECT oid FROM pg_roles WHERE rolname='regress_test_role'), 's8', 'USAGE')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 110, "num_statements": 1} {"question": "PostgreSQL regression test 'inet': Write the SELECT query (example 63).", "schema": null, "sql": "SELECT * FROM inet_tbl WHERE i >= '192.168.1.0/24'::cidr ORDER BY i;", "explanation": "Regression test for Inet in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT * FROM inet_tbl WHERE i >= '192.168.1.0/24'::cidr ORDER BY i) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 68, "num_statements": 1} {"question": "Show a SQL definition from the citus project (row_types, item 9).", "schema": null, "sql": "CREATE OR REPLACE FUNCTION identity_returner(x anyelement)\nRETURNS anyelement\nAS $$\nBEGIN\n\tRETURN x;\nEND;\n$$ language plpgsql;", "explanation": "SQL definition from the open-source citus PostgreSQL project.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "plpgsql_function", "is_postgresql_specific": true, "sql_length": 126, "num_statements": 3} {"question": "PostgreSQL regression test 'roleattributes': Write the SELECT query (example 44).", "schema": null, "sql": "SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_user_canlogin';", "explanation": "Regression test for Roleattributes in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_user_canlogin') AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 210, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'citext' (example 299).", "schema": null, "sql": "INSERT INTO caster (uuid) VALUES ('a0eebc99-9c0b-4ef8-bb6d-6bb9bd380a11'::citext);", "explanation": "Example query from the 'citext' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 91, "num_statements": 1} {"question": "pgTAP test for Resultset (assertion 182).", "schema": null, "sql": "INSERT INTO names (name) VALUES ('Jesse');", "explanation": "SQL assertion from pgTAP test suite for Resultset.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 42, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'btree_gin' (example 5).", "schema": null, "sql": "SELECT * FROM test_date WHERE i<'2004-10-26'::date ORDER BY i;", "explanation": "Example query from the 'btree_gin' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 62, "num_statements": 1} {"question": "PostgreSQL Indices: show example 1.", "schema": null, "sql": "CREATE TABLE test1 ( id integer, content varchar );", "explanation": "Example from PostgreSQL documentation on Indices.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_table", "is_postgresql_specific": false, "sql_length": 51, "num_statements": 1} {"question": "Show a SQL definition from the pg_partman project (pg_partman--4.6.1--4.6.2, item 12).", "schema": null, "sql": "CREATE OR REPLACE FUNCTION @extschema@.apply_foreign_keys(p_parent_table text, p_child_table text, p_job_id bigint DEFAULT NULL, p_debug boolean DEFAULT false) RETURNS void\n LANGUAGE plpgsql\n AS $$\nDECLARE\n\nex_context text;\nex_detail text;\nex_hint text;\nex_message text;\nv_count int := 0;\nv_job_id bigint;\nv_jobmon text;\nv_jobmon_schema text;\nv_new_search_path text;\nv_old_search_path text;\nv_parent_schema text;\nv_parent_tablename text;\nv_ref_schema text;\nv_ref_table text;\nv_relkind char;\nv_row record;\nv_schemaname text;\nv_sql text;\nv_step_id bigint;\nv_tablename text;\n\nBEGIN\n/*\n * Apply foreign keys that exist on the given parent to the given child table\n */\n\nSELECT jobmon INTO v_jobmon FROM @extschema@.part_config WHERE parent_table = p_parent_table;\n\nIF v_jobmon THEN\n SELECT nspname INTO v_jobmon_schema FROM pg_catalog.pg_namespace n, pg_catalog.pg_extension e WHERE e.extname = 'pg_jobmon' AND e.extnamespace = n.oid;\n IF v_jobmon_schema IS NOT NULL THEN\n SELECT current_setting('search_path') INTO v_old_search_path;\n IF length(v_old_search_path) > 0 THEN\n v_new_search_path := '@extschema@,pg_temp,'||v_old_search_path;\n ELSE\n v_new_search_path := '@extschema@,pg_temp';\n END IF;\n v_new_search_path := format('%s,%s',v_jobmon_schema, v_new_search_path);\n EXECUTE format('SELECT set_config(%L, %L, %L)', 'search_path', v_new_search_path, 'false');\n END IF;\nEND IF;\n\nSELECT n.nspname, c.relname, c.relkind INTO v_parent_schema, v_parent_tablename, v_relkind\nFROM pg_catalog.pg_class c\nJOIN pg_catalog.pg_namespace n ON c.relnamespace = n.oid\nWHERE n.nspname = split_part(p_parent_table, '.', 1)::name\nAND c.relname = split_part(p_parent_table, '.', 2)::name;\n\nIF v_relkind = 'p' THEN\n RAISE EXCEPTION 'This function cannot run on natively partitioned tables';\nELSIF v_relkind IS NULL THEN\n RAISE EXCEPTION 'Unable to find given table in system catalogs: %.%', v_parent_schema, v_parent_tablename;\nEND IF;\n\nSELECT n.nspname, c.relname INTO v_schemaname, v_tablename\nFROM pg_catalog.pg_class c\nJOIN pg_catalog.pg_namespace n ON c.relnamespace = n.oid\nWHERE n.nspname = split_part(p_child_table, '.', 1)::name\nAND c.relname = split_part(p_child_table, '.', 2)::name;\n\nIF v_jobmon_schema IS NOT NULL THEN\n IF p_job_id IS NULL THEN\n v_job_id := add_job(format('PARTMAN APPLYING FOREIGN KEYS: %s', p_parent_table));\n ELSE -- Don't create a new job, add steps into given job\n v_job_id := p_job_id;\n END IF;\nEND IF;\n\nIF v_jobmon_schema IS NOT NULL THEN\n v_step_id := add_step(v_job_id, format('Applying foreign keys to %s if they exist on parent', p_child_table));\nEND IF;\n\nIF v_tablename IS NULL THEN\n IF v_jobmon_schema IS NOT NULL THEN\n PERFORM update_step(v_step_id, 'CRITICAL', format('Target child table (%s) does not exist.', p_child_table));\n PERFORM fail_job(v_job_id);\n EXECUTE format('SELECT set_config(%L, %L, %L)', 'search_path', v_old_search_path, 'false');\n END IF;\n RAISE EXCEPTION 'Target child table (%) does not exist.', p_child_table;\n RETURN;\nEND IF;\n\nFOR v_row IN\n SELECT pg_get_constraintdef(con.oid) AS constraint_def\n FROM pg_catalog.pg_constraint con\n JOIN pg_catalog.pg_class c ON con.conrelid = c.oid\n JOIN pg_catalog.pg_namespace n ON c.relnamespace = n.oid\n WHERE c.relname = v_parent_tablename\n AND n.nspname = v_parent_schema\n AND contype = 'f'\nLOOP\n v_sql := format('ALTER TABLE %I.%I ADD %s'\n , v_schemaname\n , v_tablename\n , v_row.constraint_def);\n\n IF p_debug THEN\n RAISE NOTICE 'Constraint creation query: %', v_sql;\n END IF;\n\n EXECUTE v_sql;\n\n IF v_jobmon_schema IS NOT NULL THEN\n PERFORM update_step(v_step_id, 'OK', 'FK applied');\n END IF;\n v_count := v_count + 1;\n\nEND LOOP;\n\nIF v_count = 0 AND v_jobmon_schema IS NOT NULL THEN\n PERFORM update_step(v_step_id, 'OK', 'No FKs found on parent');\nEND IF;\n\n\nIF v_jobmon_schema IS NOT NULL THEN\n PERFORM close_job(v_job_id);\n EXECUTE format('SELECT set_config(%L, %L, %L)', 'search_path', v_old_search_path, 'false');\nEND IF;\n\nEXCEPTION\n WHEN OTHERS THEN\n GET STACKED DIAGNOSTICS ex_message = MESSAGE_TEXT,\n ex_context = PG_EXCEPTION_CONTEXT,\n ex_detail = PG_EXCEPTION_DETAIL,\n ex_hint = PG_EXCEPTION_HINT;\n IF v_jobmon_schema IS NOT NULL THEN\n IF v_job_id IS NULL THEN\n EXECUTE format('SELECT %I.add_job(''PARTMAN CREATE APPLYING FOREIGN KEYS: %s'')', v_jobmon_schema, p_parent_table) INTO v_job_id;\n EXECUTE format('SELECT %I.add_step(%s, ''EXCEPTION before job logging started'')', v_jobmon_schema, v_job_id, p_parent_table) INTO v_step_id;\n ELSIF v_step_id IS NULL THEN\n EXECUTE format('SELECT %I.add_step(%s, ''EXCEPTION before first step logged'')', v_jobmon_schema, v_job_id) INTO v_step_id;\n END IF;\n EXECUTE format('SELECT %I.update_step(%s, ''CRITICAL'', %L)', v_jobmon_schema, v_step_id, 'ERROR: '||coalesce(SQLERRM,'unknown'));\n EXECUTE format('SELECT %I.fail_job(%s)', v_jobmon_schema, v_job_id);\n END IF;\n RAISE EXCEPTION '%\nCONTEXT: %\nDETAIL: %\nHINT: %', ex_message, ex_context, ex_detail, ex_hint;\nEND\n$$;", "explanation": "SQL definition from the open-source pg_partman PostgreSQL project.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "plpgsql_function", "is_postgresql_specific": true, "sql_length": 5564, "num_statements": 71} {"question": "PostgreSQL Ddl: show example 41.", "schema": null, "sql": "CREATE TABLE products ( product_no integer, price numeric, valid_at daterange );", "explanation": "Example from PostgreSQL documentation on Ddl.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_table", "is_postgresql_specific": false, "sql_length": 80, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'with' (example 254).", "schema": null, "sql": "CREATE FUNCTION y_trigger() RETURNS trigger AS $$\nbegin\n raise notice 'y_trigger: a = %', new.a;\n return new;\nend;\n$$ LANGUAGE plpgsql;", "explanation": "DDL from PostgreSQL core regression test for With.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "plpgsql_trigger", "is_postgresql_specific": true, "sql_length": 137, "num_statements": 4} {"question": "PostgreSQL regression test 'rowsecurity': Write the SELECT query (example 839).", "schema": null, "sql": "SELECT (string_to_array(polqual, ':'))[7] AS inputcollid FROM pg_policy WHERE polrelid = 'coll_t'::regclass;", "explanation": "Regression test for Rowsecurity in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT (string_to_array(polqual, ':'))[7] AS inputcollid FROM pg_policy WHERE polrelid = 'coll_t'::regclass) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 108, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'constraints' (example 31).", "schema": null, "sql": "INSERT INTO NE_CHECK_TBL VALUES (2);", "explanation": "DML from PostgreSQL core regression test for Constraints.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 36, "num_statements": 1} {"question": "PostgreSQL regression test 'updatable_views': Write the SELECT query (example 100).", "schema": null, "sql": "SELECT table_name, is_insertable_into\n FROM information_schema.tables\n WHERE table_name LIKE 'rw_view%'\n ORDER BY table_name;", "explanation": "Regression test for Updatable Views in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT table_name, is_insertable_into\n FROM information_schema.tables\n WHERE table_name LIKE 'rw_view%'\n ORDER BY table_name) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": true, "sql_length": 126, "num_statements": 1} {"question": "PostgreSQL regression test 'interval': Write the SELECT query (example 374).", "schema": null, "sql": "SELECT interval 'infinity' * 'nan';", "explanation": "Regression test for Interval in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT interval 'infinity' * 'nan') AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 35, "num_statements": 1} {"question": "Write the PL/pgSQL object from PostgreSQL regression test 'encoding' (example 60).", "schema": null, "sql": "DROP FUNCTION test_valid_server_encoding;", "explanation": "PL/pgSQL object from PostgreSQL core test for Encoding.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 41, "num_statements": 1} {"question": "PostgreSQL regression test 'timestamptz': Write the SELECT query (example 134).", "schema": null, "sql": "SELECT d1 FROM TIMESTAMPTZ_TBL\n WHERE d1 >= timestamp with time zone '1997-01-02';", "explanation": "Regression test for Timestamptz in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT d1 FROM TIMESTAMPTZ_TBL\n WHERE d1 >= timestamp with time zone '1997-01-02') AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 84, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'alter_table' (example 472).", "schema": null, "sql": "CREATE TABLE fail_part (LIKE list_parted);", "explanation": "DDL from PostgreSQL core regression test for Alter Table.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_table", "is_postgresql_specific": false, "sql_length": 42, "num_statements": 1} {"question": "Write the SQL definition from PostgreSQL contrib extension 'contrib' (item 3).", "schema": null, "sql": "-- dblink_connect_u allows non-superusers to use\n-- non-password authenticated connections, but initially\n-- privileges are revoked from public\nCREATE FUNCTION dblink_connect_u (text)\nRETURNS text\nAS 'MODULE_PATHNAME','dblink_connect'\nLANGUAGE C STRICT PARALLEL RESTRICTED SECURITY DEFINER;", "explanation": "SQL definition from the 'contrib' PostgreSQL contrib extension.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "other", "is_postgresql_specific": false, "sql_length": 290, "num_statements": 1} {"question": "PostgreSQL regression test 'interval': Write the SELECT query (example 409).", "schema": null, "sql": "SELECT INTERVAL '2 minutes ago 5 days';", "explanation": "Regression test for Interval in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT INTERVAL '2 minutes ago 5 days') AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 39, "num_statements": 1} {"question": "PostgreSQL regression test 'xid': Write the SELECT query (example 44).", "schema": null, "sql": "select * from pg_input_error_info('12:16:14,13', 'pg_snapshot');", "explanation": "Regression test for Xid in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select * from pg_input_error_info('12:16:14,13', 'pg_snapshot')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 64, "num_statements": 1} {"question": "PostgreSQL regression test 'groupingsets': Write the SELECT query (example 73).", "schema": null, "sql": "select a, b, sum(c), sum(sum(c)) over (order by a,b) as rsum\n from gstest2 group by cube (a,b) order by rsum, a, b;", "explanation": "Regression test for Groupingsets in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select a, b, sum(c), sum(sum(c)) over (order by a,b) as rsum\n from gstest2 group by cube (a,b) order by rsum, a, b) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "advanced", "category": "query_window_function", "is_postgresql_specific": true, "sql_length": 116, "num_statements": 1} {"question": "PostgreSQL regression test 'sqljson_queryfuncs': Write the SELECT query (example 258).", "schema": null, "sql": "SELECT JSON_QUERY('\"a\"', '$.a' RETURNING queryfuncs_test_domain DEFAULT (select '\"1\"')::queryfuncs_test_domain ON ERROR);", "explanation": "Regression test for Sqljson Queryfuncs in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT JSON_QUERY('\"a\"', '$.a' RETURNING queryfuncs_test_domain DEFAULT (select '\"1\"')::queryfuncs_test_domain ON ERROR)) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": true, "sql_length": 121, "num_statements": 1} {"question": "PostgreSQL regression test 'numeric': Write the SELECT query (example 963).", "schema": null, "sql": "select scale(numeric 'inf');", "explanation": "Regression test for Numeric in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select scale(numeric 'inf')) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 28, "num_statements": 1} {"question": "Write the PL/pgSQL object from PostgreSQL regression test 'plpgsql' (example 528).", "schema": null, "sql": "$$ language plpgsql parallel safe;\n\nset debug_parallel_query to on;\n\nexplain (verbose, costs off) select error_trap_test();\nselect error_trap_test();\n\nreset debug_parallel_query;\n\ndrop function error_trap_test();\ndrop function zero_divide();\n\n-- check cases where implicit SQLSTATE variable could be confused with\n-- SQLSTATE as a keyword, cf bug #5524\ncreate or replace function raise_test() returns void as $$\nbegin\n perform 1/0;", "explanation": "PL/pgSQL object from PostgreSQL core test for Plpgsql.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "plpgsql_function", "is_postgresql_specific": true, "sql_length": 432, "num_statements": 8} {"question": "Write the SQL definition from PostgreSQL contrib extension 'contrib' (item 5).", "schema": null, "sql": "CREATE FUNCTION gbt_macad8_picksplit(internal, internal)\nRETURNS internal\nAS 'MODULE_PATHNAME'\nLANGUAGE C IMMUTABLE STRICT;", "explanation": "SQL definition from the 'contrib' PostgreSQL contrib extension.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 123, "num_statements": 1} {"question": "PostgreSQL Query: show example 2.", "schema": null, "sql": "CREATE TABLE cities ( name varchar(80), location point );", "explanation": "Example from PostgreSQL documentation on Query.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_table", "is_postgresql_specific": false, "sql_length": 57, "num_statements": 1} {"question": "Write the PL/pgSQL object from PostgreSQL regression test 'triggers' (example 107).", "schema": null, "sql": "$$;\ncreate trigger depth_b_tr before insert on depth_b\n for each row execute procedure depth_b_tf();\n\ncreate function depth_c_tf() returns trigger\n language plpgsql as $$\nbegin\n raise notice '%: depth = %', tg_name, pg_trigger_depth();", "explanation": "PL/pgSQL object from PostgreSQL core test for Triggers.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "plpgsql_trigger", "is_postgresql_specific": true, "sql_length": 238, "num_statements": 3} {"question": "Write the DDL statement from PostgreSQL regression test 'join' (example 540).", "schema": null, "sql": "create temp table t (a int unique, b int);", "explanation": "DDL from PostgreSQL core regression test for Join.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 42, "num_statements": 1} {"question": "PostgreSQL regression test 'case': Write the SELECT query (example 44).", "schema": null, "sql": "SELECT CASE\n (CASE vol('bar')\n WHEN 'foo' THEN 'it was foo!'\n WHEN vol(null) THEN 'null input'\n WHEN 'bar' THEN 'it was bar!' END\n )\n WHEN 'it was foo!' THEN 'foo recognized'\n WHEN 'it was bar!' THEN 'bar recognized'\n ELSE 'unrecognized' END;", "explanation": "Regression test for Case in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT CASE\n (CASE vol('bar')\n WHEN 'foo' THEN 'it was foo!'\n WHEN vol(null) THEN 'null input'\n WHEN 'bar' THEN 'it was bar!' END\n )\n WHEN 'it was foo!' THEN 'foo recognized'\n WHEN 'it was bar!' THEN 'bar recognized'\n ELSE 'unrecognized' END) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 256, "num_statements": 1} {"question": "PL/pgSQL test: Plperl Util (example 5).", "schema": null, "sql": "-- test quote_ident\n\ncreate or replace function perl_quote_ident() returns setof text language plperl as $$\n\treturn_next \"undef: \".quote_ident(undef); # generates undef warning if warnings enabled\n\treturn_next \"$_: \".quote_ident($_)\n\t\tfor q{foo}, q{a'b}, q{a\"b}, q{c''d}, q{e\\f}, q{g.h}, q{};\n\treturn undef;\n$$;", "explanation": "PL/pgSQL example from PostgreSQL source test for Plperl Util.", "validation_query": null, "source": "plpgsql_source", "difficulty": "advanced", "category": "plpgsql_function", "is_postgresql_specific": true, "sql_length": 311, "num_statements": 4} {"question": "Write the SQL definition from PostgreSQL contrib extension 'contrib' (item 64).", "schema": null, "sql": "CREATE FUNCTION g_cube_penalty(internal,internal,internal)\nRETURNS internal\nAS 'MODULE_PATHNAME'\nLANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;", "explanation": "SQL definition from the 'contrib' PostgreSQL contrib extension.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "advanced", "category": "other", "is_postgresql_specific": false, "sql_length": 139, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'jsonb_plperl' (example 14).", "schema": null, "sql": "SELECT testTextToJsonbObject(NULL);", "explanation": "Example query from the 'jsonb_plperl' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 35, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'pgcrypto' (example 13).", "schema": null, "sql": "select encode(decrypt(encrypt('foo', '0123456', 'aes') || '\\x00'::bytea, '0123456', 'aes'), 'escape');", "explanation": "Example query from the 'pgcrypto' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 102, "num_statements": 1} {"question": "pgTAP test for Pgtap--Unpackaged--0.91.0 (assertion 399).", "schema": null, "sql": "ALTER EXTENSION pgtap ADD FUNCTION has_leftop ( NAME, NAME, NAME );", "explanation": "SQL assertion from pgTAP test suite for Pgtap--Unpackaged--0.91.0.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 67, "num_statements": 1} {"question": "PostgreSQL regression test 'strings': Write the SELECT query (example 261).", "schema": null, "sql": "SELECT 'indio' LIKE 'in_o' AS \"false\";", "explanation": "Regression test for Strings in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT 'indio' LIKE 'in_o' AS \"false\") AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 38, "num_statements": 1} {"question": "pgTAP test for Do Tap (assertion 10).", "schema": null, "sql": "SELECT is(\n findfuncs('foo'),\n CASE WHEN pg_version_num() < 80300 THEN NULL ELSE '{}'::text[] END,\n 'findfuncs(unknown) should find no tests'\n);", "explanation": "SQL assertion from pgTAP test suite for Do Tap.", "validation_query": null, "source": "pgtap_tests", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 153, "num_statements": 1} {"question": "PostgreSQL regression test 'regex': Write the SELECT query (example 7).", "schema": null, "sql": "select 'abc abd abc' ~ '^(\\w+)( \\1)+$' as f;", "explanation": "Regression test for Regex in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select 'abc abd abc' ~ '^(\\w+)( \\1)+$' as f) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 44, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'constraints' (example 468).", "schema": null, "sql": "INSERT INTO notnull_tbl1_upg VALUES (NULL, 1), (NULL, 2), (300, 3);", "explanation": "DML from PostgreSQL core regression test for Constraints.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 67, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'join' (example 786).", "schema": null, "sql": "create table fkest (x integer, x10 integer, x10b integer, x100 integer);", "explanation": "DDL from PostgreSQL core regression test for Join.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_table", "is_postgresql_specific": false, "sql_length": 72, "num_statements": 1} {"question": "PostgreSQL regression test 'jsonb': Write the SELECT query (example 137).", "schema": null, "sql": "SELECT jsonb_exists_all('{\"a\":null, \"b\":\"qq\"}', ARRAY['a','b']);", "explanation": "Regression test for Jsonb in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT jsonb_exists_all('{\"a\":null, \"b\":\"qq\"}', ARRAY['a','b'])) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "intermediate", "category": "query_select", "is_postgresql_specific": false, "sql_length": 64, "num_statements": 1} {"question": "Write the SQL definition from PostgreSQL contrib extension 'contrib' (item 112).", "schema": null, "sql": "CREATE FUNCTION gbt_tstz_compress(internal)\nRETURNS internal\nAS 'MODULE_PATHNAME'\nLANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;", "explanation": "SQL definition from the 'contrib' PostgreSQL contrib extension.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 124, "num_statements": 1} {"question": "pgTAP test for Resultset (assertion 293).", "schema": null, "sql": "CREATE table dubs (pk SERIAL PRIMARY KEY, id int, name text);", "explanation": "SQL assertion from pgTAP test suite for Resultset.", "validation_query": null, "source": "pgtap_tests", "difficulty": "basic", "category": "ddl_table", "is_postgresql_specific": true, "sql_length": 61, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'create_index' (example 172).", "schema": null, "sql": "CREATE INDEX unlogged_hash_index ON unlogged_hash_table USING hash (id int4_ops);", "explanation": "DDL from PostgreSQL core regression test for Create Index.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "intermediate", "category": "ddl_index", "is_postgresql_specific": false, "sql_length": 81, "num_statements": 1} {"question": "PostgreSQL regression test 'jsonb': Write the SELECT query (example 423).", "schema": null, "sql": "select * from jsonb_to_record('{\"ia2\": [[[1], [2], [3]]]}') as x(ia2 int4[][]);", "explanation": "Regression test for Jsonb in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select * from jsonb_to_record('{\"ia2\": [[[1], [2], [3]]]}') as x(ia2 int4[][])) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 79, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'timestamp' (example 91).", "schema": null, "sql": "INSERT INTO TIMESTAMP_TBL VALUES ('Feb 29 17:32:01 1997');", "explanation": "DML from PostgreSQL core regression test for Timestamp.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": false, "sql_length": 58, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'pg_surgery' (example 31).", "schema": null, "sql": "select heap_force_freeze('mvw'::regclass, ARRAY['(0, 3)']::tid[]);", "explanation": "Example query from the 'pg_surgery' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 66, "num_statements": 1} {"question": "Show a query using PostgreSQL contrib extension 'xml2' (example 15).", "schema": null, "sql": "SELECT * FROM\nxpath_table('article_id',\n 'article_xml',\n 'articles',\n '/article/author|/article/pages|/article/title',\n 'date_entered > ''2003-01-01'' ')\nAS t(article_id integer, author text, page_count integer, title text);", "explanation": "Example query from the 'xml2' extension test/demo.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 268, "num_statements": 1} {"question": "Show a SQL definition from the pglogical project (pglogical--2.2.1, item 50).", "schema": null, "sql": "CREATE FUNCTION pglogical.wait_for_table_sync_complete(subscription_name name, relation regclass)\nRETURNS void RETURNS NULL ON NULL INPUT VOLATILE LANGUAGE c AS 'MODULE_PATHNAME', 'pglogical_wait_for_table_sync_complete';", "explanation": "SQL definition from the open-source pglogical PostgreSQL project.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "other", "is_postgresql_specific": false, "sql_length": 221, "num_statements": 1} {"question": "PostgreSQL regression test 'name': Write the SELECT query (example 18).", "schema": null, "sql": "SELECT c.f1 FROM NAME_TBL c WHERE c.f1 ~ '.*';", "explanation": "Regression test for Name in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT c.f1 FROM NAME_TBL c WHERE c.f1 ~ '.*') AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 46, "num_statements": 1} {"question": "PostgreSQL regression test 'arrays': Write the SELECT query (example 265).", "schema": null, "sql": "select '[1:]={1}'::int[];", "explanation": "Regression test for Arrays in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (select '[1:]={1}'::int[]) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 25, "num_statements": 1} {"question": "Write the DML statement from PostgreSQL regression test 'domain' (example 112).", "schema": null, "sql": "insert into dcomptable values (array[row(1,2)]::dcomptypea); -- fail on uniqueness\ninsert into dcomptable (d1[1]) values(row(9,10));", "explanation": "DML from PostgreSQL core regression test for Domain.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": true, "sql_length": 133, "num_statements": 2} {"question": "Show a SQL definition from the pglogical project (pglogical--2.2.1, item 39).", "schema": null, "sql": "CREATE TABLE pglogical.queue (\n queued_at timestamp with time zone NOT NULL,\n role name NOT NULL,\n replication_sets text[],\n message_type \"char\" NOT NULL,\n message json NOT NULL\n);", "explanation": "SQL definition from the open-source pglogical PostgreSQL project.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "ddl_table", "is_postgresql_specific": true, "sql_length": 195, "num_statements": 1} {"question": "PostgreSQL regression test 'jsonb': Write the SELECT query (example 150).", "schema": null, "sql": "SELECT jsonb_typeof('[]') AS array;", "explanation": "Regression test for Jsonb in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT jsonb_typeof('[]') AS array) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": false, "sql_length": 35, "num_statements": 1} {"question": "PostgreSQL regression test 'sqljson_queryfuncs': Write the SELECT query (example 145).", "schema": null, "sql": "SELECT JSON_QUERY(jsonb '[1,2]', '$' RETURNING json);", "explanation": "Regression test for Sqljson Queryfuncs in PostgreSQL core test suite.", "validation_query": "SELECT count(*) FROM (SELECT JSON_QUERY(jsonb '[1,2]', '$' RETURNING json)) AS _sub;", "source": "postgresql_regression_tests", "difficulty": "basic", "category": "query_select", "is_postgresql_specific": true, "sql_length": 53, "num_statements": 1} {"question": "Write the DDL statement from PostgreSQL regression test 'returning' (example 35).", "schema": null, "sql": "CREATE OR REPLACE RULE voo_i AS ON INSERT TO voo DO INSTEAD\n INSERT INTO foo VALUES(new.*, 57) RETURNING f1, f2;", "explanation": "DDL from PostgreSQL core regression test for Returning.", "validation_query": null, "source": "community_sql_datasets", "difficulty": "basic", "category": "dml_insert", "is_postgresql_specific": true, "sql_length": 113, "num_statements": 1} {"question": "PostgreSQL regression test 'tsearch': Write the SELECT query (example 286).", "schema": null, "sql": "SELECT ts_headline('english', '\n\n\n\nSea view wow foo bar qq\nYES  \nff-bg\n